osfmk/vm/vm_map.c (apple/xnu, xnu-3789.41.3)
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
fe8ab488
A
68
69#include <vm/vm_options.h>
70
91447636 71#include <libkern/OSAtomic.h>
1c79356b
A
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
55e303ae 78#include <mach/vm_statistics.h>
91447636 79#include <mach/memory_object.h>
0c530ab8 80#include <mach/mach_vm.h>
91447636 81#include <machine/cpu_capabilities.h>
2d21ac55 82#include <mach/sdt.h>
91447636 83
1c79356b 84#include <kern/assert.h>
39037602 85#include <kern/backtrace.h>
1c79356b 86#include <kern/counters.h>
91447636 87#include <kern/kalloc.h>
1c79356b 88#include <kern/zalloc.h>
91447636
A
89
90#include <vm/cpm.h>
39236c6e 91#include <vm/vm_compressor_pager.h>
1c79356b
A
92#include <vm/vm_init.h>
93#include <vm/vm_fault.h>
94#include <vm/vm_map.h>
95#include <vm/vm_object.h>
96#include <vm/vm_page.h>
b0d623f7 97#include <vm/vm_pageout.h>
1c79356b
A
98#include <vm/vm_kern.h>
99#include <ipc/ipc_port.h>
100#include <kern/sched_prim.h>
101#include <kern/misc_protos.h>
1c79356b
A
102#include <kern/xpr.h>
103
91447636
A
104#include <mach/vm_map_server.h>
105#include <mach/mach_host_server.h>
2d21ac55 106#include <vm/vm_protos.h>
b0d623f7 107#include <vm/vm_purgeable_internal.h>
91447636 108
91447636 109#include <vm/vm_protos.h>
2d21ac55 110#include <vm/vm_shared_region.h>
6d2010ae 111#include <vm/vm_map_store.h>
91447636 112
39037602
A
113extern int proc_selfpid(void);
114extern char *proc_name_address(void *p);
115
116#if VM_MAP_DEBUG_APPLE_PROTECT
117int vm_map_debug_apple_protect = 0;
118#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
119#if VM_MAP_DEBUG_FOURK
120int vm_map_debug_fourk = 0;
121#endif /* VM_MAP_DEBUG_FOURK */
3e170ce0 122
316670eb 123extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
124/* Internal prototypes
125 */
2d21ac55 126
91447636
A
127static void vm_map_simplify_range(
128 vm_map_t map,
129 vm_map_offset_t start,
130 vm_map_offset_t end); /* forward */
131
132static boolean_t vm_map_range_check(
2d21ac55
A
133 vm_map_t map,
134 vm_map_offset_t start,
135 vm_map_offset_t end,
136 vm_map_entry_t *entry);
1c79356b 137
91447636 138static vm_map_entry_t _vm_map_entry_create(
7ddcb079 139 struct vm_map_header *map_header, boolean_t map_locked);
1c79356b 140
91447636 141static void _vm_map_entry_dispose(
2d21ac55
A
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry);
1c79356b 144
91447636 145static void vm_map_pmap_enter(
2d21ac55
A
146 vm_map_t map,
147 vm_map_offset_t addr,
148 vm_map_offset_t end_addr,
149 vm_object_t object,
150 vm_object_offset_t offset,
151 vm_prot_t protection);
1c79356b 152
91447636 153static void _vm_map_clip_end(
2d21ac55
A
154 struct vm_map_header *map_header,
155 vm_map_entry_t entry,
156 vm_map_offset_t end);
91447636
A
157
158static void _vm_map_clip_start(
2d21ac55
A
159 struct vm_map_header *map_header,
160 vm_map_entry_t entry,
161 vm_map_offset_t start);
1c79356b 162
91447636 163static void vm_map_entry_delete(
2d21ac55
A
164 vm_map_t map,
165 vm_map_entry_t entry);
1c79356b 166
91447636 167static kern_return_t vm_map_delete(
2d21ac55
A
168 vm_map_t map,
169 vm_map_offset_t start,
170 vm_map_offset_t end,
171 int flags,
172 vm_map_t zap_map);
1c79356b 173
91447636 174static kern_return_t vm_map_copy_overwrite_unaligned(
2d21ac55
A
175 vm_map_t dst_map,
176 vm_map_entry_t entry,
177 vm_map_copy_t copy,
39236c6e
A
178 vm_map_address_t start,
179 boolean_t discard_on_success);
1c79356b 180
91447636 181static kern_return_t vm_map_copy_overwrite_aligned(
2d21ac55
A
182 vm_map_t dst_map,
183 vm_map_entry_t tmp_entry,
184 vm_map_copy_t copy,
185 vm_map_offset_t start,
186 pmap_t pmap);
1c79356b 187
91447636 188static kern_return_t vm_map_copyin_kernel_buffer(
2d21ac55
A
189 vm_map_t src_map,
190 vm_map_address_t src_addr,
191 vm_map_size_t len,
192 boolean_t src_destroy,
193 vm_map_copy_t *copy_result); /* OUT */
1c79356b 194
91447636 195static kern_return_t vm_map_copyout_kernel_buffer(
2d21ac55
A
196 vm_map_t map,
197 vm_map_address_t *addr, /* IN/OUT */
198 vm_map_copy_t copy,
39037602 199 vm_map_size_t copy_size,
39236c6e
A
200 boolean_t overwrite,
201 boolean_t consume_on_success);
1c79356b 202
91447636 203static void vm_map_fork_share(
2d21ac55
A
204 vm_map_t old_map,
205 vm_map_entry_t old_entry,
206 vm_map_t new_map);
1c79356b 207
91447636 208static boolean_t vm_map_fork_copy(
2d21ac55
A
209 vm_map_t old_map,
210 vm_map_entry_t *old_entry_p,
39037602
A
211 vm_map_t new_map,
212 int vm_map_copyin_flags);
1c79356b 213
0c530ab8 214void vm_map_region_top_walk(
2d21ac55
A
215 vm_map_entry_t entry,
216 vm_region_top_info_t top);
1c79356b 217
0c530ab8 218void vm_map_region_walk(
2d21ac55
A
219 vm_map_t map,
220 vm_map_offset_t va,
221 vm_map_entry_t entry,
222 vm_object_offset_t offset,
223 vm_object_size_t range,
224 vm_region_extended_info_t extended,
39236c6e
A
225 boolean_t look_for_pages,
226 mach_msg_type_number_t count);
91447636
A
227
228static kern_return_t vm_map_wire_nested(
2d21ac55
A
229 vm_map_t map,
230 vm_map_offset_t start,
231 vm_map_offset_t end,
3e170ce0 232 vm_prot_t caller_prot,
2d21ac55
A
233 boolean_t user_wire,
234 pmap_t map_pmap,
fe8ab488
A
235 vm_map_offset_t pmap_addr,
236 ppnum_t *physpage_p);
91447636
A
237
238static kern_return_t vm_map_unwire_nested(
2d21ac55
A
239 vm_map_t map,
240 vm_map_offset_t start,
241 vm_map_offset_t end,
242 boolean_t user_wire,
243 pmap_t map_pmap,
244 vm_map_offset_t pmap_addr);
91447636
A
245
246static kern_return_t vm_map_overwrite_submap_recurse(
2d21ac55
A
247 vm_map_t dst_map,
248 vm_map_offset_t dst_addr,
249 vm_map_size_t dst_size);
91447636
A
250
251static kern_return_t vm_map_copy_overwrite_nested(
2d21ac55
A
252 vm_map_t dst_map,
253 vm_map_offset_t dst_addr,
254 vm_map_copy_t copy,
255 boolean_t interruptible,
6d2010ae
A
256 pmap_t pmap,
257 boolean_t discard_on_success);
91447636
A
258
259static kern_return_t vm_map_remap_extract(
2d21ac55
A
260 vm_map_t map,
261 vm_map_offset_t addr,
262 vm_map_size_t size,
263 boolean_t copy,
264 struct vm_map_header *map_header,
265 vm_prot_t *cur_protection,
266 vm_prot_t *max_protection,
267 vm_inherit_t inheritance,
39037602
A
268 boolean_t pageable,
269 boolean_t same_map);
91447636
A
270
271static kern_return_t vm_map_remap_range_allocate(
2d21ac55
A
272 vm_map_t map,
273 vm_map_address_t *address,
274 vm_map_size_t size,
275 vm_map_offset_t mask,
060df5ea 276 int flags,
2d21ac55 277 vm_map_entry_t *map_entry);
91447636
A
278
279static void vm_map_region_look_for_page(
2d21ac55
A
280 vm_map_t map,
281 vm_map_offset_t va,
282 vm_object_t object,
283 vm_object_offset_t offset,
284 int max_refcnt,
285 int depth,
39236c6e
A
286 vm_region_extended_info_t extended,
287 mach_msg_type_number_t count);
91447636
A
288
289static int vm_map_region_count_obj_refs(
2d21ac55
A
290 vm_map_entry_t entry,
291 vm_object_t object);
1c79356b 292
b0d623f7
A
293
294static kern_return_t vm_map_willneed(
295 vm_map_t map,
296 vm_map_offset_t start,
297 vm_map_offset_t end);
298
299static kern_return_t vm_map_reuse_pages(
300 vm_map_t map,
301 vm_map_offset_t start,
302 vm_map_offset_t end);
303
304static kern_return_t vm_map_reusable_pages(
305 vm_map_t map,
306 vm_map_offset_t start,
307 vm_map_offset_t end);
308
309static kern_return_t vm_map_can_reuse(
310 vm_map_t map,
311 vm_map_offset_t start,
312 vm_map_offset_t end);
313
3e170ce0
A
314#if MACH_ASSERT
315static kern_return_t vm_map_pageout(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end);
319#endif /* MACH_ASSERT */
6d2010ae 320
1c79356b
A
321/*
322 * Macros to copy a vm_map_entry. We must be careful to correctly
323 * manage the wired page count. vm_map_entry_copy() creates a new
324 * map entry to the same memory - the wired count in the new entry
325 * must be set to zero. vm_map_entry_copy_full() creates a new
326 * entry that is identical to the old entry. This preserves the
327 * wire count; it's used for map splitting and zone changing in
328 * vm_map_copyout.
329 */
316670eb 330
7ddcb079
A
331#define vm_map_entry_copy(NEW,OLD) \
332MACRO_BEGIN \
333boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
2d21ac55
A
334 *(NEW) = *(OLD); \
335 (NEW)->is_shared = FALSE; \
336 (NEW)->needs_wakeup = FALSE; \
337 (NEW)->in_transition = FALSE; \
338 (NEW)->wired_count = 0; \
339 (NEW)->user_wired_count = 0; \
b0d623f7 340 (NEW)->permanent = FALSE; \
316670eb 341 (NEW)->used_for_jit = FALSE; \
fe8ab488
A
342 (NEW)->from_reserved_zone = _vmec_reserved; \
343 (NEW)->iokit_acct = FALSE; \
3e170ce0
A
344 (NEW)->vme_resilient_codesign = FALSE; \
345 (NEW)->vme_resilient_media = FALSE; \
39037602 346 (NEW)->vme_atomic = FALSE; \
1c79356b
A
347MACRO_END
348
7ddcb079
A
349#define vm_map_entry_copy_full(NEW,OLD) \
350MACRO_BEGIN \
351boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
352(*(NEW) = *(OLD)); \
353(NEW)->from_reserved_zone = _vmecf_reserved; \
354MACRO_END
1c79356b 355
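/*
 * Usage sketch (illustrative only; "new_entry" and "old_entry" are
 * hypothetical locals): pick the macro according to whether the wired
 * counts should carry over to the copy.
 *
 *	vm_map_entry_copy(new_entry, old_entry);	wired counts reset to 0
 *	vm_map_entry_copy_full(new_entry, old_entry);	wired counts preserved
 */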
2d21ac55
A
356/*
357 * Decide if we want to allow processes to execute from their data or stack areas.
358 * override_nx() returns true if we do. Data/stack execution can be enabled independently
359 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
360 * or allow_stack_exec to enable data execution for that type of data area for that particular
361 * ABI (or both by or'ing the flags together). These are initialized in the architecture
362 * specific pmap files since the default behavior varies according to architecture. The
363 * main reason it varies is because of the need to provide binary compatibility with old
364 * applications that were written before these restrictions came into being. In the old
365 * days, an app could execute anything it could read, but this has slowly been tightened
366 * up over time. The default behavior is:
367 *
368 * 32-bit PPC apps may execute from both stack and data areas
369 * 32-bit Intel apps may execute from data areas but not stack
370 * 64-bit PPC/Intel apps may not execute from either data or stack
371 *
372 * An application on any architecture may override these defaults by explicitly
373 * adding PROT_EXEC permission to the page in question with the mprotect(2)
374 * system call. This code here just determines what happens when an app tries to
375 * execute from a page that lacks execute permission.
376 *
377 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae
A
378 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
379 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
380 * execution from data areas for a particular binary even if the arch normally permits it. As
381 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
382 * to support some complicated use cases, notably browsers with out-of-process plugins that
383 * are not all NX-safe.
2d21ac55
A
384 */
385
386extern int allow_data_exec, allow_stack_exec;
387
388int
389override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
390{
391 int current_abi;
392
3e170ce0
A
393 if (map->pmap == kernel_pmap) return FALSE;
394
2d21ac55
A
395 /*
396 * Determine if the app is running in 32 or 64 bit mode.
397 */
398
399 if (vm_map_is_64bit(map))
400 current_abi = VM_ABI_64;
401 else
402 current_abi = VM_ABI_32;
403
404 /*
405 * Determine if we should allow the execution based on whether it's a
406 * stack or data area and the current architecture.
407 */
408
409 if (user_tag == VM_MEMORY_STACK)
410 return allow_stack_exec & current_abi;
411
6d2010ae 412 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
413}
414
415
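/*
 * Caller sketch (illustrative; "entry" and "prot" are hypothetical locals):
 * a caller computing the effective protection of a data or stack mapping
 * widens it only if override_nx() allows execution there.
 *
 *	if (override_nx(map, VME_ALIAS(entry)) && prot)
 *		prot |= VM_PROT_EXECUTE;
 */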
1c79356b
A
416/*
417 * Virtual memory maps provide for the mapping, protection,
418 * and sharing of virtual memory objects. In addition,
419 * this module provides for an efficient virtual copy of
420 * memory from one map to another.
421 *
422 * Synchronization is required prior to most operations.
423 *
424 * Maps consist of an ordered doubly-linked list of simple
425 * entries; a single hint is used to speed up lookups.
426 *
427 * Sharing maps have been deleted from this version of Mach.
428 * All shared objects are now mapped directly into the respective
429 * maps. This requires a change in the copy on write strategy;
430 * the asymmetric (delayed) strategy is used for shared temporary
431 * objects instead of the symmetric (shadow) strategy. All maps
432 * are now "top level" maps (either task map, kernel map or submap
433 * of the kernel map).
434 *
435 * Since portions of maps are specified by start/end addresses,
436 * which may not align with existing map entries, all
437 * routines merely "clip" entries to these start/end values.
438 * [That is, an entry is split into two, bordering at a
439 * start or end value.] Note that these clippings may not
440 * always be necessary (as the two resulting entries are then
441 * not changed); however, the clipping is done for convenience.
442 * No attempt is currently made to "glue back together" two
443 * abutting entries.
444 *
445 * The symmetric (shadow) copy strategy implements virtual copy
446 * by copying VM object references from one map to
447 * another, and then marking both regions as copy-on-write.
448 * It is important to note that only one writeable reference
449 * to a VM object region exists in any map when this strategy
450 * is used -- this means that shadow object creation can be
451 * delayed until a write operation occurs. The asymmetric (delayed)
452 * strategy allows multiple maps to have writeable references to
453 * the same region of a vm object, and hence cannot delay creating
454 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
455 * Copying of permanent objects is completely different; see
456 * vm_object_copy_strategically() in vm_object.c.
457 */
458
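/*
 * Sketch of the virtual copy described above (illustrative; the maps,
 * addresses and length are placeholders and error handling is omitted):
 * the range is copied between maps by reference, leaving both sides
 * copy-on-write rather than copying the pages.
 *
 *	vm_map_copy_t copy;
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS)
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 */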
91447636
A
459static zone_t vm_map_zone; /* zone for vm_map structures */
460static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
39037602 461zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
7ddcb079 462 * allocations */
91447636 463static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
3e170ce0 464zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
1c79356b
A
465
466
467/*
468 * Placeholder object for submap operations. This object is dropped
469 * into the range by a call to vm_map_find, and removed when
470 * vm_map_submap creates the submap.
471 */
472
473vm_object_t vm_submap_object;
474
91447636 475static void *map_data;
b0d623f7 476static vm_size_t map_data_size;
91447636 477static void *kentry_data;
b0d623f7 478static vm_size_t kentry_data_size;
3e170ce0
A
479static void *map_holes_data;
480static vm_size_t map_holes_data_size;
1c79356b 481
b0d623f7 482#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
1c79356b 483
55e303ae 484/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 485unsigned int not_in_kdp = 1;
55e303ae 486
6d2010ae
A
487unsigned int vm_map_set_cache_attr_count = 0;
488
489kern_return_t
490vm_map_set_cache_attr(
491 vm_map_t map,
492 vm_map_offset_t va)
493{
494 vm_map_entry_t map_entry;
495 vm_object_t object;
496 kern_return_t kr = KERN_SUCCESS;
497
498 vm_map_lock_read(map);
499
500 if (!vm_map_lookup_entry(map, va, &map_entry) ||
501 map_entry->is_sub_map) {
502 /*
503 * that memory is not properly mapped
504 */
505 kr = KERN_INVALID_ARGUMENT;
506 goto done;
507 }
3e170ce0 508 object = VME_OBJECT(map_entry);
6d2010ae
A
509
510 if (object == VM_OBJECT_NULL) {
511 /*
512 * there should be a VM object here at this point
513 */
514 kr = KERN_INVALID_ARGUMENT;
515 goto done;
516 }
517 vm_object_lock(object);
518 object->set_cache_attr = TRUE;
519 vm_object_unlock(object);
520
521 vm_map_set_cache_attr_count++;
522done:
523 vm_map_unlock_read(map);
524
525 return kr;
526}
527
528
593a1d5f
A
529#if CONFIG_CODE_DECRYPTION
530/*
531 * vm_map_apple_protected:
532 * This remaps the requested part of the object with an object backed by
533 * the decrypting pager.
534 * crypt_info contains entry points and session data for the crypt module.
535 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
536 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
537 */
0c530ab8
A
538kern_return_t
539vm_map_apple_protected(
3e170ce0
A
540 vm_map_t map,
541 vm_map_offset_t start,
542 vm_map_offset_t end,
543 vm_object_offset_t crypto_backing_offset,
593a1d5f 544 struct pager_crypt_info *crypt_info)
0c530ab8
A
545{
546 boolean_t map_locked;
547 kern_return_t kr;
548 vm_map_entry_t map_entry;
3e170ce0
A
549 struct vm_map_entry tmp_entry;
550 memory_object_t unprotected_mem_obj;
0c530ab8
A
551 vm_object_t protected_object;
552 vm_map_offset_t map_addr;
3e170ce0
A
553 vm_map_offset_t start_aligned, end_aligned;
554 vm_object_offset_t crypto_start, crypto_end;
555 int vm_flags;
0c530ab8 556
3e170ce0
A
557 map_locked = FALSE;
558 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 559
3e170ce0
A
560 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
561 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
562 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
563 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 564
3e170ce0
A
565 assert(start_aligned == start);
566 assert(end_aligned == end);
b0d623f7 567
3e170ce0
A
568 map_addr = start_aligned;
569 for (map_addr = start_aligned;
570 map_addr < end;
571 map_addr = tmp_entry.vme_end) {
572 vm_map_lock(map);
573 map_locked = TRUE;
b0d623f7 574
3e170ce0
A
575 /* lookup the protected VM object */
576 if (!vm_map_lookup_entry(map,
577 map_addr,
578 &map_entry) ||
579 map_entry->is_sub_map ||
580 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
581 !(map_entry->protection & VM_PROT_EXECUTE)) {
582 /* that memory is not properly mapped */
583 kr = KERN_INVALID_ARGUMENT;
584 goto done;
585 }
b0d623f7 586
3e170ce0
A
587 /* get the protected object to be decrypted */
588 protected_object = VME_OBJECT(map_entry);
589 if (protected_object == VM_OBJECT_NULL) {
590 /* there should be a VM object here at this point */
591 kr = KERN_INVALID_ARGUMENT;
592 goto done;
593 }
594 /* ensure protected object stays alive while map is unlocked */
595 vm_object_reference(protected_object);
596
597 /* limit the map entry to the area we want to cover */
598 vm_map_clip_start(map, map_entry, start_aligned);
599 vm_map_clip_end(map, map_entry, end_aligned);
600
601 tmp_entry = *map_entry;
602 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
603 vm_map_unlock(map);
604 map_locked = FALSE;
605
606 /*
607 * This map entry might be only partially encrypted
608 * (if not fully "page-aligned").
609 */
610 crypto_start = 0;
611 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
612 if (tmp_entry.vme_start < start) {
613 if (tmp_entry.vme_start != start_aligned) {
614 kr = KERN_INVALID_ADDRESS;
615 }
616 crypto_start += (start - tmp_entry.vme_start);
617 }
618 if (tmp_entry.vme_end > end) {
619 if (tmp_entry.vme_end != end_aligned) {
620 kr = KERN_INVALID_ADDRESS;
621 }
622 crypto_end -= (tmp_entry.vme_end - end);
623 }
624
625 /*
626 * This "extra backing offset" is needed to get the decryption
627 * routine to use the right key. It adjusts for the possibly
628 * relative offset of an interposed "4K" pager...
629 */
630 if (crypto_backing_offset == (vm_object_offset_t) -1) {
631 crypto_backing_offset = VME_OFFSET(&tmp_entry);
632 }
0c530ab8 633
3e170ce0
A
634 /*
635 * Lookup (and create if necessary) the protected memory object
636 * matching that VM object.
637 * If successful, this also grabs a reference on the memory object,
638 * to guarantee that it doesn't go away before we get a chance to map
639 * it.
640 */
641 unprotected_mem_obj = apple_protect_pager_setup(
642 protected_object,
643 VME_OFFSET(&tmp_entry),
644 crypto_backing_offset,
645 crypt_info,
646 crypto_start,
647 crypto_end);
648
649 /* release extra ref on protected object */
650 vm_object_deallocate(protected_object);
651
652 if (unprotected_mem_obj == NULL) {
653 kr = KERN_FAILURE;
654 goto done;
655 }
656
657 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
658
659 /* map this memory object in place of the current one */
660 map_addr = tmp_entry.vme_start;
661 kr = vm_map_enter_mem_object(map,
662 &map_addr,
663 (tmp_entry.vme_end -
664 tmp_entry.vme_start),
665 (mach_vm_offset_t) 0,
666 vm_flags,
667 (ipc_port_t) unprotected_mem_obj,
668 0,
669 TRUE,
670 tmp_entry.protection,
671 tmp_entry.max_protection,
672 tmp_entry.inheritance);
673 assert(kr == KERN_SUCCESS);
674 assert(map_addr == tmp_entry.vme_start);
675
676#if VM_MAP_DEBUG_APPLE_PROTECT
39037602
A
677 if (vm_map_debug_apple_protect) {
678 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
679 " backing:[object:%p,offset:0x%llx,"
680 "crypto_backing_offset:0x%llx,"
681 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
682 map,
683 (uint64_t) map_addr,
684 (uint64_t) (map_addr + (tmp_entry.vme_end -
685 tmp_entry.vme_start)),
686 unprotected_mem_obj,
687 protected_object,
688 VME_OFFSET(&tmp_entry),
689 crypto_backing_offset,
690 crypto_start,
691 crypto_end);
692 }
3e170ce0
A
693#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
694
695 /*
696 * Release the reference obtained by
697 * apple_protect_pager_setup().
698 * The mapping (if it succeeded) is now holding a reference on
699 * the memory object.
700 */
701 memory_object_deallocate(unprotected_mem_obj);
702 unprotected_mem_obj = MEMORY_OBJECT_NULL;
703
704 /* continue with next map entry */
705 crypto_backing_offset += (tmp_entry.vme_end -
706 tmp_entry.vme_start);
707 crypto_backing_offset -= crypto_start;
708 }
709 kr = KERN_SUCCESS;
0c530ab8
A
710
711done:
712 if (map_locked) {
3e170ce0 713 vm_map_unlock(map);
0c530ab8
A
714 }
715 return kr;
716}
593a1d5f 717#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
718
719
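/*
 * Calling-convention sketch for vm_map_apple_protected() (illustrative;
 * "seg_start"/"seg_end" are placeholders and the crypt module setup is
 * assumed, not shown). Passing -1 as crypto_backing_offset makes the
 * routine fall back to each entry's own VM object offset, as handled above.
 *
 *	struct pager_crypt_info ci;
 *	... fill in ci with the crypt module's entry points and session data ...
 *	kr = vm_map_apple_protected(map, seg_start, seg_end,
 *				    (vm_object_offset_t) -1,
 *				    &ci);
 *	... data referenced by ci must remain valid until ci.crypt_end() runs ...
 */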
b0d623f7
A
720lck_grp_t vm_map_lck_grp;
721lck_grp_attr_t vm_map_lck_grp_attr;
722lck_attr_t vm_map_lck_attr;
fe8ab488 723lck_attr_t vm_map_lck_rw_attr;
b0d623f7
A
724
725
593a1d5f
A
726/*
727 * vm_map_init:
728 *
729 * Initialize the vm_map module. Must be called before
730 * any other vm_map routines.
731 *
732 * Map and entry structures are allocated from zones -- we must
733 * initialize those zones.
734 *
735 * There are three zones of interest:
736 *
737 * vm_map_zone: used to allocate maps.
738 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 739 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
740 *
741 * The kernel allocates map entries from a special zone that is initially
742 * "crammed" with memory. It would be difficult (perhaps impossible) for
743 * the kernel to allocate more memory to an entry zone when it became
744 * empty since the very act of allocating memory implies the creation
745 * of a new entry.
746 */
1c79356b
A
747void
748vm_map_init(
749 void)
750{
7ddcb079 751 vm_size_t entry_zone_alloc_size;
316670eb
A
752 const char *mez_name = "VM map entries";
753
2d21ac55
A
754 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
755 PAGE_SIZE, "maps");
0b4c1975 756 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
7ddcb079
A
757#if defined(__LP64__)
758 entry_zone_alloc_size = PAGE_SIZE * 5;
759#else
760 entry_zone_alloc_size = PAGE_SIZE * 6;
761#endif
91447636 762 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
7ddcb079 763 1024*1024, entry_zone_alloc_size,
316670eb 764 mez_name);
0b4c1975 765 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 766 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 767 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 768
7ddcb079
A
769 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
770 kentry_data_size * 64, kentry_data_size,
771 "Reserved VM map entries");
772 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
1c79356b 773
91447636 774 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
7ddcb079 775 16*1024, PAGE_SIZE, "VM map copies");
0b4c1975 776 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b 777
3e170ce0
A
778 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
779 16*1024, PAGE_SIZE, "VM map holes");
780 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
781
1c79356b
A
782 /*
783 * Cram the map and kentry zones with initial data.
7ddcb079 784 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
785 */
786 zone_change(vm_map_zone, Z_COLLECT, FALSE);
39037602 787 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
7ddcb079
A
788
789 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
790 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
791 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
792 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
793 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 794 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 795 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 796
3e170ce0
A
797 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
798 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
799 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
800 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
801 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
802 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
803
804 /*
805 * Add the stolen memory to zones, adjust zone size and stolen counts.
806 */
7ddcb079
A
807 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
808 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
3e170ce0
A
809 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
810 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
811
b0d623f7
A
812 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
813 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
814 lck_attr_setdefault(&vm_map_lck_attr);
316670eb 815
fe8ab488
A
816 lck_attr_setdefault(&vm_map_lck_rw_attr);
817 lck_attr_cleardebug(&vm_map_lck_rw_attr);
818
39037602
A
819#if VM_MAP_DEBUG_APPLE_PROTECT
820 PE_parse_boot_argn("vm_map_debug_apple_protect",
821 &vm_map_debug_apple_protect,
822 sizeof(vm_map_debug_apple_protect));
823#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
824#if VM_MAP_DEBUG_FOURK
825 PE_parse_boot_argn("vm_map_debug_fourk",
826 &vm_map_debug_fourk,
827 sizeof(vm_map_debug_fourk));
828#endif /* VM_MAP_DEBUG_FOURK */
1c79356b
A
829}
830
831void
832vm_map_steal_memory(
833 void)
834{
7ddcb079
A
835 uint32_t kentry_initial_pages;
836
b0d623f7 837 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
838 map_data = pmap_steal_memory(map_data_size);
839
1c79356b 840 /*
7ddcb079
A
841 * kentry_initial_pages corresponds to the number of kernel map entries
842 * required during bootstrap until the asynchronous replenishment
843 * scheme is activated and/or entries are available from the general
844 * map entry pool.
1c79356b 845 */
7ddcb079
A
846#if defined(__LP64__)
847 kentry_initial_pages = 10;
848#else
849 kentry_initial_pages = 6;
1c79356b 850#endif
316670eb
A
851
852#if CONFIG_GZALLOC
853 /* If using the guard allocator, reserve more memory for the kernel
854 * reserved map entry pool.
855 */
856 if (gzalloc_enabled())
857 kentry_initial_pages *= 1024;
858#endif
859
7ddcb079 860 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b 861 kentry_data = pmap_steal_memory(kentry_data_size);
3e170ce0
A
862
863 map_holes_data_size = kentry_data_size;
864 map_holes_data = pmap_steal_memory(map_holes_data_size);
1c79356b
A
865}
866
3e170ce0
A
867void
868vm_kernel_reserved_entry_init(void) {
7ddcb079 869 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
3e170ce0
A
870 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
871}
872
873void
874vm_map_disable_hole_optimization(vm_map_t map)
875{
876 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
877
878 if (map->holelistenabled) {
879
880 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
881
882 while (hole_entry != NULL) {
883
884 next_hole_entry = hole_entry->vme_next;
885
886 hole_entry->vme_next = NULL;
887 hole_entry->vme_prev = NULL;
888 zfree(vm_map_holes_zone, hole_entry);
889
890 if (next_hole_entry == head_entry) {
891 hole_entry = NULL;
892 } else {
893 hole_entry = next_hole_entry;
894 }
895 }
896
897 map->holes_list = NULL;
898 map->holelistenabled = FALSE;
899
900 map->first_free = vm_map_first_entry(map);
901 SAVE_HINT_HOLE_WRITE(map, NULL);
902 }
903}
904
905boolean_t
906vm_kernel_map_is_kernel(vm_map_t map) {
907 return (map->pmap == kernel_pmap);
7ddcb079
A
908}
909
1c79356b
A
910/*
911 * vm_map_create:
912 *
913 * Creates and returns a new empty VM map with
914 * the given physical map structure, and having
915 * the given lower and upper address bounds.
916 */
3e170ce0
A
917
918boolean_t vm_map_supports_hole_optimization = TRUE;
919
1c79356b
A
920vm_map_t
921vm_map_create(
91447636
A
922 pmap_t pmap,
923 vm_map_offset_t min,
924 vm_map_offset_t max,
925 boolean_t pageable)
1c79356b 926{
2d21ac55 927 static int color_seed = 0;
39037602 928 vm_map_t result;
3e170ce0 929 struct vm_map_links *hole_entry = NULL;
1c79356b
A
930
931 result = (vm_map_t) zalloc(vm_map_zone);
932 if (result == VM_MAP_NULL)
933 panic("vm_map_create");
934
935 vm_map_first_entry(result) = vm_map_to_entry(result);
936 vm_map_last_entry(result) = vm_map_to_entry(result);
937 result->hdr.nentries = 0;
938 result->hdr.entries_pageable = pageable;
939
6d2010ae
A
940 vm_map_store_init( &(result->hdr) );
941
39236c6e
A
942 result->hdr.page_shift = PAGE_SHIFT;
943
1c79356b 944 result->size = 0;
2d21ac55
A
945 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
946 result->user_wire_size = 0;
1c79356b
A
947 result->ref_count = 1;
948#if TASK_SWAPPER
949 result->res_count = 1;
950 result->sw_state = MAP_SW_IN;
951#endif /* TASK_SWAPPER */
952 result->pmap = pmap;
953 result->min_offset = min;
954 result->max_offset = max;
955 result->wiring_required = FALSE;
956 result->no_zero_fill = FALSE;
316670eb 957 result->mapped_in_other_pmaps = FALSE;
1c79356b 958 result->wait_for_space = FALSE;
b0d623f7 959 result->switch_protect = FALSE;
6d2010ae
A
960 result->disable_vmentry_reuse = FALSE;
961 result->map_disallow_data_exec = FALSE;
39037602 962 result->is_nested_map = FALSE;
6d2010ae 963 result->highest_entry_end = 0;
1c79356b
A
964 result->first_free = vm_map_to_entry(result);
965 result->hint = vm_map_to_entry(result);
2d21ac55 966 result->color_rr = (color_seed++) & vm_color_mask;
6d2010ae 967 result->jit_entry_exists = FALSE;
3e170ce0
A
968
969 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
970 hole_entry = zalloc(vm_map_holes_zone);
971
972 hole_entry->start = min;
973 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
974 result->holes_list = result->hole_hint = hole_entry;
975 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
976 result->holelistenabled = TRUE;
977
978 } else {
979
980 result->holelistenabled = FALSE;
981 }
982
1c79356b 983 vm_map_lock_init(result);
b0d623f7
A
984 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
985
1c79356b
A
986 return(result);
987}
988
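/*
 * Typical use (sketch; "task_pmap", "min_va" and "max_va" are placeholders
 * chosen by the caller): a task map is created over the user address range
 * with a freshly created pmap and pageable entries, while kernel submaps
 * pass the kernel's own bounds.
 *
 *	map = vm_map_create(task_pmap, min_va, max_va, TRUE);
 */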
989/*
990 * vm_map_entry_create: [ internal use only ]
991 *
992 * Allocates a VM map entry for insertion in the
993 * given map (or map copy). No fields are filled.
994 */
7ddcb079 995#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 996
7ddcb079
A
997#define vm_map_copy_entry_create(copy, map_locked) \
998 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
999unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 1000
91447636 1001static vm_map_entry_t
1c79356b 1002_vm_map_entry_create(
7ddcb079 1003 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 1004{
7ddcb079
A
1005 zone_t zone;
1006 vm_map_entry_t entry;
1c79356b 1007
7ddcb079
A
1008 zone = vm_map_entry_zone;
1009
1010 assert(map_header->entries_pageable ? !map_locked : TRUE);
1011
1012 if (map_header->entries_pageable) {
1013 entry = (vm_map_entry_t) zalloc(zone);
1014 }
1015 else {
1016 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1017
1018 if (entry == VM_MAP_ENTRY_NULL) {
1019 zone = vm_map_entry_reserved_zone;
1020 entry = (vm_map_entry_t) zalloc(zone);
1021 OSAddAtomic(1, &reserved_zalloc_count);
1022 } else
1023 OSAddAtomic(1, &nonreserved_zalloc_count);
1024 }
1c79356b 1025
1c79356b
A
1026 if (entry == VM_MAP_ENTRY_NULL)
1027 panic("vm_map_entry_create");
7ddcb079
A
1028 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1029
6d2010ae 1030 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
316670eb 1031#if MAP_ENTRY_CREATION_DEBUG
39236c6e 1032 entry->vme_creation_maphdr = map_header;
39037602
A
1033 backtrace(&entry->vme_creation_bt[0],
1034 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
316670eb 1035#endif
1c79356b
A
1036 return(entry);
1037}
1038
1039/*
1040 * vm_map_entry_dispose: [ internal use only ]
1041 *
1042 * Inverse of vm_map_entry_create.
2d21ac55
A
1043 *
1044 * write map lock held so no need to
1045 * do anything special to ensure correctness
1046 * of the stores
1c79356b
A
1047 */
1048#define vm_map_entry_dispose(map, entry) \
6d2010ae 1049 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b
A
1050
1051#define vm_map_copy_entry_dispose(map, entry) \
1052 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1053
91447636 1054static void
1c79356b 1055_vm_map_entry_dispose(
39037602
A
1056 struct vm_map_header *map_header,
1057 vm_map_entry_t entry)
1c79356b 1058{
39037602 1059 zone_t zone;
1c79356b 1060
7ddcb079 1061 if (map_header->entries_pageable || !(entry->from_reserved_zone))
2d21ac55 1062 zone = vm_map_entry_zone;
1c79356b 1063 else
7ddcb079
A
1064 zone = vm_map_entry_reserved_zone;
1065
1066 if (!map_header->entries_pageable) {
1067 if (zone == vm_map_entry_zone)
1068 OSAddAtomic(-1, &nonreserved_zalloc_count);
1069 else
1070 OSAddAtomic(-1, &reserved_zalloc_count);
1071 }
1c79356b 1072
91447636 1073 zfree(zone, entry);
1c79356b
A
1074}
1075
91447636 1076#if MACH_ASSERT
91447636 1077static boolean_t first_free_check = FALSE;
6d2010ae 1078boolean_t
1c79356b
A
1079first_free_is_valid(
1080 vm_map_t map)
1081{
1c79356b
A
1082 if (!first_free_check)
1083 return TRUE;
2d21ac55 1084
6d2010ae 1085 return( first_free_is_valid_store( map ));
1c79356b 1086}
91447636 1087#endif /* MACH_ASSERT */
1c79356b 1088
1c79356b
A
1089
1090#define vm_map_copy_entry_link(copy, after_where, entry) \
6d2010ae 1091 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1c79356b
A
1092
1093#define vm_map_copy_entry_unlink(copy, entry) \
6d2010ae 1094 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1095
1c79356b 1096#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
1097/*
1098 * vm_map_res_reference:
1099 *
1100 * Adds another valid residence count to the given map.
1101 *
1102 * Map is locked so this function can be called from
1103 * vm_map_swapin.
1104 *
1105 */
39037602 1106void vm_map_res_reference(vm_map_t map)
1c79356b
A
1107{
1108 /* assert map is locked */
1109 assert(map->res_count >= 0);
1110 assert(map->ref_count >= map->res_count);
1111 if (map->res_count == 0) {
b0d623f7 1112 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1113 vm_map_lock(map);
1114 vm_map_swapin(map);
b0d623f7 1115 lck_mtx_lock(&map->s_lock);
1c79356b
A
1116 ++map->res_count;
1117 vm_map_unlock(map);
1118 } else
1119 ++map->res_count;
1120}
1121
1122/*
1123 * vm_map_reference_swap:
1124 *
1125 * Adds valid reference and residence counts to the given map.
1126 *
1127 * The map may not be in memory (i.e. zero residence count).
1128 *
1129 */
39037602 1130void vm_map_reference_swap(vm_map_t map)
1c79356b
A
1131{
1132 assert(map != VM_MAP_NULL);
b0d623f7 1133 lck_mtx_lock(&map->s_lock);
1c79356b
A
1134 assert(map->res_count >= 0);
1135 assert(map->ref_count >= map->res_count);
1136 map->ref_count++;
1137 vm_map_res_reference(map);
b0d623f7 1138 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1139}
1140
1141/*
1142 * vm_map_res_deallocate:
1143 *
1144 * Decrement residence count on a map; possibly causing swapout.
1145 *
1146 * The map must be in memory (i.e. non-zero residence count).
1147 *
1148 * The map is locked, so this function is callable from vm_map_deallocate.
1149 *
1150 */
39037602 1151void vm_map_res_deallocate(vm_map_t map)
1c79356b
A
1152{
1153 assert(map->res_count > 0);
1154 if (--map->res_count == 0) {
b0d623f7 1155 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1156 vm_map_lock(map);
1157 vm_map_swapout(map);
1158 vm_map_unlock(map);
b0d623f7 1159 lck_mtx_lock(&map->s_lock);
1c79356b
A
1160 }
1161 assert(map->ref_count >= map->res_count);
1162}
1163#endif /* MACH_ASSERT && TASK_SWAPPER */
1164
1c79356b
A
1165/*
1166 * vm_map_destroy:
1167 *
1168 * Actually destroy a map.
1169 */
1170void
1171vm_map_destroy(
2d21ac55
A
1172 vm_map_t map,
1173 int flags)
91447636 1174{
1c79356b 1175 vm_map_lock(map);
2d21ac55 1176
3e170ce0
A
1177 /* final cleanup: no need to unnest shared region */
1178 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1179
2d21ac55
A
1180 /* clean up regular map entries */
1181 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1182 flags, VM_MAP_NULL);
1183 /* clean up leftover special mappings (commpage, etc...) */
2d21ac55
A
1184 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1185 flags, VM_MAP_NULL);
6d2010ae 1186
3e170ce0 1187 vm_map_disable_hole_optimization(map);
1c79356b
A
1188 vm_map_unlock(map);
1189
2d21ac55
A
1190 assert(map->hdr.nentries == 0);
1191
55e303ae
A
1192 if(map->pmap)
1193 pmap_destroy(map->pmap);
1c79356b 1194
39037602
A
1195 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1196 /*
1197 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1198 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1199 * structure or kalloc'ed via lck_mtx_init.
1200 * An example is s_lock_ext within struct _vm_map.
1201 *
1202 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1203 * can add another tag to detect embedded vs alloc'ed indirect external
1204 * mutexes but that'll be additional checks in the lock path and require
1205 * updating dependencies for the old vs new tag.
1206 *
1207 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1208 * just when lock debugging is ON, we choose to forego explicitly destroying
1209 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1210 * count on vm_map_lck_grp, which has no serious side-effect.
1211 */
1212 } else {
1213 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1214 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1215 }
1216
91447636 1217 zfree(vm_map_zone, map);
1c79356b
A
1218}
1219
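/*
 * Note: callers normally do not invoke vm_map_destroy() directly; they
 * drop their reference with vm_map_deallocate(), and the map is destroyed
 * only once its ref_count reaches zero.
 */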
1220#if TASK_SWAPPER
1221/*
1222 * vm_map_swapin/vm_map_swapout
1223 *
1224 * Swap a map in and out, either referencing or releasing its resources.
1225 * These functions are internal use only; however, they must be exported
1226 * because they may be called from macros, which are exported.
1227 *
1228 * In the case of swapout, there could be races on the residence count,
1229 * so if the residence count is up, we return, assuming that a
1230 * vm_map_deallocate() call in the near future will bring us back.
1231 *
1232 * Locking:
1233 * -- We use the map write lock for synchronization among races.
1234 * -- The map write lock, and not the simple s_lock, protects the
1235 * swap state of the map.
1236 * -- If a map entry is a share map, then we hold both locks, in
1237 * hierarchical order.
1238 *
1239 * Synchronization Notes:
1240 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1241 * will block on the map lock and proceed when swapout is through.
1242 * 2) A vm_map_reference() call at this time is illegal, and will
1243 * cause a panic. vm_map_reference() is only allowed on resident
1244 * maps, since it refuses to block.
1245 * 3) A vm_map_swapin() call during a swapin will block, and
1246 * proceed when the first swapin is done, turning into a nop.
1247 * This is the reason the res_count is not incremented until
1248 * after the swapin is complete.
1249 * 4) There is a timing hole after the checks of the res_count, before
1250 * the map lock is taken, during which a swapin may get the lock
1251 * before a swapout about to happen. If this happens, the swapin
1252 * will detect the state and increment the reference count, causing
1253 * the swapout to be a nop, thereby delaying it until a later
1254 * vm_map_deallocate. If the swapout gets the lock first, then
1255 * the swapin will simply block until the swapout is done, and
1256 * then proceed.
1257 *
1258 * Because vm_map_swapin() is potentially an expensive operation, it
1259 * should be used with caution.
1260 *
1261 * Invariants:
1262 * 1) A map with a residence count of zero is either swapped, or
1263 * being swapped.
1264 * 2) A map with a non-zero residence count is either resident,
1265 * or being swapped in.
1266 */
1267
1268int vm_map_swap_enable = 1;
1269
1270void vm_map_swapin (vm_map_t map)
1271{
39037602 1272 vm_map_entry_t entry;
2d21ac55 1273
1c79356b
A
1274 if (!vm_map_swap_enable) /* debug */
1275 return;
1276
1277 /*
1278 * Map is locked
1279 * First deal with various races.
1280 */
1281 if (map->sw_state == MAP_SW_IN)
1282 /*
1283 * we raced with swapout and won. Returning will incr.
1284 * the res_count, turning the swapout into a nop.
1285 */
1286 return;
1287
1288 /*
1289 * The residence count must be zero. If we raced with another
1290 * swapin, the state would have been IN; if we raced with a
1291 * swapout (after another competing swapin), we must have lost
1292 * the race to get here (see above comment), in which case
1293 * res_count is still 0.
1294 */
1295 assert(map->res_count == 0);
1296
1297 /*
1298 * There are no intermediate states of a map going out or
1299 * coming in, since the map is locked during the transition.
1300 */
1301 assert(map->sw_state == MAP_SW_OUT);
1302
1303 /*
1304 * We now operate upon each map entry. If the entry is a sub-
1305 * or share-map, we call vm_map_res_reference upon it.
1306 * If the entry is an object, we call vm_object_res_reference
1307 * (this may iterate through the shadow chain).
1308 * Note that we hold the map locked the entire time,
1309 * even if we get back here via a recursive call in
1310 * vm_map_res_reference.
1311 */
1312 entry = vm_map_first_entry(map);
1313
1314 while (entry != vm_map_to_entry(map)) {
3e170ce0 1315 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1316 if (entry->is_sub_map) {
3e170ce0 1317 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1318 lck_mtx_lock(&lmap->s_lock);
1c79356b 1319 vm_map_res_reference(lmap);
b0d623f7 1320 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1321 } else {
3e170ce0 1322 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1323 vm_object_lock(object);
1324 /*
1325 * This call may iterate through the
1326 * shadow chain.
1327 */
1328 vm_object_res_reference(object);
1329 vm_object_unlock(object);
1330 }
1331 }
1332 entry = entry->vme_next;
1333 }
1334 assert(map->sw_state == MAP_SW_OUT);
1335 map->sw_state = MAP_SW_IN;
1336}
1337
1338void vm_map_swapout(vm_map_t map)
1339{
39037602 1340 vm_map_entry_t entry;
1c79356b
A
1341
1342 /*
1343 * Map is locked
1344 * First deal with various races.
1345 * If we raced with a swapin and lost, the residence count
1346 * will have been incremented to 1, and we simply return.
1347 */
b0d623f7 1348 lck_mtx_lock(&map->s_lock);
1c79356b 1349 if (map->res_count != 0) {
b0d623f7 1350 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1351 return;
1352 }
b0d623f7 1353 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1354
1355 /*
1356 * There are no intermediate states of a map going out or
1357 * coming in, since the map is locked during the transition.
1358 */
1359 assert(map->sw_state == MAP_SW_IN);
1360
1361 if (!vm_map_swap_enable)
1362 return;
1363
1364 /*
1365 * We now operate upon each map entry. If the entry is a sub-
1366 * or share-map, we call vm_map_res_deallocate upon it.
1367 * If the entry is an object, we call vm_object_res_deallocate
1368 * (this may iterate through the shadow chain).
1369 * Note that we hold the map locked the entire time,
1370 * even if we get back here via a recursive call in
1371 * vm_map_res_deallocate.
1372 */
1373 entry = vm_map_first_entry(map);
1374
1375 while (entry != vm_map_to_entry(map)) {
3e170ce0 1376 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1377 if (entry->is_sub_map) {
3e170ce0 1378 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1379 lck_mtx_lock(&lmap->s_lock);
1c79356b 1380 vm_map_res_deallocate(lmap);
b0d623f7 1381 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1382 } else {
3e170ce0 1383 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1384 vm_object_lock(object);
1385 /*
1386 * This call may take a long time,
1387 * since it could actively push
1388 * out pages (if we implement it
1389 * that way).
1390 */
1391 vm_object_res_deallocate(object);
1392 vm_object_unlock(object);
1393 }
1394 }
1395 entry = entry->vme_next;
1396 }
1397 assert(map->sw_state == MAP_SW_IN);
1398 map->sw_state = MAP_SW_OUT;
1399}
1400
1401#endif /* TASK_SWAPPER */
1402
1c79356b
A
1403/*
1404 * vm_map_lookup_entry: [ internal use only ]
1405 *
6d2010ae
A
1406 * Calls into the vm map store layer to find the map
1407 * entry containing (or immediately preceding) the
1408 * specified address in the given map; the entry is returned
1c79356b
A
1409 * in the "entry" parameter. The boolean
1410 * result indicates whether the address is
1411 * actually contained in the map.
1412 */
1413boolean_t
1414vm_map_lookup_entry(
39037602
A
1415 vm_map_t map,
1416 vm_map_offset_t address,
1c79356b
A
1417 vm_map_entry_t *entry) /* OUT */
1418{
6d2010ae 1419 return ( vm_map_store_lookup_entry( map, address, entry ));
1c79356b
A
1420}
1421
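/*
 * Usage sketch (illustrative; "addr" and "entry" are placeholders): hold
 * the map lock across the lookup and any use of the returned entry, as
 * vm_map_set_cache_attr() does above.
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		... "entry" contains addr ...
 *	} else {
 *		... "entry" precedes addr, or is vm_map_to_entry(map) ...
 *	}
 *	vm_map_unlock_read(map);
 */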
1422/*
1423 * Routine: vm_map_find_space
1424 * Purpose:
1425 * Allocate a range in the specified virtual address map,
1426 * returning the entry allocated for that range.
1427 * Used by kmem_alloc, etc.
1428 *
1429 * The map must NOT be locked. It will be returned locked
1430 * on KERN_SUCCESS, unlocked on failure.
1431 *
1432 * If an entry is allocated, the object/offset fields
1433 * are initialized to zero.
1434 */
1435kern_return_t
1436vm_map_find_space(
39037602 1437 vm_map_t map,
91447636
A
1438 vm_map_offset_t *address, /* OUT */
1439 vm_map_size_t size,
1440 vm_map_offset_t mask,
0c530ab8 1441 int flags,
1c79356b
A
1442 vm_map_entry_t *o_entry) /* OUT */
1443{
3e170ce0 1444 vm_map_entry_t entry, new_entry;
39037602
A
1445 vm_map_offset_t start;
1446 vm_map_offset_t end;
3e170ce0 1447 vm_map_entry_t hole_entry;
91447636
A
1448
1449 if (size == 0) {
1450 *address = 0;
1451 return KERN_INVALID_ARGUMENT;
1452 }
1c79356b 1453
2d21ac55
A
1454 if (flags & VM_FLAGS_GUARD_AFTER) {
1455 /* account for the back guard page in the size */
39236c6e 1456 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1457 }
1458
7ddcb079 1459 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1460
1461 /*
1462 * Look for the first possible address; if there's already
1463 * something at this address, we have to start after it.
1464 */
1465
1466 vm_map_lock(map);
1467
6d2010ae
A
1468 if( map->disable_vmentry_reuse == TRUE) {
1469 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1470 } else {
3e170ce0
A
1471 if (map->holelistenabled) {
1472 hole_entry = (vm_map_entry_t)map->holes_list;
1473
1474 if (hole_entry == NULL) {
1475 /*
1476 * No more space in the map?
1477 */
1478 vm_map_entry_dispose(map, new_entry);
1479 vm_map_unlock(map);
1480 return(KERN_NO_SPACE);
1481 }
1482
1483 entry = hole_entry;
1484 start = entry->vme_start;
1485 } else {
1486 assert(first_free_is_valid(map));
1487 if ((entry = map->first_free) == vm_map_to_entry(map))
1488 start = map->min_offset;
1489 else
1490 start = entry->vme_end;
1491 }
6d2010ae 1492 }
1c79356b
A
1493
1494 /*
1495 * In any case, the "entry" always precedes
1496 * the proposed new region throughout the loop:
1497 */
1498
1499 while (TRUE) {
39037602 1500 vm_map_entry_t next;
1c79356b
A
1501
1502 /*
1503 * Find the end of the proposed new region.
1504 * Be sure we didn't go beyond the end, or
1505 * wrap around the address.
1506 */
1507
2d21ac55
A
1508 if (flags & VM_FLAGS_GUARD_BEFORE) {
1509 /* reserve space for the front guard page */
39236c6e 1510 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1511 }
1c79356b 1512 end = ((start + mask) & ~mask);
2d21ac55 1513
1c79356b
A
1514 if (end < start) {
1515 vm_map_entry_dispose(map, new_entry);
1516 vm_map_unlock(map);
1517 return(KERN_NO_SPACE);
1518 }
1519 start = end;
1520 end += size;
1521
1522 if ((end > map->max_offset) || (end < start)) {
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527
1c79356b 1528 next = entry->vme_next;
1c79356b 1529
3e170ce0
A
1530 if (map->holelistenabled) {
1531 if (entry->vme_end >= end)
1532 break;
1533 } else {
1534 /*
1535 * If there are no more entries, we must win.
1536 *
1537 * OR
1538 *
1539 * If there is another entry, it must be
1540 * after the end of the potential new region.
1541 */
1c79356b 1542
3e170ce0
A
1543 if (next == vm_map_to_entry(map))
1544 break;
1545
1546 if (next->vme_start >= end)
1547 break;
1548 }
1c79356b
A
1549
1550 /*
1551 * Didn't fit -- move to the next entry.
1552 */
1553
1554 entry = next;
3e170ce0
A
1555
1556 if (map->holelistenabled) {
1557 if (entry == (vm_map_entry_t) map->holes_list) {
1558 /*
1559 * Wrapped around
1560 */
1561 vm_map_entry_dispose(map, new_entry);
1562 vm_map_unlock(map);
1563 return(KERN_NO_SPACE);
1564 }
1565 start = entry->vme_start;
1566 } else {
1567 start = entry->vme_end;
1568 }
1569 }
1570
1571 if (map->holelistenabled) {
1572 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1573 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1574 }
1c79356b
A
1575 }
1576
1577 /*
1578 * At this point,
1579 * "start" and "end" should define the endpoints of the
1580 * available new range, and
1581 * "entry" should refer to the region before the new
1582 * range, and
1583 *
1584 * the map should be locked.
1585 */
1586
2d21ac55
A
1587 if (flags & VM_FLAGS_GUARD_BEFORE) {
1588 /* go back for the front guard page */
39236c6e 1589 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1590 }
1c79356b
A
1591 *address = start;
1592
e2d2fc5c 1593 assert(start < end);
1c79356b
A
1594 new_entry->vme_start = start;
1595 new_entry->vme_end = end;
1596 assert(page_aligned(new_entry->vme_start));
1597 assert(page_aligned(new_entry->vme_end));
39236c6e
A
1598 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1599 VM_MAP_PAGE_MASK(map)));
1600 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1601 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1602
1603 new_entry->is_shared = FALSE;
1604 new_entry->is_sub_map = FALSE;
fe8ab488 1605 new_entry->use_pmap = TRUE;
3e170ce0
A
1606 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1607 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1608
1609 new_entry->needs_copy = FALSE;
1610
1611 new_entry->inheritance = VM_INHERIT_DEFAULT;
1612 new_entry->protection = VM_PROT_DEFAULT;
1613 new_entry->max_protection = VM_PROT_ALL;
1614 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1615 new_entry->wired_count = 0;
1616 new_entry->user_wired_count = 0;
1617
1618 new_entry->in_transition = FALSE;
1619 new_entry->needs_wakeup = FALSE;
2d21ac55 1620 new_entry->no_cache = FALSE;
b0d623f7 1621 new_entry->permanent = FALSE;
39236c6e
A
1622 new_entry->superpage_size = FALSE;
1623 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1624 new_entry->map_aligned = TRUE;
1625 } else {
1626 new_entry->map_aligned = FALSE;
1627 }
2d21ac55 1628
3e170ce0 1629 new_entry->used_for_jit = FALSE;
b0d623f7 1630 new_entry->zero_wired_pages = FALSE;
fe8ab488 1631 new_entry->iokit_acct = FALSE;
3e170ce0
A
1632 new_entry->vme_resilient_codesign = FALSE;
1633 new_entry->vme_resilient_media = FALSE;
39037602
A
1634 if (flags & VM_FLAGS_ATOMIC_ENTRY)
1635 new_entry->vme_atomic = TRUE;
1636 else
1637 new_entry->vme_atomic = FALSE;
1c79356b 1638
3e170ce0
A
1639 int alias;
1640 VM_GET_FLAGS_ALIAS(flags, alias);
1641 VME_ALIAS_SET(new_entry, alias);
0c530ab8 1642
1c79356b
A
1643 /*
1644 * Insert the new entry into the list
1645 */
1646
6d2010ae 1647 vm_map_store_entry_link(map, entry, new_entry);
1c79356b
A
1648
1649 map->size += size;
1650
1651 /*
1652 * Update the lookup hint
1653 */
0c530ab8 1654 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1655
1656 *o_entry = new_entry;
1657 return(KERN_SUCCESS);
1658}
1659
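/*
 * Usage sketch (illustrative; "addr", "size", "object", "offset" and
 * "entry" are placeholders): kmem-style callers fill in the new entry's
 * object/offset and then drop the map lock, which vm_map_find_space()
 * returns held on success.
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		VME_OBJECT_SET(entry, object);
 *		VME_OFFSET_SET(entry, offset);
 *		vm_map_unlock(kernel_map);
 *	}
 */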
1660int vm_map_pmap_enter_print = FALSE;
1661int vm_map_pmap_enter_enable = FALSE;
1662
1663/*
91447636 1664 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1665 *
1666 * Description:
1667 * Force pages from the specified object to be entered into
1668 * the pmap at the specified address if they are present.
1670 * As soon as a page is not found in the object, the scan ends.
1670 *
1671 * Returns:
1672 * Nothing.
1673 *
1674 * In/out conditions:
1675 * The source map should not be locked on entry.
1676 */
fe8ab488 1677__unused static void
1c79356b
A
1678vm_map_pmap_enter(
1679 vm_map_t map,
39037602
A
1680 vm_map_offset_t addr,
1681 vm_map_offset_t end_addr,
1682 vm_object_t object,
1c79356b
A
1683 vm_object_offset_t offset,
1684 vm_prot_t protection)
1685{
2d21ac55
A
1686 int type_of_fault;
1687 kern_return_t kr;
0b4e3aa0 1688
55e303ae
A
1689 if(map->pmap == 0)
1690 return;
1691
1c79356b 1692 while (addr < end_addr) {
39037602 1693 vm_page_t m;
1c79356b 1694
fe8ab488
A
1695
1696 /*
1697 * TODO:
1698 * From vm_map_enter(), we come into this function without the map
1699 * lock held or the object lock held.
1700 * We haven't taken a reference on the object either.
1701 * We should do a proper lookup on the map to make sure
1702 * that things are sane before we go locking objects that
1703 * could have been deallocated from under us.
1704 */
1705
1c79356b 1706 vm_object_lock(object);
1c79356b
A
1707
1708 m = vm_page_lookup(object, offset);
91447636
A
1709 /*
1710 * ENCRYPTED SWAP:
1711 * The user should never see encrypted data, so do not
1712 * enter an encrypted page in the page table.
1713 */
1714 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
2d21ac55
A
1715 m->fictitious ||
1716 (m->unusual && ( m->error || m->restart || m->absent))) {
1c79356b
A
1717 vm_object_unlock(object);
1718 return;
1719 }
1720
1c79356b
A
1721 if (vm_map_pmap_enter_print) {
1722 printf("vm_map_pmap_enter:");
2d21ac55
A
1723 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1724 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1725 }
2d21ac55 1726 type_of_fault = DBG_CACHE_HIT_FAULT;
6d2010ae 1727 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
fe8ab488
A
1728 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1729 0, /* XXX need user tag / alias? */
1730 0, /* alternate accounting? */
1731 NULL,
2d21ac55 1732 &type_of_fault);
1c79356b 1733
1c79356b
A
1734 vm_object_unlock(object);
1735
1736 offset += PAGE_SIZE_64;
1737 addr += PAGE_SIZE;
1738 }
1739}
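/*
 * Illustrative sketch, guarded out and not compiled: how a caller in this
 * file could use vm_map_pmap_enter() to pre-populate the pmap for a
 * freshly established, page-aligned mapping.  The scan stops at the first
 * page that is not resident in the object, and the locking/reference
 * caveats from the TODO above still apply.  All parameters here stand for
 * hypothetical caller state.
 */
#if 0
static void
example_prepopulate_pmap(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_size_t		size,
	vm_object_t		backing_object,		/* hypothetical */
	vm_object_offset_t	backing_offset,		/* hypothetical */
	vm_prot_t		prot)
{
	assert(page_aligned(start));
	assert(page_aligned(size));
	/* enters resident pages one at a time; bails at the first hole */
	vm_map_pmap_enter(map, start, start + size,
			  backing_object, backing_offset, prot);
}
#endif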
1740
91447636
A
1741boolean_t vm_map_pmap_is_empty(
1742 vm_map_t map,
1743 vm_map_offset_t start,
1744 vm_map_offset_t end);
1745boolean_t vm_map_pmap_is_empty(
1746 vm_map_t map,
1747 vm_map_offset_t start,
1748 vm_map_offset_t end)
1749{
2d21ac55
A
1750#ifdef MACHINE_PMAP_IS_EMPTY
1751 return pmap_is_empty(map->pmap, start, end);
1752#else /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1753 vm_map_offset_t offset;
1754 ppnum_t phys_page;
1755
1756 if (map->pmap == NULL) {
1757 return TRUE;
1758 }
2d21ac55 1759
91447636
A
1760 for (offset = start;
1761 offset < end;
1762 offset += PAGE_SIZE) {
1763 phys_page = pmap_find_phys(map->pmap, offset);
1764 if (phys_page) {
1765 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1766 "page %d at 0x%llx\n",
2d21ac55
A
1767 map, (long long)start, (long long)end,
1768 phys_page, (long long)offset);
91447636
A
1769 return FALSE;
1770 }
1771 }
1772 return TRUE;
2d21ac55 1773#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1774}
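/*
 * Illustrative sketch, guarded out and not compiled: vm_map_pmap_is_empty()
 * is a debug aid; vm_map_enter() below calls it under #if DEBUG to verify
 * that the range it just reserved has no physical pages entered yet.  A
 * hypothetical standalone check would look like this.
 */
#if 0
static void
example_assert_range_unmapped(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_pmap_is_empty(map, start, end)) {
		/* some page in [start, end) already has a translation */
		panic("example_assert_range_unmapped(%p): 0x%llx-0x%llx",
		      map,
		      (unsigned long long)start,
		      (unsigned long long)end);
	}
}
#endif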
1775
316670eb
A
1776#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1777kern_return_t
1778vm_map_random_address_for_size(
1779 vm_map_t map,
1780 vm_map_offset_t *address,
1781 vm_map_size_t size)
1782{
1783 kern_return_t kr = KERN_SUCCESS;
1784 int tries = 0;
1785 vm_map_offset_t random_addr = 0;
1786 vm_map_offset_t hole_end;
1787
1788 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1789 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1790 vm_map_size_t vm_hole_size = 0;
1791 vm_map_size_t addr_space_size;
1792
1793 addr_space_size = vm_map_max(map) - vm_map_min(map);
1794
1795 assert(page_aligned(size));
1796
1797 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1798 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e
A
1799 random_addr = vm_map_trunc_page(
1800 vm_map_min(map) +(random_addr % addr_space_size),
1801 VM_MAP_PAGE_MASK(map));
316670eb
A
1802
1803 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1804 if (prev_entry == vm_map_to_entry(map)) {
1805 next_entry = vm_map_first_entry(map);
1806 } else {
1807 next_entry = prev_entry->vme_next;
1808 }
1809 if (next_entry == vm_map_to_entry(map)) {
1810 hole_end = vm_map_max(map);
1811 } else {
1812 hole_end = next_entry->vme_start;
1813 }
1814 vm_hole_size = hole_end - random_addr;
1815 if (vm_hole_size >= size) {
1816 *address = random_addr;
1817 break;
1818 }
1819 }
1820 tries++;
1821 }
1822
1823 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1824 kr = KERN_NO_SPACE;
1825 }
1826 return kr;
1827}
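/*
 * Illustrative sketch, guarded out and not compiled: a hypothetical helper
 * combining vm_map_random_address_for_size() with a fixed-address,
 * anonymous vm_map_enter().  vm_map_enter() below does this internally,
 * under a single map lock, when VM_FLAGS_RANDOM_ADDR or a JIT entry asks
 * for a randomized start; this two-step version is racy by comparison and
 * only shows the calling convention.
 */
#if 0
static kern_return_t
example_map_at_random_address(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)		/* OUT */
{
	kern_return_t	kr;

	*addr = 0;
	vm_map_lock(map);		/* the lookup inside needs the map lock */
	kr = vm_map_random_address_for_size(map, addr, size);
	vm_map_unlock(map);
	if (kr != KERN_SUCCESS) {
		return kr;		/* KERN_NO_SPACE after too many tries */
	}
	/*
	 * The chosen hole may be gone by now; a real caller would pass
	 * VM_FLAGS_RANDOM_ADDR to vm_map_enter() and let it do both
	 * steps atomically.
	 */
	return vm_map_enter(map, addr, size, 0,
			    VM_FLAGS_FIXED,
			    VM_OBJECT_NULL, 0, FALSE,
			    VM_PROT_DEFAULT, VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}
#endif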
1828
1c79356b
A
1829/*
1830 * Routine: vm_map_enter
1831 *
1832 * Description:
1833 * Allocate a range in the specified virtual address map.
1834 * The resulting range will refer to memory defined by
1835 * the given memory object and offset into that object.
1836 *
1837 * Arguments are as defined in the vm_map call.
1838 */
91447636
A
1839int _map_enter_debug = 0;
1840static unsigned int vm_map_enter_restore_successes = 0;
1841static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
1842kern_return_t
1843vm_map_enter(
91447636 1844 vm_map_t map,
593a1d5f 1845 vm_map_offset_t *address, /* IN/OUT */
91447636 1846 vm_map_size_t size,
593a1d5f 1847 vm_map_offset_t mask,
1c79356b
A
1848 int flags,
1849 vm_object_t object,
1850 vm_object_offset_t offset,
1851 boolean_t needs_copy,
1852 vm_prot_t cur_protection,
1853 vm_prot_t max_protection,
1854 vm_inherit_t inheritance)
1855{
91447636 1856 vm_map_entry_t entry, new_entry;
2d21ac55 1857 vm_map_offset_t start, tmp_start, tmp_offset;
91447636 1858 vm_map_offset_t end, tmp_end;
b0d623f7
A
1859 vm_map_offset_t tmp2_start, tmp2_end;
1860 vm_map_offset_t step;
1c79356b 1861 kern_return_t result = KERN_SUCCESS;
91447636
A
1862 vm_map_t zap_old_map = VM_MAP_NULL;
1863 vm_map_t zap_new_map = VM_MAP_NULL;
1864 boolean_t map_locked = FALSE;
1865 boolean_t pmap_empty = TRUE;
1866 boolean_t new_mapping_established = FALSE;
fe8ab488 1867 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
91447636
A
1868 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1869 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1870 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2d21ac55
A
1871 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1872 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
b0d623f7 1873 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
316670eb 1874 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
fe8ab488 1875 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
3e170ce0
A
1876 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1877 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
39037602 1878 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
b0d623f7 1879 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3e170ce0 1880 vm_tag_t alias, user_alias;
2d21ac55 1881 vm_map_offset_t effective_min_offset, effective_max_offset;
593a1d5f 1882 kern_return_t kr;
39236c6e 1883 boolean_t clear_map_aligned = FALSE;
3e170ce0 1884 vm_map_entry_t hole_entry;
593a1d5f 1885
b0d623f7
A
1886 if (superpage_size) {
1887 switch (superpage_size) {
1888 /*
1889 * Note that the current implementation only supports
1890 * a single size for superpages, SUPERPAGE_SIZE, per
1891 * architecture. As soon as more sizes are to be
1892 * supported, SUPERPAGE_SIZE has to be replaced
1893 * with a lookup of the size depending on superpage_size.
1894 */
1895#ifdef __x86_64__
6d2010ae
A
1896 case SUPERPAGE_SIZE_ANY:
1897 /* handle it like 2 MB and round up to page size */
1898 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
b0d623f7
A
1899 case SUPERPAGE_SIZE_2MB:
1900 break;
1901#endif
1902 default:
1903 return KERN_INVALID_ARGUMENT;
1904 }
1905 mask = SUPERPAGE_SIZE-1;
1906 if (size & (SUPERPAGE_SIZE-1))
1907 return KERN_INVALID_ARGUMENT;
1908 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1909 }
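/*
 * Worked example of the SUPERPAGE_SIZE_ANY rounding above (x86_64, 2 MB
 * superpages; the missing "break" is an intentional fall-through into the
 * 2 MB case): a request of 5 MB becomes
 *     (5*1024*1024 + 2*1024*1024 - 1) & ~(2*1024*1024 - 1) = 6 MB,
 * i.e. three 2 MB superpages, and "mask" is forced to SUPERPAGE_SIZE-1 so
 * the start address ends up 2 MB aligned as well.
 */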
1910
6d2010ae 1911
1c79356b 1912
3e170ce0
A
1913 if (resilient_codesign || resilient_media) {
1914 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1915 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1916 return KERN_PROTECTION_FAILURE;
1917 }
1918 }
1919
2d21ac55
A
1920 if (is_submap) {
1921 if (purgable) {
1922 /* submaps cannot be purgeable */
1923 return KERN_INVALID_ARGUMENT;
1924 }
1925 if (object == VM_OBJECT_NULL) {
1926 /* submaps cannot be created lazily */
1927 return KERN_INVALID_ARGUMENT;
1928 }
1929 }
1930 if (flags & VM_FLAGS_ALREADY) {
1931 /*
1932 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1933 * is already present. For it to be meaningful, the requested
1934 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1935 * we shouldn't try to remove what was mapped there first
1936 * (!VM_FLAGS_OVERWRITE).
1937 */
1938 if ((flags & VM_FLAGS_ANYWHERE) ||
1939 (flags & VM_FLAGS_OVERWRITE)) {
1940 return KERN_INVALID_ARGUMENT;
1941 }
1942 }
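/*
 * Example of the rule just checked (hypothetical caller values):
 *
 *     vm_map_enter(map, &addr, size, 0,
 *                  VM_FLAGS_FIXED | VM_FLAGS_ALREADY, ...)
 *
 * is accepted, and returns KERN_MEMORY_PRESENT further down if an
 * identical mapping is already in place at "addr", whereas combining
 * VM_FLAGS_ALREADY with VM_FLAGS_ANYWHERE or VM_FLAGS_OVERWRITE is
 * rejected here with KERN_INVALID_ARGUMENT.
 */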
1943
6d2010ae 1944 effective_min_offset = map->min_offset;
b0d623f7 1945
2d21ac55
A
1946 if (flags & VM_FLAGS_BEYOND_MAX) {
1947 /*
b0d623f7 1948 * Allow an insertion beyond the map's max offset.
2d21ac55
A
1949 */
1950 if (vm_map_is_64bit(map))
1951 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1952 else
1953 effective_max_offset = 0x00000000FFFFF000ULL;
1954 } else {
1955 effective_max_offset = map->max_offset;
1956 }
1957
1958 if (size == 0 ||
1959 (offset & PAGE_MASK_64) != 0) {
91447636
A
1960 *address = 0;
1961 return KERN_INVALID_ARGUMENT;
1962 }
1963
1c79356b 1964 VM_GET_FLAGS_ALIAS(flags, alias);
3e170ce0
A
1965 if (map->pmap == kernel_pmap) {
1966 user_alias = VM_KERN_MEMORY_NONE;
1967 } else {
1968 user_alias = alias;
1969 }
2d21ac55 1970
1c79356b
A
1971#define RETURN(value) { result = value; goto BailOut; }
1972
1973 assert(page_aligned(*address));
1974 assert(page_aligned(size));
91447636 1975
39236c6e
A
1976 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1977 /*
1978 * In most cases, the caller rounds the size up to the
1979 * map's page size.
1980 * If we get a size that is explicitly not map-aligned here,
1981 * we'll have to respect the caller's wish and mark the
1982 * mapping as "not map-aligned" to avoid tripping the
1983 * map alignment checks later.
1984 */
1985 clear_map_aligned = TRUE;
1986 }
fe8ab488
A
1987 if (!anywhere &&
1988 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1989 /*
1990 * We've been asked to map at a fixed address and that
1991 * address is not aligned to the map's specific alignment.
1992 * The caller should know what it's doing (i.e. most likely
1993 * mapping some fragmented copy map, transferring memory from
1994 * a VM map with a different alignment), so clear map_aligned
1995 * for this new VM map entry and proceed.
1996 */
1997 clear_map_aligned = TRUE;
1998 }
39236c6e 1999
91447636
A
2000 /*
2001 * Only zero-fill objects are allowed to be purgable.
2002 * LP64todo - limit purgable objects to 32-bits for now
2003 */
2004 if (purgable &&
2005 (offset != 0 ||
2006 (object != VM_OBJECT_NULL &&
6d2010ae 2007 (object->vo_size != size ||
2d21ac55 2008 object->purgable == VM_PURGABLE_DENY))
b0d623f7 2009 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
91447636
A
2010 return KERN_INVALID_ARGUMENT;
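/*
 * Example of a request that satisfies the purgeable rule above
 * (hypothetical caller values): VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE with
 * object == VM_OBJECT_NULL, offset == 0 and size <= ANON_MAX_SIZE.  A
 * non-zero offset, an oversized request, or an object whose size does not
 * match or whose purgable state is VM_PURGABLE_DENY is rejected here with
 * KERN_INVALID_ARGUMENT.
 */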
2011
2012 if (!anywhere && overwrite) {
2013 /*
2014 * Create a temporary VM map to hold the old mappings in the
2015 * affected area while we create the new one.
2016 * This avoids releasing the VM map lock in
2017 * vm_map_entry_delete() and allows atomicity
2018 * when we want to replace some mappings with a new one.
2019 * It also allows us to restore the old VM mappings if the
2020 * new mapping fails.
2021 */
2022 zap_old_map = vm_map_create(PMAP_NULL,
2023 *address,
2024 *address + size,
b0d623f7 2025 map->hdr.entries_pageable);
39236c6e 2026 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 2027 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
2028 }
2029
2d21ac55 2030StartAgain: ;
1c79356b
A
2031
2032 start = *address;
2033
2034 if (anywhere) {
2035 vm_map_lock(map);
91447636 2036 map_locked = TRUE;
6d2010ae 2037
316670eb
A
2038 if (entry_for_jit) {
2039 if (map->jit_entry_exists) {
2040 result = KERN_INVALID_ARGUMENT;
2041 goto BailOut;
2042 }
39037602
A
2043 random_address = TRUE;
2044 }
2045
2046 if (random_address) {
316670eb
A
2047 /*
2048 * Get a random start address.
2049 */
2050 result = vm_map_random_address_for_size(map, address, size);
2051 if (result != KERN_SUCCESS) {
2052 goto BailOut;
2053 }
2054 start = *address;
6d2010ae 2055 }
1c79356b 2056
316670eb 2057
1c79356b
A
2058 /*
2059 * Calculate the first possible address.
2060 */
2061
2d21ac55
A
2062 if (start < effective_min_offset)
2063 start = effective_min_offset;
2064 if (start > effective_max_offset)
1c79356b
A
2065 RETURN(KERN_NO_SPACE);
2066
2067 /*
2068 * Look for the first possible address;
2069 * if there's already something at this
2070 * address, we have to start after it.
2071 */
2072
6d2010ae
A
2073 if( map->disable_vmentry_reuse == TRUE) {
2074 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2075 } else {
6d2010ae 2076
3e170ce0
A
2077 if (map->holelistenabled) {
2078 hole_entry = (vm_map_entry_t)map->holes_list;
2079
2080 if (hole_entry == NULL) {
2081 /*
2082 * No more space in the map?
2083 */
2084 result = KERN_NO_SPACE;
2085 goto BailOut;
2086 } else {
2087
2088 boolean_t found_hole = FALSE;
2089
2090 do {
2091 if (hole_entry->vme_start >= start) {
2092 start = hole_entry->vme_start;
2093 found_hole = TRUE;
2094 break;
2095 }
2096
2097 if (hole_entry->vme_end > start) {
2098 found_hole = TRUE;
2099 break;
2100 }
2101 hole_entry = hole_entry->vme_next;
2102
2103 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2104
2105 if (found_hole == FALSE) {
2106 result = KERN_NO_SPACE;
2107 goto BailOut;
2108 }
2109
2110 entry = hole_entry;
6d2010ae 2111
3e170ce0
A
2112 if (start == 0)
2113 start += PAGE_SIZE_64;
2114 }
6d2010ae 2115 } else {
3e170ce0
A
2116 assert(first_free_is_valid(map));
2117
2118 entry = map->first_free;
2119
2120 if (entry == vm_map_to_entry(map)) {
6d2010ae 2121 entry = NULL;
3e170ce0
A
2122 } else {
2123 if (entry->vme_next == vm_map_to_entry(map)){
2124 /*
2125 * Hole at the end of the map.
2126 */
2127 entry = NULL;
2128 } else {
2129 if (start < (entry->vme_next)->vme_start ) {
2130 start = entry->vme_end;
2131 start = vm_map_round_page(start,
2132 VM_MAP_PAGE_MASK(map));
2133 } else {
2134 /*
2135 * Need to do a lookup.
2136 */
2137 entry = NULL;
2138 }
2139 }
2140 }
2141
2142 if (entry == NULL) {
2143 vm_map_entry_t tmp_entry;
2144 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2145 assert(!entry_for_jit);
2146 start = tmp_entry->vme_end;
39236c6e
A
2147 start = vm_map_round_page(start,
2148 VM_MAP_PAGE_MASK(map));
6d2010ae 2149 }
3e170ce0 2150 entry = tmp_entry;
316670eb 2151 }
6d2010ae 2152 }
1c79356b
A
2153 }
2154
2155 /*
2156 * In any case, the "entry" always precedes
2157 * the proposed new region throughout the
2158 * loop:
2159 */
2160
2161 while (TRUE) {
39037602 2162 vm_map_entry_t next;
1c79356b 2163
2d21ac55 2164 /*
1c79356b
A
2165 * Find the end of the proposed new region.
2166 * Be sure we didn't go beyond the end, or
2167 * wrap around the address.
2168 */
2169
2170 end = ((start + mask) & ~mask);
39236c6e
A
2171 end = vm_map_round_page(end,
2172 VM_MAP_PAGE_MASK(map));
1c79356b
A
2173 if (end < start)
2174 RETURN(KERN_NO_SPACE);
2175 start = end;
39236c6e
A
2176 assert(VM_MAP_PAGE_ALIGNED(start,
2177 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2178 end += size;
2179
2d21ac55 2180 if ((end > effective_max_offset) || (end < start)) {
1c79356b 2181 if (map->wait_for_space) {
fe8ab488 2182 assert(!keep_map_locked);
2d21ac55
A
2183 if (size <= (effective_max_offset -
2184 effective_min_offset)) {
1c79356b
A
2185 assert_wait((event_t)map,
2186 THREAD_ABORTSAFE);
2187 vm_map_unlock(map);
91447636
A
2188 map_locked = FALSE;
2189 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2190 goto StartAgain;
2191 }
2192 }
2193 RETURN(KERN_NO_SPACE);
2194 }
2195
1c79356b 2196 next = entry->vme_next;
1c79356b 2197
3e170ce0
A
2198 if (map->holelistenabled) {
2199 if (entry->vme_end >= end)
2200 break;
2201 } else {
2202 /*
2203 * If there are no more entries, we must win.
2204 *
2205 * OR
2206 *
2207 * If there is another entry, it must be
2208 * after the end of the potential new region.
2209 */
1c79356b 2210
3e170ce0
A
2211 if (next == vm_map_to_entry(map))
2212 break;
2213
2214 if (next->vme_start >= end)
2215 break;
2216 }
1c79356b
A
2217
2218 /*
2219 * Didn't fit -- move to the next entry.
2220 */
2221
2222 entry = next;
3e170ce0
A
2223
2224 if (map->holelistenabled) {
2225 if (entry == (vm_map_entry_t) map->holes_list) {
2226 /*
2227 * Wrapped around
2228 */
2229 result = KERN_NO_SPACE;
2230 goto BailOut;
2231 }
2232 start = entry->vme_start;
2233 } else {
2234 start = entry->vme_end;
2235 }
2236
39236c6e
A
2237 start = vm_map_round_page(start,
2238 VM_MAP_PAGE_MASK(map));
1c79356b 2239 }
3e170ce0
A
2240
2241 if (map->holelistenabled) {
2242 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2243 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2244 }
2245 }
2246
1c79356b 2247 *address = start;
39236c6e
A
2248 assert(VM_MAP_PAGE_ALIGNED(*address,
2249 VM_MAP_PAGE_MASK(map)));
1c79356b 2250 } else {
1c79356b
A
2251 /*
2252 * Verify that:
2253 * the address doesn't itself violate
2254 * the mask requirement.
2255 */
2256
2257 vm_map_lock(map);
91447636 2258 map_locked = TRUE;
1c79356b
A
2259 if ((start & mask) != 0)
2260 RETURN(KERN_NO_SPACE);
2261
2262 /*
2263 * ... the address is within bounds
2264 */
2265
2266 end = start + size;
2267
2d21ac55
A
2268 if ((start < effective_min_offset) ||
2269 (end > effective_max_offset) ||
1c79356b
A
2270 (start >= end)) {
2271 RETURN(KERN_INVALID_ADDRESS);
2272 }
2273
91447636
A
2274 if (overwrite && zap_old_map != VM_MAP_NULL) {
2275 /*
2276 * Fixed mapping and "overwrite" flag: attempt to
2277 * remove all existing mappings in the specified
2278 * address range, saving them in our "zap_old_map".
2279 */
2280 (void) vm_map_delete(map, start, end,
fe8ab488
A
2281 (VM_MAP_REMOVE_SAVE_ENTRIES |
2282 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2283 zap_old_map);
2284 }
2285
1c79356b
A
2286 /*
2287 * ... the starting address isn't allocated
2288 */
2289
2d21ac55
A
2290 if (vm_map_lookup_entry(map, start, &entry)) {
2291 if (! (flags & VM_FLAGS_ALREADY)) {
2292 RETURN(KERN_NO_SPACE);
2293 }
2294 /*
2295 * Check if what's already there is what we want.
2296 */
2297 tmp_start = start;
2298 tmp_offset = offset;
2299 if (entry->vme_start < start) {
2300 tmp_start -= start - entry->vme_start;
2301 tmp_offset -= start - entry->vme_start;
2302
2303 }
2304 for (; entry->vme_start < end;
2305 entry = entry->vme_next) {
4a3eedf9
A
2306 /*
2307 * Check if the mapping's attributes
2308 * match the existing map entry.
2309 */
2d21ac55
A
2310 if (entry == vm_map_to_entry(map) ||
2311 entry->vme_start != tmp_start ||
2312 entry->is_sub_map != is_submap ||
3e170ce0 2313 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2314 entry->needs_copy != needs_copy ||
2315 entry->protection != cur_protection ||
2316 entry->max_protection != max_protection ||
2317 entry->inheritance != inheritance ||
fe8ab488 2318 entry->iokit_acct != iokit_acct ||
3e170ce0 2319 VME_ALIAS(entry) != alias) {
2d21ac55
A
2320 /* not the same mapping ! */
2321 RETURN(KERN_NO_SPACE);
2322 }
4a3eedf9
A
2323 /*
2324 * Check if the same object is being mapped.
2325 */
2326 if (is_submap) {
3e170ce0 2327 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2328 (vm_map_t) object) {
2329 /* not the same submap */
2330 RETURN(KERN_NO_SPACE);
2331 }
2332 } else {
3e170ce0 2333 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2334 /* not the same VM object... */
2335 vm_object_t obj2;
2336
3e170ce0 2337 obj2 = VME_OBJECT(entry);
4a3eedf9
A
2338 if ((obj2 == VM_OBJECT_NULL ||
2339 obj2->internal) &&
2340 (object == VM_OBJECT_NULL ||
2341 object->internal)) {
2342 /*
2343 * ... but both are
2344 * anonymous memory,
2345 * so equivalent.
2346 */
2347 } else {
2348 RETURN(KERN_NO_SPACE);
2349 }
2350 }
2351 }
2352
2d21ac55
A
2353 tmp_offset += entry->vme_end - entry->vme_start;
2354 tmp_start += entry->vme_end - entry->vme_start;
2355 if (entry->vme_end >= end) {
2356 /* reached the end of our mapping */
2357 break;
2358 }
2359 }
2360 /* it all matches: let's use what's already there! */
2361 RETURN(KERN_MEMORY_PRESENT);
2362 }
1c79356b
A
2363
2364 /*
2365 * ... the next region doesn't overlap the
2366 * end point.
2367 */
2368
2369 if ((entry->vme_next != vm_map_to_entry(map)) &&
2370 (entry->vme_next->vme_start < end))
2371 RETURN(KERN_NO_SPACE);
2372 }
2373
2374 /*
2375 * At this point,
2376 * "start" and "end" should define the endpoints of the
2377 * available new range, and
2378 * "entry" should refer to the region before the new
2379 * range, and
2380 *
2381 * the map should be locked.
2382 */
2383
2384 /*
2385 * See whether we can avoid creating a new entry (and object) by
2386 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2387 * extend from below.] Note that we can never extend/join
2388 * purgable objects because they need to remain distinct
2389 * entities in order to implement their "volatile object"
2390 * semantics.
1c79356b
A
2391 */
2392
316670eb 2393 if (purgable || entry_for_jit) {
91447636 2394 if (object == VM_OBJECT_NULL) {
3e170ce0 2395
91447636
A
2396 object = vm_object_allocate(size);
2397 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
fe8ab488 2398 object->true_share = TRUE;
316670eb 2399 if (purgable) {
fe8ab488 2400 task_t owner;
316670eb 2401 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2402 if (map->pmap == kernel_pmap) {
2403 /*
2404 * Purgeable mappings made in a kernel
2405 * map are "owned" by the kernel itself
2406 * rather than the current user task
2407 * because they're likely to be used by
2408 * more than this user task (see
2409 * execargs_purgeable_allocate(), for
2410 * example).
2411 */
2412 owner = kernel_task;
2413 } else {
2414 owner = current_task();
2415 }
2416 assert(object->vo_purgeable_owner == NULL);
2417 assert(object->resident_page_count == 0);
2418 assert(object->wired_page_count == 0);
2419 vm_object_lock(object);
2420 vm_purgeable_nonvolatile_enqueue(object, owner);
2421 vm_object_unlock(object);
316670eb 2422 }
91447636
A
2423 offset = (vm_object_offset_t)0;
2424 }
2d21ac55
A
2425 } else if ((is_submap == FALSE) &&
2426 (object == VM_OBJECT_NULL) &&
2427 (entry != vm_map_to_entry(map)) &&
2428 (entry->vme_end == start) &&
2429 (!entry->is_shared) &&
2430 (!entry->is_sub_map) &&
fe8ab488
A
2431 (!entry->in_transition) &&
2432 (!entry->needs_wakeup) &&
2433 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2d21ac55
A
2434 (entry->protection == cur_protection) &&
2435 (entry->max_protection == max_protection) &&
fe8ab488 2436 (entry->inheritance == inheritance) &&
3e170ce0
A
2437 ((user_alias == VM_MEMORY_REALLOC) ||
2438 (VME_ALIAS(entry) == alias)) &&
2d21ac55 2439 (entry->no_cache == no_cache) &&
fe8ab488
A
2440 (entry->permanent == permanent) &&
2441 (!entry->superpage_size && !superpage_size) &&
39236c6e
A
2442 /*
2443 * No coalescing if not map-aligned, to avoid propagating
2444 * that condition any further than needed:
2445 */
2446 (!entry->map_aligned || !clear_map_aligned) &&
fe8ab488
A
2447 (!entry->zero_wired_pages) &&
2448 (!entry->used_for_jit && !entry_for_jit) &&
2449 (entry->iokit_acct == iokit_acct) &&
3e170ce0
A
2450 (!entry->vme_resilient_codesign) &&
2451 (!entry->vme_resilient_media) &&
39037602 2452 (!entry->vme_atomic) &&
fe8ab488 2453
b0d623f7 2454 ((entry->vme_end - entry->vme_start) + size <=
3e170ce0 2455 (user_alias == VM_MEMORY_REALLOC ?
b0d623f7
A
2456 ANON_CHUNK_SIZE :
2457 NO_COALESCE_LIMIT)) &&
fe8ab488 2458
2d21ac55 2459 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2460 if (vm_object_coalesce(VME_OBJECT(entry),
2d21ac55 2461 VM_OBJECT_NULL,
3e170ce0 2462 VME_OFFSET(entry),
2d21ac55
A
2463 (vm_object_offset_t) 0,
2464 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2465 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2466
2467 /*
2468 * Coalesced the two objects - can extend
2469 * the previous map entry to include the
2470 * new range.
2471 */
2472 map->size += (end - entry->vme_end);
e2d2fc5c 2473 assert(entry->vme_start < end);
39236c6e
A
2474 assert(VM_MAP_PAGE_ALIGNED(end,
2475 VM_MAP_PAGE_MASK(map)));
3e170ce0
A
2476 if (__improbable(vm_debug_events))
2477 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
1c79356b 2478 entry->vme_end = end;
3e170ce0
A
2479 if (map->holelistenabled) {
2480 vm_map_store_update_first_free(map, entry, TRUE);
2481 } else {
2482 vm_map_store_update_first_free(map, map->first_free, TRUE);
2483 }
fe8ab488 2484 new_mapping_established = TRUE;
1c79356b
A
2485 RETURN(KERN_SUCCESS);
2486 }
2487 }
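/*
 * Coalescing example (hypothetical addresses): if an existing anonymous
 * entry covers [0x1000, 0x3000) with matching protections, inheritance,
 * alias, etc., and this call is placing a compatible anonymous range at
 * [0x3000, 0x5000), the checks above simply extend the existing entry to
 * [0x1000, 0x5000) instead of creating a new entry and object -- subject
 * to the ANON_CHUNK_SIZE / NO_COALESCE_LIMIT size cap.
 */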
2488
b0d623f7
A
2489 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2490 new_entry = NULL;
2491
2492 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2493 tmp2_end = tmp2_start + step;
2494 /*
2495 * Create a new entry
2496 * LP64todo - for now, we can only allocate 4GB internal objects
2497 * because the default pager can't page bigger ones. Remove this
2498 * when it can.
2499 *
2500 * XXX FBDP
2501 * The reserved "page zero" in each process's address space can
2502 * be arbitrarily large. Splitting it into separate 4GB objects and
2503 * therefore different VM map entries serves no purpose and just
2504 * slows down operations on the VM map, so let's not split the
2505 * allocation into 4GB chunks if the max protection is NONE. That
2506 * memory should never be accessible, so it will never get to the
2507 * default pager.
2508 */
2509 tmp_start = tmp2_start;
2510 if (object == VM_OBJECT_NULL &&
2511 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2512 max_protection != VM_PROT_NONE &&
2513 superpage_size == 0)
2514 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2515 else
2516 tmp_end = tmp2_end;
2517 do {
2518 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2519 object, offset, needs_copy,
2520 FALSE, FALSE,
2521 cur_protection, max_protection,
2522 VM_BEHAVIOR_DEFAULT,
316670eb 2523 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
6d2010ae 2524 0, no_cache,
39236c6e
A
2525 permanent,
2526 superpage_size,
fe8ab488
A
2527 clear_map_aligned,
2528 is_submap);
3e170ce0
A
2529
2530 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2531 VME_ALIAS_SET(new_entry, alias);
2532
316670eb 2533 if (entry_for_jit){
6d2010ae
A
2534 if (!(map->jit_entry_exists)){
2535 new_entry->used_for_jit = TRUE;
2536 map->jit_entry_exists = TRUE;
2537 }
2538 }
2539
3e170ce0
A
2540 if (resilient_codesign &&
2541 ! ((cur_protection | max_protection) &
2542 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2543 new_entry->vme_resilient_codesign = TRUE;
2544 }
2545
2546 if (resilient_media &&
2547 ! ((cur_protection | max_protection) &
2548 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2549 new_entry->vme_resilient_media = TRUE;
2550 }
2551
fe8ab488
A
2552 assert(!new_entry->iokit_acct);
2553 if (!is_submap &&
2554 object != VM_OBJECT_NULL &&
2555 object->purgable != VM_PURGABLE_DENY) {
2556 assert(new_entry->use_pmap);
2557 assert(!new_entry->iokit_acct);
2558 /*
2559 * Turn off pmap accounting since
2560 * purgeable objects have their
2561 * own ledgers.
2562 */
2563 new_entry->use_pmap = FALSE;
2564 } else if (!is_submap &&
ecc0ceb4
A
2565 iokit_acct &&
2566 object != VM_OBJECT_NULL &&
2567 object->internal) {
fe8ab488
A
2568 /* alternate accounting */
2569 assert(!new_entry->iokit_acct);
2570 assert(new_entry->use_pmap);
2571 new_entry->iokit_acct = TRUE;
2572 new_entry->use_pmap = FALSE;
ecc0ceb4
A
2573 DTRACE_VM4(
2574 vm_map_iokit_mapped_region,
2575 vm_map_t, map,
2576 vm_map_offset_t, new_entry->vme_start,
2577 vm_map_offset_t, new_entry->vme_end,
2578 int, VME_ALIAS(new_entry));
fe8ab488
A
2579 vm_map_iokit_mapped_region(
2580 map,
2581 (new_entry->vme_end -
2582 new_entry->vme_start));
2583 } else if (!is_submap) {
2584 assert(!new_entry->iokit_acct);
2585 assert(new_entry->use_pmap);
2586 }
2587
b0d623f7
A
2588 if (is_submap) {
2589 vm_map_t submap;
2590 boolean_t submap_is_64bit;
2591 boolean_t use_pmap;
2592
fe8ab488
A
2593 assert(new_entry->is_sub_map);
2594 assert(!new_entry->use_pmap);
2595 assert(!new_entry->iokit_acct);
b0d623f7
A
2596 submap = (vm_map_t) object;
2597 submap_is_64bit = vm_map_is_64bit(submap);
3e170ce0 2598 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
fe8ab488 2599#ifndef NO_NESTED_PMAP
b0d623f7 2600 if (use_pmap && submap->pmap == NULL) {
316670eb 2601 ledger_t ledger = map->pmap->ledger;
b0d623f7 2602 /* we need a sub pmap to nest... */
316670eb
A
2603 submap->pmap = pmap_create(ledger, 0,
2604 submap_is_64bit);
b0d623f7
A
2605 if (submap->pmap == NULL) {
2606 /* let's proceed without nesting... */
2607 }
2d21ac55 2608 }
b0d623f7
A
2609 if (use_pmap && submap->pmap != NULL) {
2610 kr = pmap_nest(map->pmap,
2611 submap->pmap,
2612 tmp_start,
2613 tmp_start,
2614 tmp_end - tmp_start);
2615 if (kr != KERN_SUCCESS) {
2616 printf("vm_map_enter: "
2617 "pmap_nest(0x%llx,0x%llx) "
2618 "error 0x%x\n",
2619 (long long)tmp_start,
2620 (long long)tmp_end,
2621 kr);
2622 } else {
2623 /* we're now nested ! */
2624 new_entry->use_pmap = TRUE;
2625 pmap_empty = FALSE;
2626 }
2627 }
fe8ab488 2628#endif /* NO_NESTED_PMAP */
2d21ac55 2629 }
b0d623f7
A
2630 entry = new_entry;
2631
2632 if (superpage_size) {
2633 vm_page_t pages, m;
2634 vm_object_t sp_object;
2635
3e170ce0 2636 VME_OFFSET_SET(entry, 0);
b0d623f7
A
2637
2638 /* allocate one superpage */
2639 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2d21ac55 2640 if (kr != KERN_SUCCESS) {
3e170ce0
A
2641 /* deallocate whole range... */
2642 new_mapping_established = TRUE;
2643 /* ... but only up to "tmp_end" */
2644 size -= end - tmp_end;
b0d623f7
A
2645 RETURN(kr);
2646 }
2647
2648 /* create one vm_object per superpage */
2649 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2650 sp_object->phys_contiguous = TRUE;
39037602 2651 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
3e170ce0 2652 VME_OBJECT_SET(entry, sp_object);
fe8ab488 2653 assert(entry->use_pmap);
b0d623f7
A
2654
2655 /* enter the base pages into the object */
2656 vm_object_lock(sp_object);
2657 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2658 m = pages;
39037602 2659 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
2660 pages = NEXT_PAGE(m);
2661 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3e170ce0 2662 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 2663 }
b0d623f7 2664 vm_object_unlock(sp_object);
2d21ac55 2665 }
b0d623f7
A
2666 } while (tmp_end != tmp2_end &&
2667 (tmp_start = tmp_end) &&
2668 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2669 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2670 }
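/*
 * Worked example of the chunking above (assuming ANON_CHUNK_SIZE is the
 * 4 GB limit described in the comment): an anonymous request for 10 GB
 * with max_protection != VM_PROT_NONE and no superpages is inserted as
 * three entries/objects of 4 GB, 4 GB and 2 GB, while a 10 GB "page zero"
 * reservation with max_protection == VM_PROT_NONE stays a single entry.
 */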
91447636 2671
91447636 2672 new_mapping_established = TRUE;
1c79356b 2673
fe8ab488
A
2674BailOut:
2675 assert(map_locked == TRUE);
2d21ac55 2676
593a1d5f
A
2677 if (result == KERN_SUCCESS) {
2678 vm_prot_t pager_prot;
2679 memory_object_t pager;
91447636 2680
fe8ab488 2681#if DEBUG
593a1d5f
A
2682 if (pmap_empty &&
2683 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2684 assert(vm_map_pmap_is_empty(map,
2685 *address,
2686 *address+size));
2687 }
fe8ab488 2688#endif /* DEBUG */
593a1d5f
A
2689
2690 /*
2691 * For "named" VM objects, let the pager know that the
2692 * memory object is being mapped. Some pagers need to keep
2693 * track of this, to know when they can reclaim the memory
2694 * object, for example.
2695 * VM calls memory_object_map() for each mapping (specifying
2696 * the protection of each mapping) and calls
2697 * memory_object_last_unmap() when all the mappings are gone.
2698 */
2699 pager_prot = max_protection;
2700 if (needs_copy) {
2701 /*
2702 * Copy-On-Write mapping: won't modify
2703 * the memory object.
2704 */
2705 pager_prot &= ~VM_PROT_WRITE;
2706 }
2707 if (!is_submap &&
2708 object != VM_OBJECT_NULL &&
2709 object->named &&
2710 object->pager != MEMORY_OBJECT_NULL) {
2711 vm_object_lock(object);
2712 pager = object->pager;
2713 if (object->named &&
2714 pager != MEMORY_OBJECT_NULL) {
2715 assert(object->pager_ready);
2716 vm_object_mapping_wait(object, THREAD_UNINT);
2717 vm_object_mapping_begin(object);
2718 vm_object_unlock(object);
2719
2720 kr = memory_object_map(pager, pager_prot);
2721 assert(kr == KERN_SUCCESS);
2722
2723 vm_object_lock(object);
2724 vm_object_mapping_end(object);
2725 }
2726 vm_object_unlock(object);
2727 }
fe8ab488
A
2728 }
2729
2730 assert(map_locked == TRUE);
2731
2732 if (!keep_map_locked) {
2733 vm_map_unlock(map);
2734 map_locked = FALSE;
2735 }
2736
2737 /*
2738 * We can't hold the map lock if we enter this block.
2739 */
2740
2741 if (result == KERN_SUCCESS) {
2742
2743 /* Wire down the new entry if the user
2744 * requested all new map entries be wired.
2745 */
2746 if ((map->wiring_required)||(superpage_size)) {
2747 assert(!keep_map_locked);
2748 pmap_empty = FALSE; /* pmap won't be empty */
2749 kr = vm_map_wire(map, start, end,
3e170ce0
A
2750 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2751 TRUE);
fe8ab488
A
2752 result = kr;
2753 }
2754
2755 }
2756
2757 if (result != KERN_SUCCESS) {
91447636
A
2758 if (new_mapping_established) {
2759 /*
2760 * We have to get rid of the new mappings since we
2761 * won't make them available to the user.
2762 * Try to do that atomically, to minimize the risk
2763 * that someone else creates new mappings in that range.
2764 */
2765 zap_new_map = vm_map_create(PMAP_NULL,
2766 *address,
2767 *address + size,
b0d623f7 2768 map->hdr.entries_pageable);
39236c6e
A
2769 vm_map_set_page_shift(zap_new_map,
2770 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
2771 vm_map_disable_hole_optimization(zap_new_map);
2772
91447636
A
2773 if (!map_locked) {
2774 vm_map_lock(map);
2775 map_locked = TRUE;
2776 }
2777 (void) vm_map_delete(map, *address, *address+size,
fe8ab488
A
2778 (VM_MAP_REMOVE_SAVE_ENTRIES |
2779 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2780 zap_new_map);
2781 }
2782 if (zap_old_map != VM_MAP_NULL &&
2783 zap_old_map->hdr.nentries != 0) {
2784 vm_map_entry_t entry1, entry2;
2785
2786 /*
2787 * The new mapping failed. Attempt to restore
2788 * the old mappings, saved in the "zap_old_map".
2789 */
2790 if (!map_locked) {
2791 vm_map_lock(map);
2792 map_locked = TRUE;
2793 }
2794
2795 /* first check if the coast is still clear */
2796 start = vm_map_first_entry(zap_old_map)->vme_start;
2797 end = vm_map_last_entry(zap_old_map)->vme_end;
2798 if (vm_map_lookup_entry(map, start, &entry1) ||
2799 vm_map_lookup_entry(map, end, &entry2) ||
2800 entry1 != entry2) {
2801 /*
2802 * Part of that range has already been
2803 * re-mapped: we can't restore the old
2804 * mappings...
2805 */
2806 vm_map_enter_restore_failures++;
2807 } else {
2808 /*
2809 * Transfer the saved map entries from
2810 * "zap_old_map" to the original "map",
2811 * inserting them all after "entry1".
2812 */
2813 for (entry2 = vm_map_first_entry(zap_old_map);
2814 entry2 != vm_map_to_entry(zap_old_map);
2815 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
2816 vm_map_size_t entry_size;
2817
2818 entry_size = (entry2->vme_end -
2819 entry2->vme_start);
6d2010ae 2820 vm_map_store_entry_unlink(zap_old_map,
91447636 2821 entry2);
2d21ac55 2822 zap_old_map->size -= entry_size;
6d2010ae 2823 vm_map_store_entry_link(map, entry1, entry2);
2d21ac55 2824 map->size += entry_size;
91447636
A
2825 entry1 = entry2;
2826 }
2827 if (map->wiring_required) {
2828 /*
2829 * XXX TODO: we should rewire the
2830 * old pages here...
2831 */
2832 }
2833 vm_map_enter_restore_successes++;
2834 }
2835 }
2836 }
2837
fe8ab488
A
2838 /*
2839 * The caller is responsible for releasing the lock if it requested to
2840 * keep the map locked.
2841 */
2842 if (map_locked && !keep_map_locked) {
91447636
A
2843 vm_map_unlock(map);
2844 }
2845
2846 /*
2847 * Get rid of the "zap_maps" and all the map entries that
2848 * they may still contain.
2849 */
2850 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 2851 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2852 zap_old_map = VM_MAP_NULL;
2853 }
2854 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 2855 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2856 zap_new_map = VM_MAP_NULL;
2857 }
2858
2859 return result;
1c79356b
A
2860
2861#undef RETURN
2862}
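/*
 * Illustrative sketch, guarded out and not compiled: the most common way
 * vm_map_enter() is used -- an anywhere-placed, anonymous, zero-fill
 * allocation, i.e. roughly what a vm_allocate() request boils down to.
 * "size" is assumed to already be aligned to the map's page size.
 */
#if 0
static kern_return_t
example_allocate_anonymous(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)			/* OUT: chosen address */
{
	*addr = 0;
	return vm_map_enter(map,
			    addr,
			    size,
			    (vm_map_offset_t)0,	/* no extra alignment mask */
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL,	/* anonymous: object created lazily */
			    (vm_object_offset_t)0,
			    FALSE,		/* needs_copy */
			    VM_PROT_DEFAULT,	/* cur_protection */
			    VM_PROT_ALL,	/* max_protection */
			    VM_INHERIT_DEFAULT);
}
#endif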
2863
3e170ce0 2864
fe8ab488
A
2865/*
2866 * Counters for the prefault optimization.
2867 */
2868int64_t vm_prefault_nb_pages = 0;
2869int64_t vm_prefault_nb_bailout = 0;
2870
2871static kern_return_t
2872vm_map_enter_mem_object_helper(
2d21ac55
A
2873 vm_map_t target_map,
2874 vm_map_offset_t *address,
2875 vm_map_size_t initial_size,
2876 vm_map_offset_t mask,
2877 int flags,
2878 ipc_port_t port,
2879 vm_object_offset_t offset,
2880 boolean_t copy,
2881 vm_prot_t cur_protection,
2882 vm_prot_t max_protection,
fe8ab488
A
2883 vm_inherit_t inheritance,
2884 upl_page_list_ptr_t page_list,
2885 unsigned int page_list_count)
91447636 2886{
2d21ac55
A
2887 vm_map_address_t map_addr;
2888 vm_map_size_t map_size;
2889 vm_object_t object;
2890 vm_object_size_t size;
2891 kern_return_t result;
6d2010ae 2892 boolean_t mask_cur_protection, mask_max_protection;
fe8ab488 2893 boolean_t try_prefault = (page_list_count != 0);
3e170ce0 2894 vm_map_offset_t offset_in_mapping = 0;
6d2010ae
A
2895
2896 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2897 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2898 cur_protection &= ~VM_PROT_IS_MASK;
2899 max_protection &= ~VM_PROT_IS_MASK;
91447636
A
2900
2901 /*
2d21ac55 2902 * Check arguments for validity
91447636 2903 */
2d21ac55
A
2904 if ((target_map == VM_MAP_NULL) ||
2905 (cur_protection & ~VM_PROT_ALL) ||
2906 (max_protection & ~VM_PROT_ALL) ||
2907 (inheritance > VM_INHERIT_LAST_VALID) ||
fe8ab488 2908 (try_prefault && (copy || !page_list)) ||
3e170ce0 2909 initial_size == 0) {
2d21ac55 2910 return KERN_INVALID_ARGUMENT;
3e170ce0 2911 }
6d2010ae 2912
3e170ce0
A
2913 {
2914 map_addr = vm_map_trunc_page(*address,
2915 VM_MAP_PAGE_MASK(target_map));
2916 map_size = vm_map_round_page(initial_size,
2917 VM_MAP_PAGE_MASK(target_map));
2918 }
39236c6e 2919 size = vm_object_round_page(initial_size);
593a1d5f 2920
2d21ac55
A
2921 /*
2922 * Find the vm object (if any) corresponding to this port.
2923 */
2924 if (!IP_VALID(port)) {
2925 object = VM_OBJECT_NULL;
2926 offset = 0;
2927 copy = FALSE;
2928 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2929 vm_named_entry_t named_entry;
2930
2931 named_entry = (vm_named_entry_t) port->ip_kobject;
39236c6e 2932
3e170ce0
A
2933 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2934 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
2935 offset += named_entry->data_offset;
2936 }
2937
2d21ac55
A
2938 /* a few checks to make sure user is obeying rules */
2939 if (size == 0) {
2940 if (offset >= named_entry->size)
2941 return KERN_INVALID_RIGHT;
2942 size = named_entry->size - offset;
2943 }
6d2010ae
A
2944 if (mask_max_protection) {
2945 max_protection &= named_entry->protection;
2946 }
2947 if (mask_cur_protection) {
2948 cur_protection &= named_entry->protection;
2949 }
2d21ac55
A
2950 if ((named_entry->protection & max_protection) !=
2951 max_protection)
2952 return KERN_INVALID_RIGHT;
2953 if ((named_entry->protection & cur_protection) !=
2954 cur_protection)
2955 return KERN_INVALID_RIGHT;
22ba694c
A
2956 if (offset + size < offset) {
2957 /* overflow */
2958 return KERN_INVALID_ARGUMENT;
2959 }
3e170ce0 2960 if (named_entry->size < (offset + initial_size)) {
2d21ac55 2961 return KERN_INVALID_ARGUMENT;
3e170ce0 2962 }
2d21ac55 2963
39236c6e
A
2964 if (named_entry->is_copy) {
2965 /* for a vm_map_copy, we can only map it whole */
2966 if ((size != named_entry->size) &&
2967 (vm_map_round_page(size,
2968 VM_MAP_PAGE_MASK(target_map)) ==
2969 named_entry->size)) {
2970 /* XXX FBDP use the rounded size... */
2971 size = vm_map_round_page(
2972 size,
2973 VM_MAP_PAGE_MASK(target_map));
2974 }
2975
fe8ab488
A
2976 if (!(flags & VM_FLAGS_ANYWHERE) &&
2977 (offset != 0 ||
2978 size != named_entry->size)) {
2979 /*
2980 * XXX for a mapping at a "fixed" address,
2981 * we can't trim after mapping the whole
2982 * memory entry, so reject a request for a
2983 * partial mapping.
2984 */
39236c6e
A
2985 return KERN_INVALID_ARGUMENT;
2986 }
2987 }
2988
2d21ac55
A
2989 /* the caller's "offset" parameter is defined to be relative to */
2990 /* the start of the named entry, which has its own offset in the object */
2991 offset = offset + named_entry->offset;
2992
39236c6e
A
2993 if (! VM_MAP_PAGE_ALIGNED(size,
2994 VM_MAP_PAGE_MASK(target_map))) {
2995 /*
2996 * Let's not map more than requested;
2997 * vm_map_enter() will handle this "not map-aligned"
2998 * case.
2999 */
3000 map_size = size;
3001 }
3002
2d21ac55
A
3003 named_entry_lock(named_entry);
3004 if (named_entry->is_sub_map) {
3005 vm_map_t submap;
3006
3e170ce0
A
3007 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3008 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3009 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3010 }
3011
2d21ac55
A
3012 submap = named_entry->backing.map;
3013 vm_map_lock(submap);
3014 vm_map_reference(submap);
3015 vm_map_unlock(submap);
3016 named_entry_unlock(named_entry);
3017
3018 result = vm_map_enter(target_map,
3019 &map_addr,
3020 map_size,
3021 mask,
3022 flags | VM_FLAGS_SUBMAP,
3023 (vm_object_t) submap,
3024 offset,
3025 copy,
3026 cur_protection,
3027 max_protection,
3028 inheritance);
3029 if (result != KERN_SUCCESS) {
3030 vm_map_deallocate(submap);
3031 } else {
3032 /*
3033 * No need to lock "submap" just to check its
3034 * "mapped" flag: that flag is never reset
3035 * once it's been set and if we race, we'll
3036 * just end up setting it twice, which is OK.
3037 */
316670eb
A
3038 if (submap->mapped_in_other_pmaps == FALSE &&
3039 vm_map_pmap(submap) != PMAP_NULL &&
3040 vm_map_pmap(submap) !=
3041 vm_map_pmap(target_map)) {
2d21ac55 3042 /*
316670eb
A
3043 * This submap is being mapped in a map
3044 * that uses a different pmap.
3045 * Set its "mapped_in_other_pmaps" flag
3046 * to indicate that we now need to
3047 * remove mappings from all pmaps rather
3048 * than just the submap's pmap.
2d21ac55
A
3049 */
3050 vm_map_lock(submap);
316670eb 3051 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
3052 vm_map_unlock(submap);
3053 }
3054 *address = map_addr;
3055 }
3056 return result;
3057
3058 } else if (named_entry->is_pager) {
3059 unsigned int access;
3060 vm_prot_t protections;
3061 unsigned int wimg_mode;
2d21ac55
A
3062
3063 protections = named_entry->protection & VM_PROT_ALL;
3064 access = GET_MAP_MEM(named_entry->protection);
3065
3e170ce0
A
3066 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3067 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3068 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3069 }
3070
2d21ac55
A
3071 object = vm_object_enter(named_entry->backing.pager,
3072 named_entry->size,
3073 named_entry->internal,
3074 FALSE,
3075 FALSE);
3076 if (object == VM_OBJECT_NULL) {
3077 named_entry_unlock(named_entry);
3078 return KERN_INVALID_OBJECT;
3079 }
3080
3081 /* JMM - drop reference on pager here */
3082
3083 /* create an extra ref for the named entry */
3084 vm_object_lock(object);
3085 vm_object_reference_locked(object);
3086 named_entry->backing.object = object;
3087 named_entry->is_pager = FALSE;
3088 named_entry_unlock(named_entry);
3089
3090 wimg_mode = object->wimg_bits;
6d2010ae 3091
2d21ac55
A
3092 if (access == MAP_MEM_IO) {
3093 wimg_mode = VM_WIMG_IO;
3094 } else if (access == MAP_MEM_COPYBACK) {
3095 wimg_mode = VM_WIMG_USE_DEFAULT;
316670eb
A
3096 } else if (access == MAP_MEM_INNERWBACK) {
3097 wimg_mode = VM_WIMG_INNERWBACK;
2d21ac55
A
3098 } else if (access == MAP_MEM_WTHRU) {
3099 wimg_mode = VM_WIMG_WTHRU;
3100 } else if (access == MAP_MEM_WCOMB) {
3101 wimg_mode = VM_WIMG_WCOMB;
3102 }
2d21ac55
A
3103
3104 /* wait for object (if any) to be ready */
3105 if (!named_entry->internal) {
3106 while (!object->pager_ready) {
3107 vm_object_wait(
3108 object,
3109 VM_OBJECT_EVENT_PAGER_READY,
3110 THREAD_UNINT);
3111 vm_object_lock(object);
3112 }
3113 }
3114
6d2010ae
A
3115 if (object->wimg_bits != wimg_mode)
3116 vm_object_change_wimg_mode(object, wimg_mode);
2d21ac55 3117
fe8ab488
A
3118#if VM_OBJECT_TRACKING_OP_TRUESHARE
3119 if (!object->true_share &&
3120 vm_object_tracking_inited) {
3121 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3122 int num = 0;
3123
3124 num = OSBacktrace(bt,
3125 VM_OBJECT_TRACKING_BTDEPTH);
3126 btlog_add_entry(vm_object_tracking_btlog,
3127 object,
3128 VM_OBJECT_TRACKING_OP_TRUESHARE,
3129 bt,
3130 num);
3131 }
3132#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3133
2d21ac55 3134 object->true_share = TRUE;
6d2010ae 3135
2d21ac55
A
3136 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3137 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3138 vm_object_unlock(object);
39236c6e
A
3139
3140 } else if (named_entry->is_copy) {
3141 kern_return_t kr;
3142 vm_map_copy_t copy_map;
3143 vm_map_entry_t copy_entry;
3144 vm_map_offset_t copy_addr;
3145
3146 if (flags & ~(VM_FLAGS_FIXED |
3147 VM_FLAGS_ANYWHERE |
3148 VM_FLAGS_OVERWRITE |
d190cdc3 3149 VM_FLAGS_IOKIT_ACCT |
3e170ce0 3150 VM_FLAGS_RETURN_4K_DATA_ADDR |
39037602
A
3151 VM_FLAGS_RETURN_DATA_ADDR |
3152 VM_FLAGS_ALIAS_MASK)) {
39236c6e
A
3153 named_entry_unlock(named_entry);
3154 return KERN_INVALID_ARGUMENT;
3155 }
3156
3e170ce0
A
3157 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3158 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3159 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3160 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3161 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3162 offset = vm_object_trunc_page(offset);
3163 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3164 }
3165
3166 copy_map = named_entry->backing.copy;
3167 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3168 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3169 /* unsupported type; should not happen */
3170 printf("vm_map_enter_mem_object: "
3171 "memory_entry->backing.copy "
3172 "unsupported type 0x%x\n",
3173 copy_map->type);
3174 named_entry_unlock(named_entry);
3175 return KERN_INVALID_ARGUMENT;
3176 }
3177
3178 /* reserve a contiguous range */
3179 kr = vm_map_enter(target_map,
3180 &map_addr,
fe8ab488
A
3181 /* map whole mem entry, trim later: */
3182 named_entry->size,
39236c6e
A
3183 mask,
3184 flags & (VM_FLAGS_ANYWHERE |
3185 VM_FLAGS_OVERWRITE |
d190cdc3 3186 VM_FLAGS_IOKIT_ACCT |
3e170ce0 3187 VM_FLAGS_RETURN_4K_DATA_ADDR |
39037602
A
3188 VM_FLAGS_RETURN_DATA_ADDR |
3189 VM_FLAGS_ALIAS_MASK),
39236c6e
A
3190 VM_OBJECT_NULL,
3191 0,
3192 FALSE, /* copy */
3193 cur_protection,
3194 max_protection,
3195 inheritance);
3196 if (kr != KERN_SUCCESS) {
3197 named_entry_unlock(named_entry);
3198 return kr;
3199 }
3200
3201 copy_addr = map_addr;
3202
3203 for (copy_entry = vm_map_copy_first_entry(copy_map);
3204 copy_entry != vm_map_copy_to_entry(copy_map);
3205 copy_entry = copy_entry->vme_next) {
3206 int remap_flags = 0;
3207 vm_map_t copy_submap;
3208 vm_object_t copy_object;
3209 vm_map_size_t copy_size;
3210 vm_object_offset_t copy_offset;
39037602 3211 int copy_vm_alias;
39236c6e 3212
3e170ce0 3213 copy_offset = VME_OFFSET(copy_entry);
39236c6e
A
3214 copy_size = (copy_entry->vme_end -
3215 copy_entry->vme_start);
39037602
A
3216 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3217 if (copy_vm_alias == 0) {
3218 /*
3219 * Caller does not want a specific
3220 * alias for this new mapping: use
3221 * the alias of the original mapping.
3222 */
3223 copy_vm_alias = VME_ALIAS(copy_entry);
3224 }
39236c6e
A
3225
3226 /* sanity check */
fe8ab488
A
3227 if ((copy_addr + copy_size) >
3228 (map_addr +
3229 named_entry->size /* XXX full size */ )) {
39236c6e
A
3230 /* over-mapping too much !? */
3231 kr = KERN_INVALID_ARGUMENT;
3232 /* abort */
3233 break;
3234 }
3235
3236 /* take a reference on the object */
3237 if (copy_entry->is_sub_map) {
3238 remap_flags |= VM_FLAGS_SUBMAP;
3e170ce0 3239 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
3240 vm_map_lock(copy_submap);
3241 vm_map_reference(copy_submap);
3242 vm_map_unlock(copy_submap);
3243 copy_object = (vm_object_t) copy_submap;
3244 } else {
3e170ce0 3245 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
3246 vm_object_reference(copy_object);
3247 }
3248
3249 /* over-map the object into destination */
3250 remap_flags |= flags;
3251 remap_flags |= VM_FLAGS_FIXED;
3252 remap_flags |= VM_FLAGS_OVERWRITE;
3253 remap_flags &= ~VM_FLAGS_ANYWHERE;
39037602 3254 remap_flags |= VM_MAKE_TAG(copy_vm_alias);
39236c6e
A
3255 kr = vm_map_enter(target_map,
3256 &copy_addr,
3257 copy_size,
3258 (vm_map_offset_t) 0,
3259 remap_flags,
3260 copy_object,
3261 copy_offset,
3262 copy,
3263 cur_protection,
3264 max_protection,
3265 inheritance);
3266 if (kr != KERN_SUCCESS) {
3267 if (copy_entry->is_sub_map) {
3268 vm_map_deallocate(copy_submap);
3269 } else {
3270 vm_object_deallocate(copy_object);
3271 }
3272 /* abort */
3273 break;
3274 }
3275
3276 /* next mapping */
3277 copy_addr += copy_size;
3278 }
3279
3280 if (kr == KERN_SUCCESS) {
3e170ce0
A
3281 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3282 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3283 *address = map_addr + offset_in_mapping;
3284 } else {
3285 *address = map_addr;
3286 }
fe8ab488
A
3287
3288 if (offset) {
3289 /*
3290 * Trim in front, from 0 to "offset".
3291 */
3292 vm_map_remove(target_map,
3293 map_addr,
3294 map_addr + offset,
3295 0);
3296 *address += offset;
3297 }
3298 if (offset + map_size < named_entry->size) {
3299 /*
3300 * Trim in back, from
3301 * "offset + map_size" to
3302 * "named_entry->size".
3303 */
3304 vm_map_remove(target_map,
3305 (map_addr +
3306 offset + map_size),
3307 (map_addr +
3308 named_entry->size),
3309 0);
3310 }
39236c6e
A
3311 }
3312 named_entry_unlock(named_entry);
3313
3314 if (kr != KERN_SUCCESS) {
3315 if (! (flags & VM_FLAGS_OVERWRITE)) {
3316 /* deallocate the contiguous range */
3317 (void) vm_deallocate(target_map,
3318 map_addr,
3319 map_size);
3320 }
3321 }
3322
3323 return kr;
3324
2d21ac55
A
3325 } else {
3326 /* This is the case where we are going to map */
3327 /* an already mapped object. If the object is */
3328 /* not ready, it is internal. An external */
3329 /* object cannot be mapped until it is ready, */
3330 /* so we can avoid the ready check */
3331 /* in this case. */
3e170ce0
A
3332 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3333 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3334 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3335 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3336 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3337 offset = vm_object_trunc_page(offset);
3338 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3339 }
3340
2d21ac55
A
3341 object = named_entry->backing.object;
3342 assert(object != VM_OBJECT_NULL);
3343 named_entry_unlock(named_entry);
3344 vm_object_reference(object);
3345 }
3346 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3347 /*
3348 * JMM - This is temporary until we unify named entries
3349 * and raw memory objects.
3350 *
3351 * Detected fake ip_kotype for a memory object. In
3352 * this case, the port isn't really a port at all, but
3353 * instead is just a raw memory object.
3354 */
3e170ce0
A
3355 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3356 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3357 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3358 }
3359
2d21ac55
A
3360 object = vm_object_enter((memory_object_t)port,
3361 size, FALSE, FALSE, FALSE);
3362 if (object == VM_OBJECT_NULL)
3363 return KERN_INVALID_OBJECT;
3364
3365 /* wait for object (if any) to be ready */
3366 if (object != VM_OBJECT_NULL) {
3367 if (object == kernel_object) {
3368 printf("Warning: Attempt to map kernel object"
3369 " by a non-private kernel entity\n");
3370 return KERN_INVALID_OBJECT;
3371 }
b0d623f7 3372 if (!object->pager_ready) {
2d21ac55 3373 vm_object_lock(object);
b0d623f7
A
3374
3375 while (!object->pager_ready) {
3376 vm_object_wait(object,
3377 VM_OBJECT_EVENT_PAGER_READY,
3378 THREAD_UNINT);
3379 vm_object_lock(object);
3380 }
3381 vm_object_unlock(object);
2d21ac55 3382 }
2d21ac55
A
3383 }
3384 } else {
3385 return KERN_INVALID_OBJECT;
3386 }
3387
593a1d5f
A
3388 if (object != VM_OBJECT_NULL &&
3389 object->named &&
3390 object->pager != MEMORY_OBJECT_NULL &&
3391 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3392 memory_object_t pager;
3393 vm_prot_t pager_prot;
3394 kern_return_t kr;
3395
3396 /*
3397 * For "named" VM objects, let the pager know that the
3398 * memory object is being mapped. Some pagers need to keep
3399 * track of this, to know when they can reclaim the memory
3400 * object, for example.
3401 * VM calls memory_object_map() for each mapping (specifying
3402 * the protection of each mapping) and calls
3403 * memory_object_last_unmap() when all the mappings are gone.
3404 */
3405 pager_prot = max_protection;
3406 if (copy) {
3407 /*
3408 * Copy-On-Write mapping: won't modify the
3409 * memory object.
3410 */
3411 pager_prot &= ~VM_PROT_WRITE;
3412 }
3413 vm_object_lock(object);
3414 pager = object->pager;
3415 if (object->named &&
3416 pager != MEMORY_OBJECT_NULL &&
3417 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3418 assert(object->pager_ready);
3419 vm_object_mapping_wait(object, THREAD_UNINT);
3420 vm_object_mapping_begin(object);
3421 vm_object_unlock(object);
3422
3423 kr = memory_object_map(pager, pager_prot);
3424 assert(kr == KERN_SUCCESS);
3425
3426 vm_object_lock(object);
3427 vm_object_mapping_end(object);
3428 }
3429 vm_object_unlock(object);
3430 }
3431
2d21ac55
A
3432 /*
3433 * Perform the copy if requested
3434 */
3435
3436 if (copy) {
3437 vm_object_t new_object;
3438 vm_object_offset_t new_offset;
3439
3e170ce0
A
3440 result = vm_object_copy_strategically(object, offset,
3441 map_size,
2d21ac55
A
3442 &new_object, &new_offset,
3443 &copy);
3444
3445
3446 if (result == KERN_MEMORY_RESTART_COPY) {
3447 boolean_t success;
3448 boolean_t src_needs_copy;
3449
3450 /*
3451 * XXX
3452 * We currently ignore src_needs_copy.
3453 * This really is the issue of how to make
3454 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3455 * non-kernel users to use. Solution forthcoming.
3456 * In the meantime, since we don't allow non-kernel
3457 * memory managers to specify symmetric copy,
3458 * we won't run into problems here.
3459 */
3460 new_object = object;
3461 new_offset = offset;
3462 success = vm_object_copy_quickly(&new_object,
3e170ce0
A
3463 new_offset,
3464 map_size,
2d21ac55
A
3465 &src_needs_copy,
3466 &copy);
3467 assert(success);
3468 result = KERN_SUCCESS;
3469 }
3470 /*
3471 * Throw away the reference to the
3472 * original object, as it won't be mapped.
3473 */
3474
3475 vm_object_deallocate(object);
3476
3e170ce0 3477 if (result != KERN_SUCCESS) {
2d21ac55 3478 return result;
3e170ce0 3479 }
2d21ac55
A
3480
3481 object = new_object;
3482 offset = new_offset;
3483 }
3484
fe8ab488
A
3485 /*
3486 * If users want to try to prefault pages, the mapping and prefault
3487 * needs to be atomic.
3488 */
3489 if (try_prefault)
3490 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3e170ce0
A
3491
3492 {
3493 result = vm_map_enter(target_map,
3494 &map_addr, map_size,
3495 (vm_map_offset_t)mask,
3496 flags,
3497 object, offset,
3498 copy,
3499 cur_protection, max_protection,
3500 inheritance);
3501 }
2d21ac55
A
3502 if (result != KERN_SUCCESS)
3503 vm_object_deallocate(object);
39236c6e 3504
fe8ab488
A
3505 /*
3506 * Try to prefault, and do not forget to release the vm map lock.
3507 */
3508 if (result == KERN_SUCCESS && try_prefault) {
3509 mach_vm_address_t va = map_addr;
3510 kern_return_t kr = KERN_SUCCESS;
3511 unsigned int i = 0;
39037602
A
3512 int pmap_options;
3513
3514 pmap_options = PMAP_OPTIONS_NOWAIT;
3515 if (object->internal) {
3516 pmap_options |= PMAP_OPTIONS_INTERNAL;
3517 }
fe8ab488
A
3518
3519 for (i = 0; i < page_list_count; ++i) {
3520 if (UPL_VALID_PAGE(page_list, i)) {
3521 /*
3522 * If this call fails, we should stop
3523 * trying to optimize; further calls are likely
3524 * to fail too.
3525 *
3526 * We do not report an error for such a
3527 * failure, though: prefaulting is an optimization,
3528 * not something critical.
3529 */
3530 kr = pmap_enter_options(target_map->pmap,
3531 va, UPL_PHYS_PAGE(page_list, i),
3532 cur_protection, VM_PROT_NONE,
39037602 3533 0, TRUE, pmap_options, NULL);
fe8ab488
A
3534 if (kr != KERN_SUCCESS) {
3535 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3e170ce0 3536 break;
fe8ab488
A
3537 }
3538 OSIncrementAtomic64(&vm_prefault_nb_pages);
3539 }
3540
3541 /* Next virtual address */
3542 va += PAGE_SIZE;
3543 }
fe8ab488
A
3544 vm_map_unlock(target_map);
3545 }
3546
3e170ce0
A
3547 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3548 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3549 *address = map_addr + offset_in_mapping;
3550 } else {
3551 *address = map_addr;
3552 }
2d21ac55
A
3553 return result;
3554}
3555
fe8ab488
A
3556kern_return_t
3557vm_map_enter_mem_object(
3558 vm_map_t target_map,
3559 vm_map_offset_t *address,
3560 vm_map_size_t initial_size,
3561 vm_map_offset_t mask,
3562 int flags,
3563 ipc_port_t port,
3564 vm_object_offset_t offset,
3565 boolean_t copy,
3566 vm_prot_t cur_protection,
3567 vm_prot_t max_protection,
3568 vm_inherit_t inheritance)
3569{
3570 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3571 port, offset, copy, cur_protection, max_protection,
3572 inheritance, NULL, 0);
3573}
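
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * one plausible way a kernel caller could use vm_map_enter_mem_object()
 * above to map a named memory entry.  "mem_entry" is assumed to be a valid
 * ipc_port_t naming the memory (e.g. obtained via mach_make_memory_entry_64());
 * all names here are hypothetical.
 */
#if 0 /* example only */
static kern_return_t
map_named_entry_example(
	vm_map_t	target_map,
	ipc_port_t	mem_entry,
	vm_map_size_t	size,
	vm_map_offset_t	*out_addr)
{
	vm_map_offset_t	map_addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter_mem_object(target_map,
				     &map_addr,			/* let the kernel pick an address */
				     size,
				     (vm_map_offset_t)0,	/* no alignment mask */
				     VM_FLAGS_ANYWHERE,
				     mem_entry,
				     0,				/* offset within the object */
				     FALSE,			/* share, don't copy-on-write */
				     VM_PROT_READ | VM_PROT_WRITE,
				     VM_PROT_READ | VM_PROT_WRITE,
				     VM_INHERIT_DEFAULT);
	if (kr == KERN_SUCCESS)
		*out_addr = map_addr;
	return kr;
}
#endif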
b0d623f7 3574
fe8ab488
A
3575kern_return_t
3576vm_map_enter_mem_object_prefault(
3577 vm_map_t target_map,
3578 vm_map_offset_t *address,
3579 vm_map_size_t initial_size,
3580 vm_map_offset_t mask,
3581 int flags,
3582 ipc_port_t port,
3583 vm_object_offset_t offset,
3584 vm_prot_t cur_protection,
3585 vm_prot_t max_protection,
3586 upl_page_list_ptr_t page_list,
3587 unsigned int page_list_count)
3588{
3589 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3590 port, offset, FALSE, cur_protection, max_protection,
3591 VM_INHERIT_DEFAULT, page_list, page_list_count);
3592}
b0d623f7
A
3593
3594
3595kern_return_t
3596vm_map_enter_mem_object_control(
3597 vm_map_t target_map,
3598 vm_map_offset_t *address,
3599 vm_map_size_t initial_size,
3600 vm_map_offset_t mask,
3601 int flags,
3602 memory_object_control_t control,
3603 vm_object_offset_t offset,
3604 boolean_t copy,
3605 vm_prot_t cur_protection,
3606 vm_prot_t max_protection,
3607 vm_inherit_t inheritance)
3608{
3609 vm_map_address_t map_addr;
3610 vm_map_size_t map_size;
3611 vm_object_t object;
3612 vm_object_size_t size;
3613 kern_return_t result;
3614 memory_object_t pager;
3615 vm_prot_t pager_prot;
3616 kern_return_t kr;
3617
3618 /*
3619 * Check arguments for validity
3620 */
3621 if ((target_map == VM_MAP_NULL) ||
3622 (cur_protection & ~VM_PROT_ALL) ||
3623 (max_protection & ~VM_PROT_ALL) ||
3624 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 3625 initial_size == 0) {
b0d623f7 3626 return KERN_INVALID_ARGUMENT;
3e170ce0 3627 }
b0d623f7 3628
3e170ce0
A
3629 {
3630 map_addr = vm_map_trunc_page(*address,
3631 VM_MAP_PAGE_MASK(target_map));
3632 map_size = vm_map_round_page(initial_size,
3633 VM_MAP_PAGE_MASK(target_map));
3634 }
3635 size = vm_object_round_page(initial_size);
b0d623f7
A
3636
3637 object = memory_object_control_to_vm_object(control);
3638
3639 if (object == VM_OBJECT_NULL)
3640 return KERN_INVALID_OBJECT;
3641
3642 if (object == kernel_object) {
3643 printf("Warning: Attempt to map kernel object"
3644 " by a non-private kernel entity\n");
3645 return KERN_INVALID_OBJECT;
3646 }
3647
3648 vm_object_lock(object);
3649 object->ref_count++;
3650 vm_object_res_reference(object);
3651
3652 /*
3653 * For "named" VM objects, let the pager know that the
3654 * memory object is being mapped. Some pagers need to keep
3655 * track of this, to know when they can reclaim the memory
3656 * object, for example.
3657 * VM calls memory_object_map() for each mapping (specifying
3658 * the protection of each mapping) and calls
3659 * memory_object_last_unmap() when all the mappings are gone.
3660 */
3661 pager_prot = max_protection;
3662 if (copy) {
3663 pager_prot &= ~VM_PROT_WRITE;
3664 }
3665 pager = object->pager;
3666 if (object->named &&
3667 pager != MEMORY_OBJECT_NULL &&
3668 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3669 assert(object->pager_ready);
3670 vm_object_mapping_wait(object, THREAD_UNINT);
3671 vm_object_mapping_begin(object);
3672 vm_object_unlock(object);
3673
3674 kr = memory_object_map(pager, pager_prot);
3675 assert(kr == KERN_SUCCESS);
3676
3677 vm_object_lock(object);
3678 vm_object_mapping_end(object);
3679 }
3680 vm_object_unlock(object);
3681
3682 /*
3683 * Perform the copy if requested
3684 */
3685
3686 if (copy) {
3687 vm_object_t new_object;
3688 vm_object_offset_t new_offset;
3689
3690 result = vm_object_copy_strategically(object, offset, size,
3691 &new_object, &new_offset,
3692 &copy);
3693
3694
3695 if (result == KERN_MEMORY_RESTART_COPY) {
3696 boolean_t success;
3697 boolean_t src_needs_copy;
3698
3699 /*
3700 * XXX
3701 * We currently ignore src_needs_copy.
3702 * This really is the issue of how to make
3703 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3704 * non-kernel users to use. Solution forthcoming.
3705 * In the meantime, since we don't allow non-kernel
3706 * memory managers to specify symmetric copy,
3707 * we won't run into problems here.
3708 */
3709 new_object = object;
3710 new_offset = offset;
3711 success = vm_object_copy_quickly(&new_object,
3712 new_offset, size,
3713 &src_needs_copy,
3714 &copy);
3715 assert(success);
3716 result = KERN_SUCCESS;
3717 }
3718 /*
3719 * Throw away the reference to the
3720 * original object, as it won't be mapped.
3721 */
3722
3723 vm_object_deallocate(object);
3724
3e170ce0 3725 if (result != KERN_SUCCESS) {
b0d623f7 3726 return result;
3e170ce0 3727 }
b0d623f7
A
3728
3729 object = new_object;
3730 offset = new_offset;
3731 }
3732
3e170ce0
A
3733 {
3734 result = vm_map_enter(target_map,
3735 &map_addr, map_size,
3736 (vm_map_offset_t)mask,
3737 flags,
3738 object, offset,
3739 copy,
3740 cur_protection, max_protection,
3741 inheritance);
3742 }
b0d623f7
A
3743 if (result != KERN_SUCCESS)
3744 vm_object_deallocate(object);
3745 *address = map_addr;
3746
3747 return result;
3748}
3749
3750
2d21ac55
A
3751#if VM_CPM
3752
3753#ifdef MACH_ASSERT
3754extern pmap_paddr_t avail_start, avail_end;
3755#endif
3756
3757/*
3758 * Allocate memory in the specified map, with the caveat that
3759 * the memory is physically contiguous. This call may fail
3760 * if the system can't find sufficient contiguous memory.
3761 * This call may cause or lead to heart-stopping amounts of
3762 * paging activity.
3763 *
3764 * Memory obtained from this call should be freed in the
3765 * normal way, viz., via vm_deallocate.
3766 */
3767kern_return_t
3768vm_map_enter_cpm(
3769 vm_map_t map,
3770 vm_map_offset_t *addr,
3771 vm_map_size_t size,
3772 int flags)
3773{
3774 vm_object_t cpm_obj;
3775 pmap_t pmap;
3776 vm_page_t m, pages;
3777 kern_return_t kr;
3778 vm_map_offset_t va, start, end, offset;
3779#if MACH_ASSERT
316670eb 3780 vm_map_offset_t prev_addr = 0;
2d21ac55
A
3781#endif /* MACH_ASSERT */
3782
3783 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
3784 vm_tag_t tag;
3785
3786 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 3787
2d21ac55
A
3788 if (size == 0) {
3789 *addr = 0;
3790 return KERN_SUCCESS;
3791 }
3792 if (anywhere)
3793 *addr = vm_map_min(map);
3794 else
39236c6e
A
3795 *addr = vm_map_trunc_page(*addr,
3796 VM_MAP_PAGE_MASK(map));
3797 size = vm_map_round_page(size,
3798 VM_MAP_PAGE_MASK(map));
2d21ac55
A
3799
3800 /*
3801 * LP64todo - cpm_allocate should probably allow
3802 * allocations of >4GB, but not with the current
3803 * algorithm, so just cast down the size for now.
3804 */
3805 if (size > VM_MAX_ADDRESS)
3806 return KERN_RESOURCE_SHORTAGE;
3807 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
b0d623f7 3808 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2d21ac55
A
3809 return kr;
3810
3811 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3812 assert(cpm_obj != VM_OBJECT_NULL);
3813 assert(cpm_obj->internal);
316670eb 3814 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
3815 assert(cpm_obj->can_persist == FALSE);
3816 assert(cpm_obj->pager_created == FALSE);
3817 assert(cpm_obj->pageout == FALSE);
3818 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
3819
3820 /*
3821 * Insert pages into object.
3822 */
3823
3824 vm_object_lock(cpm_obj);
3825 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3826 m = pages;
3827 pages = NEXT_PAGE(m);
0c530ab8 3828 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
3829
3830 assert(!m->gobbled);
3831 assert(!m->wanted);
3832 assert(!m->pageout);
3833 assert(!m->tabled);
b0d623f7 3834 assert(VM_PAGE_WIRED(m));
91447636
A
3835 /*
3836 * ENCRYPTED SWAP:
3837 * "m" is not supposed to be pageable, so it
3838 * should not be encrypted. It wouldn't be safe
3839 * to enter it in a new VM object while encrypted.
3840 */
3841 ASSERT_PAGE_DECRYPTED(m);
3842 assert(m->busy);
39037602 3843 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
91447636
A
3844
3845 m->busy = FALSE;
3846 vm_page_insert(m, cpm_obj, offset);
3847 }
3848 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3849 vm_object_unlock(cpm_obj);
3850
3851 /*
3852 * Hang onto a reference on the object in case a
3853 * multi-threaded application for some reason decides
3854 * to deallocate the portion of the address space into
3855 * which we will insert this object.
3856 *
3857 * Unfortunately, we must insert the object now before
3858 * we can talk to the pmap module about which addresses
3859 * must be wired down. Hence, the race with a multi-
3860 * threaded app.
3861 */
3862 vm_object_reference(cpm_obj);
3863
3864 /*
3865 * Insert object into map.
3866 */
3867
3868 kr = vm_map_enter(
2d21ac55
A
3869 map,
3870 addr,
3871 size,
3872 (vm_map_offset_t)0,
3873 flags,
3874 cpm_obj,
3875 (vm_object_offset_t)0,
3876 FALSE,
3877 VM_PROT_ALL,
3878 VM_PROT_ALL,
3879 VM_INHERIT_DEFAULT);
91447636
A
3880
3881 if (kr != KERN_SUCCESS) {
3882 /*
3883 * A CPM object doesn't have can_persist set,
3884 * so all we have to do is deallocate it to
3885 * free up these pages.
3886 */
3887 assert(cpm_obj->pager_created == FALSE);
3888 assert(cpm_obj->can_persist == FALSE);
3889 assert(cpm_obj->pageout == FALSE);
3890 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3891 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3892 vm_object_deallocate(cpm_obj); /* kill creation ref */
3893 }
3894
3895 /*
3896 * Inform the physical mapping system that the
3897 * range of addresses may not fault, so that
3898 * page tables and such can be locked down as well.
3899 */
3900 start = *addr;
3901 end = start + size;
3902 pmap = vm_map_pmap(map);
3903 pmap_pageable(pmap, start, end, FALSE);
3904
3905 /*
3906 * Enter each page into the pmap, to avoid faults.
3907 * Note that this loop could be coded more efficiently,
3908 * if the need arose, rather than looking up each page
3909 * again.
3910 */
3911 for (offset = 0, va = start; offset < size;
3912 va += PAGE_SIZE, offset += PAGE_SIZE) {
2d21ac55
A
3913 int type_of_fault;
3914
91447636
A
3915 vm_object_lock(cpm_obj);
3916 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 3917 assert(m != VM_PAGE_NULL);
2d21ac55
A
3918
3919 vm_page_zero_fill(m);
3920
3921 type_of_fault = DBG_ZERO_FILL_FAULT;
3922
6d2010ae 3923 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
fe8ab488 3924 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
2d21ac55
A
3925 &type_of_fault);
3926
3927 vm_object_unlock(cpm_obj);
91447636
A
3928 }
3929
3930#if MACH_ASSERT
3931 /*
3932 * Verify ordering in address space.
3933 */
3934 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3935 vm_object_lock(cpm_obj);
3936 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3937 vm_object_unlock(cpm_obj);
3938 if (m == VM_PAGE_NULL)
316670eb
A
3939 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3940 cpm_obj, (uint64_t)offset);
91447636
A
3941 assert(m->tabled);
3942 assert(!m->busy);
3943 assert(!m->wanted);
3944 assert(!m->fictitious);
3945 assert(!m->private);
3946 assert(!m->absent);
3947 assert(!m->error);
3948 assert(!m->cleaning);
316670eb 3949 assert(!m->laundry);
91447636
A
3950 assert(!m->precious);
3951 assert(!m->clustered);
3952 if (offset != 0) {
39037602 3953 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
316670eb
A
3954 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3955 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3956 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3957 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
3958 panic("vm_allocate_cpm: pages not contig!");
3959 }
3960 }
39037602 3961 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
91447636
A
3962 }
3963#endif /* MACH_ASSERT */
3964
3965 vm_object_deallocate(cpm_obj); /* kill extra ref */
3966
3967 return kr;
3968}
3969
3970
3971#else /* VM_CPM */
3972
3973/*
3974 * Interface is defined in all cases, but unless the kernel
3975 * is built explicitly for this option, the interface does
3976 * nothing.
3977 */
3978
3979kern_return_t
3980vm_map_enter_cpm(
3981 __unused vm_map_t map,
3982 __unused vm_map_offset_t *addr,
3983 __unused vm_map_size_t size,
3984 __unused int flags)
3985{
3986 return KERN_FAILURE;
3987}
3988#endif /* VM_CPM */
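
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * how a VM_CPM kernel might obtain and release a physically contiguous
 * range with vm_map_enter_cpm().  As the comment above notes, the memory
 * is freed in the normal way, via vm_deallocate().  Names are hypothetical.
 */
#if 0 /* example only */
static void
cpm_example(vm_map_t map)
{
	vm_map_offset_t	addr = 0;
	vm_map_size_t	size = 4 * PAGE_SIZE;
	kern_return_t	kr;

	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS)
		return;		/* no contiguous memory available */

	/* ... use the wired, contiguous range [addr, addr + size) ... */

	(void) vm_deallocate(map, addr, size);
}
#endif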
3989
b0d623f7
A
3990/* Not used without nested pmaps */
3991#ifndef NO_NESTED_PMAP
2d21ac55
A
3992/*
3993 * Clip and unnest a portion of a nested submap mapping.
3994 */
b0d623f7
A
3995
3996
2d21ac55
A
3997static void
3998vm_map_clip_unnest(
3999 vm_map_t map,
4000 vm_map_entry_t entry,
4001 vm_map_offset_t start_unnest,
4002 vm_map_offset_t end_unnest)
4003{
b0d623f7
A
4004 vm_map_offset_t old_start_unnest = start_unnest;
4005 vm_map_offset_t old_end_unnest = end_unnest;
4006
2d21ac55 4007 assert(entry->is_sub_map);
3e170ce0 4008 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 4009 assert(entry->use_pmap);
2d21ac55 4010
b0d623f7
A
4011 /*
4012 * Query the platform for the optimal unnest range.
4013 * DRK: There's some duplication of effort here, since
4014 * callers may have adjusted the range to some extent. This
4015 * routine was introduced to support 1GiB subtree nesting
4016 * for x86 platforms, which can also nest on 2MiB boundaries
4017 * depending on size/alignment.
4018 */
4019 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
39037602
A
4020 assert(VME_SUBMAP(entry)->is_nested_map);
4021 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4022 log_unnest_badness(map,
4023 old_start_unnest,
4024 old_end_unnest,
4025 VME_SUBMAP(entry)->is_nested_map,
4026 (entry->vme_start +
4027 VME_SUBMAP(entry)->lowest_unnestable_start -
4028 VME_OFFSET(entry)));
b0d623f7
A
4029 }
4030
2d21ac55
A
4031 if (entry->vme_start > start_unnest ||
4032 entry->vme_end < end_unnest) {
4033 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4034 "bad nested entry: start=0x%llx end=0x%llx\n",
4035 (long long)start_unnest, (long long)end_unnest,
4036 (long long)entry->vme_start, (long long)entry->vme_end);
4037 }
b0d623f7 4038
2d21ac55
A
4039 if (start_unnest > entry->vme_start) {
4040 _vm_map_clip_start(&map->hdr,
4041 entry,
4042 start_unnest);
3e170ce0
A
4043 if (map->holelistenabled) {
4044 vm_map_store_update_first_free(map, NULL, FALSE);
4045 } else {
4046 vm_map_store_update_first_free(map, map->first_free, FALSE);
4047 }
2d21ac55
A
4048 }
4049 if (entry->vme_end > end_unnest) {
4050 _vm_map_clip_end(&map->hdr,
4051 entry,
4052 end_unnest);
3e170ce0
A
4053 if (map->holelistenabled) {
4054 vm_map_store_update_first_free(map, NULL, FALSE);
4055 } else {
4056 vm_map_store_update_first_free(map, map->first_free, FALSE);
4057 }
2d21ac55
A
4058 }
4059
4060 pmap_unnest(map->pmap,
4061 entry->vme_start,
4062 entry->vme_end - entry->vme_start);
316670eb 4063 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
2d21ac55
A
4064 /* clean up parent map/maps */
4065 vm_map_submap_pmap_clean(
4066 map, entry->vme_start,
4067 entry->vme_end,
3e170ce0
A
4068 VME_SUBMAP(entry),
4069 VME_OFFSET(entry));
2d21ac55
A
4070 }
4071 entry->use_pmap = FALSE;
3e170ce0
A
4072 if ((map->pmap != kernel_pmap) &&
4073 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4074 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 4075 }
2d21ac55 4076}
b0d623f7 4077#endif /* NO_NESTED_PMAP */
2d21ac55 4078
1c79356b
A
4079/*
4080 * vm_map_clip_start: [ internal use only ]
4081 *
4082 * Asserts that the given entry begins at or after
4083 * the specified address; if necessary,
4084 * it splits the entry into two.
4085 */
e2d2fc5c 4086void
2d21ac55
A
4087vm_map_clip_start(
4088 vm_map_t map,
4089 vm_map_entry_t entry,
4090 vm_map_offset_t startaddr)
4091{
0c530ab8 4092#ifndef NO_NESTED_PMAP
fe8ab488
A
4093 if (entry->is_sub_map &&
4094 entry->use_pmap &&
2d21ac55
A
4095 startaddr >= entry->vme_start) {
4096 vm_map_offset_t start_unnest, end_unnest;
4097
4098 /*
4099 * Make sure "startaddr" is no longer in a nested range
4100 * before we clip. Unnest only the minimum range the platform
4101 * can handle.
b0d623f7
A
4102 * vm_map_clip_unnest may perform additional adjustments to
4103 * the unnest range.
2d21ac55
A
4104 */
4105 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4106 end_unnest = start_unnest + pmap_nesting_size_min;
4107 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4108 }
4109#endif /* NO_NESTED_PMAP */
4110 if (startaddr > entry->vme_start) {
3e170ce0 4111 if (VME_OBJECT(entry) &&
2d21ac55 4112 !entry->is_sub_map &&
3e170ce0 4113 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4114 pmap_remove(map->pmap,
4115 (addr64_t)(entry->vme_start),
4116 (addr64_t)(entry->vme_end));
4117 }
39037602
A
4118 if (entry->vme_atomic) {
4119 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4120 }
2d21ac55 4121 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
4122 if (map->holelistenabled) {
4123 vm_map_store_update_first_free(map, NULL, FALSE);
4124 } else {
4125 vm_map_store_update_first_free(map, map->first_free, FALSE);
4126 }
2d21ac55
A
4127 }
4128}
4129
1c79356b
A
4130
4131#define vm_map_copy_clip_start(copy, entry, startaddr) \
4132 MACRO_BEGIN \
4133 if ((startaddr) > (entry)->vme_start) \
4134 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
4135 MACRO_END
4136
4137/*
4138 * This routine is called only when it is known that
4139 * the entry must be split.
4140 */
91447636 4141static void
1c79356b 4142_vm_map_clip_start(
39037602
A
4143 struct vm_map_header *map_header,
4144 vm_map_entry_t entry,
4145 vm_map_offset_t start)
1c79356b 4146{
39037602 4147 vm_map_entry_t new_entry;
1c79356b
A
4148
4149 /*
4150 * Split off the front portion --
4151 * note that we must insert the new
4152 * entry BEFORE this one, so that
4153 * this entry has the specified starting
4154 * address.
4155 */
4156
fe8ab488
A
4157 if (entry->map_aligned) {
4158 assert(VM_MAP_PAGE_ALIGNED(start,
4159 VM_MAP_HDR_PAGE_MASK(map_header)));
4160 }
4161
7ddcb079 4162 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4163 vm_map_entry_copy_full(new_entry, entry);
4164
4165 new_entry->vme_end = start;
e2d2fc5c 4166 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 4167 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 4168 assert(start < entry->vme_end);
1c79356b
A
4169 entry->vme_start = start;
4170
6d2010ae 4171 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b
A
4172
4173 if (entry->is_sub_map)
3e170ce0 4174 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4175 else
3e170ce0 4176 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4177}
4178
4179
4180/*
4181 * vm_map_clip_end: [ internal use only ]
4182 *
4183 * Asserts that the given entry ends at or before
4184 * the specified address; if necessary,
4185 * it splits the entry into two.
4186 */
e2d2fc5c 4187void
2d21ac55
A
4188vm_map_clip_end(
4189 vm_map_t map,
4190 vm_map_entry_t entry,
4191 vm_map_offset_t endaddr)
4192{
4193 if (endaddr > entry->vme_end) {
4194 /*
4195 * Within the scope of this clipping, limit "endaddr" to
4196 * the end of this map entry...
4197 */
4198 endaddr = entry->vme_end;
4199 }
4200#ifndef NO_NESTED_PMAP
fe8ab488 4201 if (entry->is_sub_map && entry->use_pmap) {
2d21ac55
A
4202 vm_map_offset_t start_unnest, end_unnest;
4203
4204 /*
4205 * Make sure the range between the start of this entry and
4206 * the new "endaddr" is no longer nested before we clip.
4207 * Unnest only the minimum range the platform can handle.
b0d623f7
A
4208 * vm_map_clip_unnest may perform additional adjustments to
4209 * the unnest range.
2d21ac55
A
4210 */
4211 start_unnest = entry->vme_start;
4212 end_unnest =
4213 (endaddr + pmap_nesting_size_min - 1) &
4214 ~(pmap_nesting_size_min - 1);
4215 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4216 }
4217#endif /* NO_NESTED_PMAP */
4218 if (endaddr < entry->vme_end) {
3e170ce0 4219 if (VME_OBJECT(entry) &&
2d21ac55 4220 !entry->is_sub_map &&
3e170ce0 4221 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4222 pmap_remove(map->pmap,
4223 (addr64_t)(entry->vme_start),
4224 (addr64_t)(entry->vme_end));
4225 }
39037602
A
4226 if (entry->vme_atomic) {
4227 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4228 }
2d21ac55 4229 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
4230 if (map->holelistenabled) {
4231 vm_map_store_update_first_free(map, NULL, FALSE);
4232 } else {
4233 vm_map_store_update_first_free(map, map->first_free, FALSE);
4234 }
2d21ac55
A
4235 }
4236}
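
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * the usual pairing of the two clip routines above, as seen in
 * vm_map_submap() and vm_map_wire_nested() below.  With the map locked and
 * an entry spanning, say, [0x1000, 0x5000), clipping to a requested range
 * of [0x2000, 0x4000) leaves "entry" covering exactly that range, with the
 * remainders split onto neighboring entries.
 */
#if 0 /* example only */
static void
clip_to_range_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);	/* splits off the part below "start" */
		vm_map_clip_end(map, entry, end);	/* splits off the part above "end" */
		/* "entry" now covers at most [start, end) */
	}
	vm_map_unlock(map);
}
#endif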
0c530ab8 4237
1c79356b
A
4238
4239#define vm_map_copy_clip_end(copy, entry, endaddr) \
4240 MACRO_BEGIN \
4241 if ((endaddr) < (entry)->vme_end) \
4242 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
4243 MACRO_END
4244
4245/*
4246 * This routine is called only when it is known that
4247 * the entry must be split.
4248 */
91447636 4249static void
1c79356b 4250_vm_map_clip_end(
39037602
A
4251 struct vm_map_header *map_header,
4252 vm_map_entry_t entry,
4253 vm_map_offset_t end)
1c79356b 4254{
39037602 4255 vm_map_entry_t new_entry;
1c79356b
A
4256
4257 /*
4258 * Create a new entry and insert it
4259 * AFTER the specified entry
4260 */
4261
fe8ab488
A
4262 if (entry->map_aligned) {
4263 assert(VM_MAP_PAGE_ALIGNED(end,
4264 VM_MAP_HDR_PAGE_MASK(map_header)));
4265 }
4266
7ddcb079 4267 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4268 vm_map_entry_copy_full(new_entry, entry);
4269
e2d2fc5c 4270 assert(entry->vme_start < end);
1c79356b 4271 new_entry->vme_start = entry->vme_end = end;
3e170ce0
A
4272 VME_OFFSET_SET(new_entry,
4273 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 4274 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 4275
6d2010ae 4276 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b
A
4277
4278 if (entry->is_sub_map)
3e170ce0 4279 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4280 else
3e170ce0 4281 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4282}
4283
4284
4285/*
4286 * VM_MAP_RANGE_CHECK: [ internal use only ]
4287 *
4288 * Asserts that the starting and ending region
4289 * addresses fall within the valid range of the map.
4290 */
2d21ac55
A
4291#define VM_MAP_RANGE_CHECK(map, start, end) \
4292 MACRO_BEGIN \
4293 if (start < vm_map_min(map)) \
4294 start = vm_map_min(map); \
4295 if (end > vm_map_max(map)) \
4296 end = vm_map_max(map); \
4297 if (start > end) \
4298 start = end; \
4299 MACRO_END
1c79356b
A
4300
4301/*
4302 * vm_map_range_check: [ internal use only ]
4303 *
4304 * Check that the region defined by the specified start and
4305 *	end addresses is wholly contained within a single map
4306 *	entry or set of adjacent map entries of the specified map,
4307 * i.e. the specified region contains no unmapped space.
4308 * If any or all of the region is unmapped, FALSE is returned.
4309 * Otherwise, TRUE is returned and if the output argument 'entry'
4310 * is not NULL it points to the map entry containing the start
4311 * of the region.
4312 *
4313 * The map is locked for reading on entry and is left locked.
4314 */
91447636 4315static boolean_t
1c79356b 4316vm_map_range_check(
39037602
A
4317 vm_map_t map,
4318 vm_map_offset_t start,
4319 vm_map_offset_t end,
1c79356b
A
4320 vm_map_entry_t *entry)
4321{
4322 vm_map_entry_t cur;
39037602 4323 vm_map_offset_t prev;
1c79356b
A
4324
4325 /*
4326 * Basic sanity checks first
4327 */
4328 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4329 return (FALSE);
4330
4331 /*
4332 * Check first if the region starts within a valid
4333 * mapping for the map.
4334 */
4335 if (!vm_map_lookup_entry(map, start, &cur))
4336 return (FALSE);
4337
4338 /*
4339 * Optimize for the case that the region is contained
4340 * in a single map entry.
4341 */
4342 if (entry != (vm_map_entry_t *) NULL)
4343 *entry = cur;
4344 if (end <= cur->vme_end)
4345 return (TRUE);
4346
4347 /*
4348 * If the region is not wholly contained within a
4349 * single entry, walk the entries looking for holes.
4350 */
4351 prev = cur->vme_end;
4352 cur = cur->vme_next;
4353 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4354 if (end <= cur->vme_end)
4355 return (TRUE);
4356 prev = cur->vme_end;
4357 cur = cur->vme_next;
4358 }
4359 return (FALSE);
4360}
4361
4362/*
4363 * vm_map_submap: [ kernel use only ]
4364 *
4365 * Mark the given range as handled by a subordinate map.
4366 *
4367 * This range must have been created with vm_map_find using
4368 * the vm_submap_object, and no other operations may have been
4369 * performed on this range prior to calling vm_map_submap.
4370 *
4371 * Only a limited number of operations can be performed
4372 *	within this range after calling vm_map_submap:
4373 * vm_fault
4374 * [Don't try vm_map_copyin!]
4375 *
4376 * To remove a submapping, one must first remove the
4377 * range from the superior map, and then destroy the
4378 * submap (if desired). [Better yet, don't try it.]
4379 */
4380kern_return_t
4381vm_map_submap(
fe8ab488 4382 vm_map_t map,
91447636
A
4383 vm_map_offset_t start,
4384 vm_map_offset_t end,
fe8ab488 4385 vm_map_t submap,
91447636 4386 vm_map_offset_t offset,
0c530ab8 4387#ifdef NO_NESTED_PMAP
91447636 4388 __unused
0c530ab8 4389#endif /* NO_NESTED_PMAP */
fe8ab488 4390 boolean_t use_pmap)
1c79356b
A
4391{
4392 vm_map_entry_t entry;
39037602
A
4393 kern_return_t result = KERN_INVALID_ARGUMENT;
4394 vm_object_t object;
1c79356b
A
4395
4396 vm_map_lock(map);
4397
2d21ac55 4398 if (! vm_map_lookup_entry(map, start, &entry)) {
1c79356b 4399 entry = entry->vme_next;
2d21ac55 4400 }
1c79356b 4401
2d21ac55
A
4402 if (entry == vm_map_to_entry(map) ||
4403 entry->is_sub_map) {
1c79356b
A
4404 vm_map_unlock(map);
4405 return KERN_INVALID_ARGUMENT;
4406 }
4407
2d21ac55 4408 vm_map_clip_start(map, entry, start);
1c79356b
A
4409 vm_map_clip_end(map, entry, end);
4410
4411 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4412 (!entry->is_sub_map) &&
3e170ce0 4413 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
4414 (object->resident_page_count == 0) &&
4415 (object->copy == VM_OBJECT_NULL) &&
4416 (object->shadow == VM_OBJECT_NULL) &&
4417 (!object->pager_created)) {
3e170ce0
A
4418 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4419 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
4420 vm_object_deallocate(object);
4421 entry->is_sub_map = TRUE;
fe8ab488 4422 entry->use_pmap = FALSE;
3e170ce0 4423 VME_SUBMAP_SET(entry, submap);
2d21ac55 4424 vm_map_reference(submap);
316670eb
A
4425 if (submap->mapped_in_other_pmaps == FALSE &&
4426 vm_map_pmap(submap) != PMAP_NULL &&
4427 vm_map_pmap(submap) != vm_map_pmap(map)) {
4428 /*
4429 * This submap is being mapped in a map
4430 * that uses a different pmap.
4431 * Set its "mapped_in_other_pmaps" flag
4432 * to indicate that we now need to
4433 * remove mappings from all pmaps rather
4434 * than just the submap's pmap.
4435 */
4436 submap->mapped_in_other_pmaps = TRUE;
4437 }
2d21ac55 4438
0c530ab8 4439#ifndef NO_NESTED_PMAP
2d21ac55
A
4440 if (use_pmap) {
4441 /* nest if platform code will allow */
4442 if(submap->pmap == NULL) {
316670eb
A
4443 ledger_t ledger = map->pmap->ledger;
4444 submap->pmap = pmap_create(ledger,
4445 (vm_map_size_t) 0, FALSE);
2d21ac55
A
4446 if(submap->pmap == PMAP_NULL) {
4447 vm_map_unlock(map);
4448 return(KERN_NO_SPACE);
55e303ae 4449 }
55e303ae 4450 }
2d21ac55 4451 result = pmap_nest(map->pmap,
3e170ce0 4452 (VME_SUBMAP(entry))->pmap,
2d21ac55
A
4453 (addr64_t)start,
4454 (addr64_t)start,
4455 (uint64_t)(end - start));
4456 if(result)
4457 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4458 entry->use_pmap = TRUE;
4459 }
0c530ab8 4460#else /* NO_NESTED_PMAP */
2d21ac55 4461 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 4462#endif /* NO_NESTED_PMAP */
2d21ac55 4463 result = KERN_SUCCESS;
1c79356b
A
4464 }
4465 vm_map_unlock(map);
4466
4467 return(result);
4468}
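
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * a kernel-only use of vm_map_submap(), per the header comment above.  The
 * range [start, end) is assumed to have been reserved beforehand with
 * vm_map_find() using vm_submap_object and left untouched since; names are
 * hypothetical.
 */
#if 0 /* example only */
static kern_return_t
install_submap_example(
	vm_map_t	parent_map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_t	child_map)
{
	return vm_map_submap(parent_map, start, end,
			     child_map,
			     (vm_map_offset_t)0,	/* offset into the submap */
			     FALSE);			/* use_pmap: no pmap nesting */
}
#endif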
4469
490019cf 4470
1c79356b
A
4471/*
4472 * vm_map_protect:
4473 *
4474 * Sets the protection of the specified address
4475 * region in the target map. If "set_max" is
4476 * specified, the maximum protection is to be set;
4477 * otherwise, only the current protection is affected.
4478 */
4479kern_return_t
4480vm_map_protect(
39037602
A
4481 vm_map_t map,
4482 vm_map_offset_t start,
4483 vm_map_offset_t end,
4484 vm_prot_t new_prot,
4485 boolean_t set_max)
4486{
4487 vm_map_entry_t current;
4488 vm_map_offset_t prev;
1c79356b
A
4489 vm_map_entry_t entry;
4490 vm_prot_t new_max;
1c79356b
A
4491
4492 XPR(XPR_VM_MAP,
2d21ac55 4493 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 4494 map, start, end, new_prot, set_max);
1c79356b
A
4495
4496 vm_map_lock(map);
4497
91447636
A
4498 /* LP64todo - remove this check when vm_map_commpage64()
4499 * no longer has to stuff in a map_entry for the commpage
4500 * above the map's max_offset.
4501 */
4502 if (start >= map->max_offset) {
4503 vm_map_unlock(map);
4504 return(KERN_INVALID_ADDRESS);
4505 }
4506
b0d623f7
A
4507 while(1) {
4508 /*
4509 * Lookup the entry. If it doesn't start in a valid
4510 * entry, return an error.
4511 */
4512 if (! vm_map_lookup_entry(map, start, &entry)) {
4513 vm_map_unlock(map);
4514 return(KERN_INVALID_ADDRESS);
4515 }
4516
4517 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4518 start = SUPERPAGE_ROUND_DOWN(start);
4519 continue;
4520 }
4521 break;
4522 }
4523 if (entry->superpage_size)
4524 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
4525
4526 /*
4527 * Make a first pass to check for protection and address
4528 * violations.
4529 */
4530
4531 current = entry;
4532 prev = current->vme_start;
4533 while ((current != vm_map_to_entry(map)) &&
4534 (current->vme_start < end)) {
4535
4536 /*
4537 * If there is a hole, return an error.
4538 */
4539 if (current->vme_start != prev) {
4540 vm_map_unlock(map);
4541 return(KERN_INVALID_ADDRESS);
4542 }
4543
4544 new_max = current->max_protection;
4545 if(new_prot & VM_PROT_COPY) {
4546 new_max |= VM_PROT_WRITE;
4547 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4548 vm_map_unlock(map);
4549 return(KERN_PROTECTION_FAILURE);
4550 }
4551 } else {
4552 if ((new_prot & new_max) != new_prot) {
4553 vm_map_unlock(map);
4554 return(KERN_PROTECTION_FAILURE);
4555 }
4556 }
4557
593a1d5f 4558
1c79356b
A
4559 prev = current->vme_end;
4560 current = current->vme_next;
4561 }
39037602
A
4562
4563
1c79356b
A
4564 if (end > prev) {
4565 vm_map_unlock(map);
4566 return(KERN_INVALID_ADDRESS);
4567 }
4568
4569 /*
4570 * Go back and fix up protections.
4571 * Clip to start here if the range starts within
4572 * the entry.
4573 */
4574
4575 current = entry;
2d21ac55
A
4576 if (current != vm_map_to_entry(map)) {
4577 /* clip and unnest if necessary */
4578 vm_map_clip_start(map, current, start);
1c79356b 4579 }
2d21ac55 4580
1c79356b
A
4581 while ((current != vm_map_to_entry(map)) &&
4582 (current->vme_start < end)) {
4583
4584 vm_prot_t old_prot;
4585
4586 vm_map_clip_end(map, current, end);
4587
fe8ab488
A
4588 if (current->is_sub_map) {
4589 /* clipping did unnest if needed */
4590 assert(!current->use_pmap);
4591 }
2d21ac55 4592
1c79356b
A
4593 old_prot = current->protection;
4594
4595 if(new_prot & VM_PROT_COPY) {
4596 /* caller is asking specifically to copy the */
4597 /* mapped data, this implies that max protection */
4598 /* will include write. Caller must be prepared */
4599 /* for loss of shared memory communication in the */
4600 /* target area after taking this step */
6d2010ae 4601
3e170ce0
A
4602 if (current->is_sub_map == FALSE &&
4603 VME_OBJECT(current) == VM_OBJECT_NULL) {
4604 VME_OBJECT_SET(current,
4605 vm_object_allocate(
4606 (vm_map_size_t)
4607 (current->vme_end -
4608 current->vme_start)));
4609 VME_OFFSET_SET(current, 0);
fe8ab488 4610 assert(current->use_pmap);
6d2010ae 4611 }
3e170ce0 4612 assert(current->wired_count == 0);
1c79356b
A
4613 current->needs_copy = TRUE;
4614 current->max_protection |= VM_PROT_WRITE;
4615 }
4616
4617 if (set_max)
4618 current->protection =
4619 (current->max_protection =
2d21ac55
A
4620 new_prot & ~VM_PROT_COPY) &
4621 old_prot;
1c79356b
A
4622 else
4623 current->protection = new_prot & ~VM_PROT_COPY;
4624
4625 /*
4626 * Update physical map if necessary.
4627 * If the request is to turn off write protection,
4628 * we won't do it for real (in pmap). This is because
4629 * it would cause copy-on-write to fail. We've already
4630		 * set the new protection in the map, so if a
4631 * write-protect fault occurred, it will be fixed up
4632 * properly, COW or not.
4633 */
1c79356b 4634 if (current->protection != old_prot) {
1c79356b
A
4635 /* Look one level in we support nested pmaps */
4636 /* from mapped submaps which are direct entries */
4637 /* in our map */
0c530ab8 4638
2d21ac55 4639 vm_prot_t prot;
0c530ab8 4640
39037602
A
4641 prot = current->protection;
4642 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
4643 prot &= ~VM_PROT_WRITE;
4644 } else {
4645 assert(!VME_OBJECT(current)->code_signed);
4646 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4647 }
2d21ac55 4648
3e170ce0 4649 if (override_nx(map, VME_ALIAS(current)) && prot)
0c530ab8 4650 prot |= VM_PROT_EXECUTE;
2d21ac55 4651
490019cf 4652
0c530ab8 4653 if (current->is_sub_map && current->use_pmap) {
3e170ce0 4654 pmap_protect(VME_SUBMAP(current)->pmap,
2d21ac55
A
4655 current->vme_start,
4656 current->vme_end,
4657 prot);
1c79356b 4658 } else {
2d21ac55
A
4659 pmap_protect(map->pmap,
4660 current->vme_start,
4661 current->vme_end,
4662 prot);
1c79356b 4663 }
1c79356b
A
4664 }
4665 current = current->vme_next;
4666 }
4667
5353443c 4668 current = entry;
91447636
A
4669 while ((current != vm_map_to_entry(map)) &&
4670 (current->vme_start <= end)) {
5353443c
A
4671 vm_map_simplify_entry(map, current);
4672 current = current->vme_next;
4673 }
4674
1c79356b
A
4675 vm_map_unlock(map);
4676 return(KERN_SUCCESS);
4677}
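
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * typical calls to vm_map_protect().  The first call changes only the
 * current protection; the second, with set_max == TRUE, also lowers the
 * maximum protection so that write access cannot be re-granted on this
 * range later.  Names are hypothetical.
 */
#if 0 /* example only */
static kern_return_t
make_range_read_only_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;

	/* change only the current protection first */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);

	/* then clamp the maximum protection so write can't come back */
	if (kr == KERN_SUCCESS)
		kr = vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
	return kr;
}
#endif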
4678
4679/*
4680 * vm_map_inherit:
4681 *
4682 * Sets the inheritance of the specified address
4683 * range in the target map. Inheritance
4684 * affects how the map will be shared with
4685 * child maps at the time of vm_map_fork.
4686 */
4687kern_return_t
4688vm_map_inherit(
39037602
A
4689 vm_map_t map,
4690 vm_map_offset_t start,
4691 vm_map_offset_t end,
4692 vm_inherit_t new_inheritance)
1c79356b 4693{
39037602 4694 vm_map_entry_t entry;
1c79356b
A
4695 vm_map_entry_t temp_entry;
4696
4697 vm_map_lock(map);
4698
4699 VM_MAP_RANGE_CHECK(map, start, end);
4700
4701 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4702 entry = temp_entry;
1c79356b
A
4703 }
4704 else {
4705 temp_entry = temp_entry->vme_next;
4706 entry = temp_entry;
4707 }
4708
4709 /* first check entire range for submaps which can't support the */
4710 /* given inheritance. */
4711 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4712 if(entry->is_sub_map) {
91447636
A
4713 if(new_inheritance == VM_INHERIT_COPY) {
4714 vm_map_unlock(map);
1c79356b 4715 return(KERN_INVALID_ARGUMENT);
91447636 4716 }
1c79356b
A
4717 }
4718
4719 entry = entry->vme_next;
4720 }
4721
4722 entry = temp_entry;
2d21ac55
A
4723 if (entry != vm_map_to_entry(map)) {
4724 /* clip and unnest if necessary */
4725 vm_map_clip_start(map, entry, start);
4726 }
1c79356b
A
4727
4728 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4729 vm_map_clip_end(map, entry, end);
fe8ab488
A
4730 if (entry->is_sub_map) {
4731 /* clip did unnest if needed */
4732 assert(!entry->use_pmap);
4733 }
1c79356b
A
4734
4735 entry->inheritance = new_inheritance;
4736
4737 entry = entry->vme_next;
4738 }
4739
4740 vm_map_unlock(map);
4741 return(KERN_SUCCESS);
4742}
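
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * marking a range so it is not passed to a child map at vm_map_fork() time.
 * Note from the first pass above that VM_INHERIT_COPY is rejected for
 * ranges containing submaps.  Names are hypothetical.
 */
#if 0 /* example only */
static kern_return_t
hide_range_from_children_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_NONE);
}
#endif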
4743
2d21ac55
A
4744/*
4745 * Update the accounting for the amount of wired memory in this map. If the user has
4746 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4747 */
4748
4749static kern_return_t
4750add_wire_counts(
4751 vm_map_t map,
4752 vm_map_entry_t entry,
4753 boolean_t user_wire)
4754{
4755 vm_map_size_t size;
4756
4757 if (user_wire) {
6d2010ae 4758 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
4759
4760 /*
4761 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4762 * this map entry.
4763 */
4764
4765 if (entry->user_wired_count == 0) {
4766 size = entry->vme_end - entry->vme_start;
4767
4768 /*
4769 * Since this is the first time the user is wiring this map entry, check to see if we're
4770			 * exceeding the user wire limits. There is a per-map limit, which is the smaller of the
4771			 * process's rlimit and the global vm_user_wire_limit that caps it. There is also
4772 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4773 * limit, then we fail.
4774 */
4775
4776 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6d2010ae
A
4777 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4778 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
2d21ac55
A
4779 return KERN_RESOURCE_SHORTAGE;
4780
4781 /*
4782 * The first time the user wires an entry, we also increment the wired_count and add this to
4783 * the total that has been wired in the map.
4784 */
4785
4786 if (entry->wired_count >= MAX_WIRE_COUNT)
4787 return KERN_FAILURE;
4788
4789 entry->wired_count++;
4790 map->user_wire_size += size;
4791 }
4792
4793 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4794 return KERN_FAILURE;
4795
4796 entry->user_wired_count++;
4797
4798 } else {
4799
4800 /*
4801 * The kernel's wiring the memory. Just bump the count and continue.
4802 */
4803
4804 if (entry->wired_count >= MAX_WIRE_COUNT)
4805 panic("vm_map_wire: too many wirings");
4806
4807 entry->wired_count++;
4808 }
4809
4810 return KERN_SUCCESS;
4811}
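
/*
 * Worked example (editor's illustration; the numbers are hypothetical):
 * suppose MIN(map->user_wire_limit, vm_user_wire_limit) is 64 MB and
 * map->user_wire_size is already 60 MB.  A first-time user wire of an
 * 8 MB entry computes 8 MB + 60 MB = 68 MB > 64 MB, so add_wire_counts()
 * returns KERN_RESOURCE_SHORTAGE before touching any counters; a 4 MB
 * entry would pass this check and proceed to the global limit checks.
 */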
4812
4813/*
4814 * Update the memory wiring accounting now that the given map entry is being unwired.
4815 */
4816
4817static void
4818subtract_wire_counts(
4819 vm_map_t map,
4820 vm_map_entry_t entry,
4821 boolean_t user_wire)
4822{
4823
4824 if (user_wire) {
4825
4826 /*
4827 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4828 */
4829
4830 if (entry->user_wired_count == 1) {
4831
4832 /*
4833 * We're removing the last user wire reference. Decrement the wired_count and the total
4834 * user wired memory for this map.
4835 */
4836
4837 assert(entry->wired_count >= 1);
4838 entry->wired_count--;
4839 map->user_wire_size -= entry->vme_end - entry->vme_start;
4840 }
4841
4842 assert(entry->user_wired_count >= 1);
4843 entry->user_wired_count--;
4844
4845 } else {
4846
4847 /*
4848 * The kernel is unwiring the memory. Just update the count.
4849 */
4850
4851 assert(entry->wired_count >= 1);
4852 entry->wired_count--;
4853 }
4854}
4855
39037602 4856
1c79356b
A
4857/*
4858 * vm_map_wire:
4859 *
4860 * Sets the pageability of the specified address range in the
4861 * target map as wired. Regions specified as not pageable require
4862 * locked-down physical memory and physical page maps. The
4863 * access_type variable indicates types of accesses that must not
4864 * generate page faults. This is checked against protection of
4865 * memory being locked-down.
4866 *
4867 * The map must not be locked, but a reference must remain to the
4868 * map throughout the call.
4869 */
91447636 4870static kern_return_t
1c79356b 4871vm_map_wire_nested(
39037602
A
4872 vm_map_t map,
4873 vm_map_offset_t start,
4874 vm_map_offset_t end,
4875 vm_prot_t caller_prot,
1c79356b 4876 boolean_t user_wire,
9bccf70c 4877 pmap_t map_pmap,
fe8ab488
A
4878 vm_map_offset_t pmap_addr,
4879 ppnum_t *physpage_p)
1c79356b 4880{
39037602
A
4881 vm_map_entry_t entry;
4882 vm_prot_t access_type;
1c79356b 4883 struct vm_map_entry *first_entry, tmp_entry;
91447636 4884 vm_map_t real_map;
39037602 4885 vm_map_offset_t s,e;
1c79356b
A
4886 kern_return_t rc;
4887 boolean_t need_wakeup;
4888 boolean_t main_map = FALSE;
9bccf70c 4889 wait_interrupt_t interruptible_state;
0b4e3aa0 4890 thread_t cur_thread;
1c79356b 4891 unsigned int last_timestamp;
91447636 4892 vm_map_size_t size;
fe8ab488
A
4893 boolean_t wire_and_extract;
4894
3e170ce0
A
4895 access_type = (caller_prot & VM_PROT_ALL);
4896
fe8ab488
A
4897 wire_and_extract = FALSE;
4898 if (physpage_p != NULL) {
4899 /*
4900 * The caller wants the physical page number of the
4901 * wired page. We return only one physical page number
4902 * so this works for only one page at a time.
4903 */
4904 if ((end - start) != PAGE_SIZE) {
4905 return KERN_INVALID_ARGUMENT;
4906 }
4907 wire_and_extract = TRUE;
4908 *physpage_p = 0;
4909 }
1c79356b
A
4910
4911 vm_map_lock(map);
4912 if(map_pmap == NULL)
4913 main_map = TRUE;
4914 last_timestamp = map->timestamp;
4915
4916 VM_MAP_RANGE_CHECK(map, start, end);
4917 assert(page_aligned(start));
4918 assert(page_aligned(end));
39236c6e
A
4919 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4920 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
4921 if (start == end) {
4922 /* We wired what the caller asked for, zero pages */
4923 vm_map_unlock(map);
4924 return KERN_SUCCESS;
4925 }
1c79356b 4926
2d21ac55
A
4927 need_wakeup = FALSE;
4928 cur_thread = current_thread();
4929
4930 s = start;
4931 rc = KERN_SUCCESS;
4932
4933 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 4934 entry = first_entry;
2d21ac55
A
4935 /*
4936 * vm_map_clip_start will be done later.
4937 * We don't want to unnest any nested submaps here !
4938 */
1c79356b
A
4939 } else {
4940 /* Start address is not in map */
2d21ac55
A
4941 rc = KERN_INVALID_ADDRESS;
4942 goto done;
1c79356b
A
4943 }
4944
2d21ac55
A
4945 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4946 /*
4947 * At this point, we have wired from "start" to "s".
4948 * We still need to wire from "s" to "end".
4949 *
4950 * "entry" hasn't been clipped, so it could start before "s"
4951 * and/or end after "end".
4952 */
4953
4954 /* "e" is how far we want to wire in this entry */
4955 e = entry->vme_end;
4956 if (e > end)
4957 e = end;
4958
1c79356b
A
4959 /*
4960 * If another thread is wiring/unwiring this entry then
4961 * block after informing other thread to wake us up.
4962 */
4963 if (entry->in_transition) {
9bccf70c
A
4964 wait_result_t wait_result;
4965
1c79356b
A
4966 /*
4967 * We have not clipped the entry. Make sure that
4968 * the start address is in range so that the lookup
4969 * below will succeed.
2d21ac55
A
4970 * "s" is the current starting point: we've already
4971 * wired from "start" to "s" and we still have
4972 * to wire from "s" to "end".
1c79356b 4973 */
1c79356b
A
4974
4975 entry->needs_wakeup = TRUE;
4976
4977 /*
4978 * wake up anybody waiting on entries that we have
4979 * already wired.
4980 */
4981 if (need_wakeup) {
4982 vm_map_entry_wakeup(map);
4983 need_wakeup = FALSE;
4984 }
4985 /*
4986 * User wiring is interruptible
4987 */
9bccf70c 4988 wait_result = vm_map_entry_wait(map,
2d21ac55
A
4989 (user_wire) ? THREAD_ABORTSAFE :
4990 THREAD_UNINT);
9bccf70c 4991 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
4992 /*
4993 * undo the wirings we have done so far
4994 * We do not clear the needs_wakeup flag,
4995 * because we cannot tell if we were the
4996 * only one waiting.
4997 */
2d21ac55
A
4998 rc = KERN_FAILURE;
4999 goto done;
1c79356b
A
5000 }
5001
1c79356b
A
5002 /*
5003			 * Cannot avoid a lookup here. Reset the timestamp.
5004 */
5005 last_timestamp = map->timestamp;
5006
5007 /*
5008			 * The entry could have been clipped, so look it up again.
5009			 * The worst that can happen is that it no longer exists.
5010 */
5011 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
5012 /*
5013				 * User: undo everything up to the previous
5014				 * entry. Let vm_map_unwire worry about
5015 * checking the validity of the range.
5016 */
2d21ac55
A
5017 rc = KERN_FAILURE;
5018 goto done;
1c79356b
A
5019 }
5020 entry = first_entry;
5021 continue;
5022 }
2d21ac55
A
5023
5024 if (entry->is_sub_map) {
91447636
A
5025 vm_map_offset_t sub_start;
5026 vm_map_offset_t sub_end;
5027 vm_map_offset_t local_start;
5028 vm_map_offset_t local_end;
1c79356b 5029 pmap_t pmap;
2d21ac55 5030
fe8ab488
A
5031 if (wire_and_extract) {
5032 /*
5033 * Wiring would result in copy-on-write
5034 * which would not be compatible with
5035 * the sharing we have with the original
5036 * provider of this memory.
5037 */
5038 rc = KERN_INVALID_ARGUMENT;
5039 goto done;
5040 }
5041
2d21ac55 5042 vm_map_clip_start(map, entry, s);
1c79356b
A
5043 vm_map_clip_end(map, entry, end);
5044
3e170ce0 5045 sub_start = VME_OFFSET(entry);
2d21ac55 5046 sub_end = entry->vme_end;
3e170ce0 5047 sub_end += VME_OFFSET(entry) - entry->vme_start;
2d21ac55 5048
1c79356b
A
5049 local_end = entry->vme_end;
5050 if(map_pmap == NULL) {
2d21ac55
A
5051 vm_object_t object;
5052 vm_object_offset_t offset;
5053 vm_prot_t prot;
5054 boolean_t wired;
5055 vm_map_entry_t local_entry;
5056 vm_map_version_t version;
5057 vm_map_t lookup_map;
5058
1c79356b 5059 if(entry->use_pmap) {
3e170ce0 5060 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
5061 /* ppc implementation requires that */
5062 /* submaps pmap address ranges line */
5063 /* up with parent map */
5064#ifdef notdef
5065 pmap_addr = sub_start;
5066#endif
2d21ac55 5067 pmap_addr = s;
1c79356b
A
5068 } else {
5069 pmap = map->pmap;
2d21ac55 5070 pmap_addr = s;
1c79356b 5071 }
2d21ac55 5072
1c79356b 5073 if (entry->wired_count) {
2d21ac55
A
5074 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5075 goto done;
5076
5077 /*
5078 * The map was not unlocked:
5079 * no need to goto re-lookup.
5080 * Just go directly to next entry.
5081 */
1c79356b 5082 entry = entry->vme_next;
2d21ac55 5083 s = entry->vme_start;
1c79356b
A
5084 continue;
5085
2d21ac55 5086 }
9bccf70c 5087
2d21ac55
A
5088 /* call vm_map_lookup_locked to */
5089 /* cause any needs copy to be */
5090 /* evaluated */
5091 local_start = entry->vme_start;
5092 lookup_map = map;
5093 vm_map_lock_write_to_read(map);
5094 if(vm_map_lookup_locked(
5095 &lookup_map, local_start,
39037602 5096 access_type | VM_PROT_COPY,
2d21ac55
A
5097 OBJECT_LOCK_EXCLUSIVE,
5098 &version, &object,
5099 &offset, &prot, &wired,
5100 NULL,
5101 &real_map)) {
1c79356b 5102
2d21ac55 5103 vm_map_unlock_read(lookup_map);
4bd07ac2 5104 assert(map_pmap == NULL);
2d21ac55
A
5105 vm_map_unwire(map, start,
5106 s, user_wire);
5107 return(KERN_FAILURE);
5108 }
316670eb 5109 vm_object_unlock(object);
2d21ac55
A
5110 if(real_map != lookup_map)
5111 vm_map_unlock(real_map);
5112 vm_map_unlock_read(lookup_map);
5113 vm_map_lock(map);
1c79356b 5114
2d21ac55
A
5115 /* we unlocked, so must re-lookup */
5116 if (!vm_map_lookup_entry(map,
5117 local_start,
5118 &local_entry)) {
5119 rc = KERN_FAILURE;
5120 goto done;
5121 }
5122
5123 /*
5124 * entry could have been "simplified",
5125 * so re-clip
5126 */
5127 entry = local_entry;
5128 assert(s == local_start);
5129 vm_map_clip_start(map, entry, s);
5130 vm_map_clip_end(map, entry, end);
5131 /* re-compute "e" */
5132 e = entry->vme_end;
5133 if (e > end)
5134 e = end;
5135
5136 /* did we have a change of type? */
5137 if (!entry->is_sub_map) {
5138 last_timestamp = map->timestamp;
5139 continue;
1c79356b
A
5140 }
5141 } else {
9bccf70c 5142 local_start = entry->vme_start;
2d21ac55
A
5143 pmap = map_pmap;
5144 }
5145
5146 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5147 goto done;
5148
5149 entry->in_transition = TRUE;
5150
5151 vm_map_unlock(map);
3e170ce0 5152 rc = vm_map_wire_nested(VME_SUBMAP(entry),
1c79356b 5153 sub_start, sub_end,
3e170ce0 5154 caller_prot,
fe8ab488
A
5155 user_wire, pmap, pmap_addr,
5156 NULL);
2d21ac55 5157 vm_map_lock(map);
9bccf70c 5158
1c79356b
A
5159 /*
5160 * Find the entry again. It could have been clipped
5161 * after we unlocked the map.
5162 */
9bccf70c
A
5163 if (!vm_map_lookup_entry(map, local_start,
5164 &first_entry))
5165 panic("vm_map_wire: re-lookup failed");
5166 entry = first_entry;
1c79356b 5167
2d21ac55
A
5168 assert(local_start == s);
5169 /* re-compute "e" */
5170 e = entry->vme_end;
5171 if (e > end)
5172 e = end;
5173
1c79356b
A
5174 last_timestamp = map->timestamp;
5175 while ((entry != vm_map_to_entry(map)) &&
2d21ac55 5176 (entry->vme_start < e)) {
1c79356b
A
5177 assert(entry->in_transition);
5178 entry->in_transition = FALSE;
5179 if (entry->needs_wakeup) {
5180 entry->needs_wakeup = FALSE;
5181 need_wakeup = TRUE;
5182 }
5183 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 5184 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5185 }
5186 entry = entry->vme_next;
5187 }
5188 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5189 goto done;
1c79356b 5190 }
2d21ac55
A
5191
5192 /* no need to relookup again */
5193 s = entry->vme_start;
1c79356b
A
5194 continue;
5195 }
5196
5197 /*
5198 * If this entry is already wired then increment
5199 * the appropriate wire reference count.
5200 */
9bccf70c 5201 if (entry->wired_count) {
fe8ab488
A
5202
5203 if ((entry->protection & access_type) != access_type) {
5204 /* found a protection problem */
5205
5206 /*
5207 * XXX FBDP
5208 * We should always return an error
5209 * in this case but since we didn't
5210 * enforce it before, let's do
5211 * it only for the new "wire_and_extract"
5212 * code path for now...
5213 */
5214 if (wire_and_extract) {
5215 rc = KERN_PROTECTION_FAILURE;
5216 goto done;
5217 }
5218 }
5219
1c79356b
A
5220 /*
5221 * entry is already wired down, get our reference
5222 * after clipping to our range.
5223 */
2d21ac55 5224 vm_map_clip_start(map, entry, s);
1c79356b 5225 vm_map_clip_end(map, entry, end);
1c79356b 5226
2d21ac55
A
5227 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5228 goto done;
5229
fe8ab488
A
5230 if (wire_and_extract) {
5231 vm_object_t object;
5232 vm_object_offset_t offset;
5233 vm_page_t m;
5234
5235 /*
5236 * We don't have to "wire" the page again
5237			 * but we still have to "extract" its
5238 * physical page number, after some sanity
5239 * checks.
5240 */
5241 assert((entry->vme_end - entry->vme_start)
5242 == PAGE_SIZE);
5243 assert(!entry->needs_copy);
5244 assert(!entry->is_sub_map);
3e170ce0 5245 assert(VME_OBJECT(entry));
fe8ab488
A
5246 if (((entry->vme_end - entry->vme_start)
5247 != PAGE_SIZE) ||
5248 entry->needs_copy ||
5249 entry->is_sub_map ||
3e170ce0 5250 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5251 rc = KERN_INVALID_ARGUMENT;
5252 goto done;
5253 }
5254
3e170ce0
A
5255 object = VME_OBJECT(entry);
5256 offset = VME_OFFSET(entry);
fe8ab488
A
5257 /* need exclusive lock to update m->dirty */
5258 if (entry->protection & VM_PROT_WRITE) {
5259 vm_object_lock(object);
5260 } else {
5261 vm_object_lock_shared(object);
5262 }
5263 m = vm_page_lookup(object, offset);
5264 assert(m != VM_PAGE_NULL);
39037602
A
5265 assert(VM_PAGE_WIRED(m));
5266 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
5267 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
fe8ab488
A
5268 if (entry->protection & VM_PROT_WRITE) {
5269 vm_object_lock_assert_exclusive(
39037602 5270 object);
fe8ab488
A
5271 m->dirty = TRUE;
5272 }
5273 } else {
5274 /* not already wired !? */
5275 *physpage_p = 0;
5276 }
5277 vm_object_unlock(object);
5278 }
5279
2d21ac55 5280 /* map was not unlocked: no need to relookup */
1c79356b 5281 entry = entry->vme_next;
2d21ac55 5282 s = entry->vme_start;
1c79356b
A
5283 continue;
5284 }
5285
5286 /*
5287 * Unwired entry or wire request transmitted via submap
5288 */
5289
5290
39037602 5291
1c79356b
A
5292 /*
5293 * Perform actions of vm_map_lookup that need the write
5294 * lock on the map: create a shadow object for a
5295 * copy-on-write region, or an object for a zero-fill
5296 * region.
5297 */
5298 size = entry->vme_end - entry->vme_start;
5299 /*
5300 * If wiring a copy-on-write page, we need to copy it now
5301 * even if we're only (currently) requesting read access.
5302 * This is aggressive, but once it's wired we can't move it.
5303 */
5304 if (entry->needs_copy) {
fe8ab488
A
5305 if (wire_and_extract) {
5306 /*
5307 * We're supposed to share with the original
5308 * provider so should not be "needs_copy"
5309 */
5310 rc = KERN_INVALID_ARGUMENT;
5311 goto done;
5312 }
3e170ce0
A
5313
5314 VME_OBJECT_SHADOW(entry, size);
1c79356b 5315 entry->needs_copy = FALSE;
3e170ce0 5316 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5317 if (wire_and_extract) {
5318 /*
5319 * We're supposed to share with the original
5320 * provider so should already have an object.
5321 */
5322 rc = KERN_INVALID_ARGUMENT;
5323 goto done;
5324 }
3e170ce0
A
5325 VME_OBJECT_SET(entry, vm_object_allocate(size));
5326 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 5327 assert(entry->use_pmap);
1c79356b
A
5328 }
5329
2d21ac55 5330 vm_map_clip_start(map, entry, s);
1c79356b
A
5331 vm_map_clip_end(map, entry, end);
5332
2d21ac55 5333 /* re-compute "e" */
1c79356b 5334 e = entry->vme_end;
2d21ac55
A
5335 if (e > end)
5336 e = end;
1c79356b
A
5337
5338 /*
5339 * Check for holes and protection mismatch.
5340 * Holes: Next entry should be contiguous unless this
5341 * is the end of the region.
5342 * Protection: Access requested must be allowed, unless
5343 * wiring is by protection class
5344 */
2d21ac55
A
5345 if ((entry->vme_end < end) &&
5346 ((entry->vme_next == vm_map_to_entry(map)) ||
5347 (entry->vme_next->vme_start > entry->vme_end))) {
5348 /* found a hole */
5349 rc = KERN_INVALID_ADDRESS;
5350 goto done;
5351 }
5352 if ((entry->protection & access_type) != access_type) {
5353 /* found a protection problem */
5354 rc = KERN_PROTECTION_FAILURE;
5355 goto done;
1c79356b
A
5356 }
5357
5358 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5359
2d21ac55
A
5360 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5361 goto done;
1c79356b
A
5362
5363 entry->in_transition = TRUE;
5364
5365 /*
5366 * This entry might get split once we unlock the map.
5367 * In vm_fault_wire(), we need the current range as
5368 * defined by this entry. In order for this to work
5369 * along with a simultaneous clip operation, we make a
5370 * temporary copy of this entry and use that for the
5371 * wiring. Note that the underlying objects do not
5372 * change during a clip.
5373 */
5374 tmp_entry = *entry;
5375
5376 /*
5377		 * The in_transition state guarantees that the entry
5378		 * (or entries for this range, if a split occurred) will be
5379 * there when the map lock is acquired for the second time.
5380 */
5381 vm_map_unlock(map);
0b4e3aa0 5382
9bccf70c
A
5383 if (!user_wire && cur_thread != THREAD_NULL)
5384 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
5385 else
5386 interruptible_state = THREAD_UNINT;
9bccf70c 5387
1c79356b 5388 if(map_pmap)
9bccf70c 5389 rc = vm_fault_wire(map,
3e170ce0 5390 &tmp_entry, caller_prot, map_pmap, pmap_addr,
fe8ab488 5391 physpage_p);
1c79356b 5392 else
9bccf70c 5393 rc = vm_fault_wire(map,
3e170ce0 5394 &tmp_entry, caller_prot, map->pmap,
fe8ab488
A
5395 tmp_entry.vme_start,
5396 physpage_p);
0b4e3aa0
A
5397
5398 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 5399 thread_interrupt_level(interruptible_state);
0b4e3aa0 5400
1c79356b
A
5401 vm_map_lock(map);
5402
5403 if (last_timestamp+1 != map->timestamp) {
5404 /*
5405 * Find the entry again. It could have been clipped
5406 * after we unlocked the map.
5407 */
5408 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5409 &first_entry))
1c79356b
A
5410 panic("vm_map_wire: re-lookup failed");
5411
5412 entry = first_entry;
5413 }
5414
5415 last_timestamp = map->timestamp;
5416
5417 while ((entry != vm_map_to_entry(map)) &&
5418 (entry->vme_start < tmp_entry.vme_end)) {
5419 assert(entry->in_transition);
5420 entry->in_transition = FALSE;
5421 if (entry->needs_wakeup) {
5422 entry->needs_wakeup = FALSE;
5423 need_wakeup = TRUE;
5424 }
5425 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5426 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5427 }
5428 entry = entry->vme_next;
5429 }
5430
5431 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5432 goto done;
1c79356b 5433 }
2d21ac55 5434
d190cdc3
A
5435 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
5436 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
5437 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
5438 /* found a "new" hole */
5439 s = tmp_entry.vme_end;
5440 rc = KERN_INVALID_ADDRESS;
5441 goto done;
5442 }
5443
2d21ac55 5444 s = entry->vme_start;
d190cdc3 5445
1c79356b 5446 } /* end while loop through map entries */
2d21ac55
A
5447
5448done:
5449 if (rc == KERN_SUCCESS) {
5450 /* repair any damage we may have made to the VM map */
5451 vm_map_simplify_range(map, start, end);
5452 }
5453
1c79356b
A
5454 vm_map_unlock(map);
5455
5456 /*
5457 * wake up anybody waiting on entries we wired.
5458 */
5459 if (need_wakeup)
5460 vm_map_entry_wakeup(map);
5461
2d21ac55
A
5462 if (rc != KERN_SUCCESS) {
5463 /* undo what has been wired so far */
4bd07ac2
A
5464 vm_map_unwire_nested(map, start, s, user_wire,
5465 map_pmap, pmap_addr);
fe8ab488
A
5466 if (physpage_p) {
5467 *physpage_p = 0;
5468 }
2d21ac55
A
5469 }
5470
5471 return rc;
1c79356b
A
5472
5473}
5474
5475kern_return_t
3e170ce0 5476vm_map_wire_external(
39037602
A
5477 vm_map_t map,
5478 vm_map_offset_t start,
5479 vm_map_offset_t end,
5480 vm_prot_t caller_prot,
1c79356b
A
5481 boolean_t user_wire)
5482{
3e170ce0
A
5483 kern_return_t kret;
5484
5485 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5486 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5487 kret = vm_map_wire_nested(map, start, end, caller_prot,
5488 user_wire, (pmap_t)NULL, 0, NULL);
5489 return kret;
5490}
1c79356b 5491
3e170ce0
A
5492kern_return_t
5493vm_map_wire(
39037602
A
5494 vm_map_t map,
5495 vm_map_offset_t start,
5496 vm_map_offset_t end,
5497 vm_prot_t caller_prot,
3e170ce0
A
5498 boolean_t user_wire)
5499{
1c79356b
A
5500 kern_return_t kret;
5501
3e170ce0 5502 kret = vm_map_wire_nested(map, start, end, caller_prot,
fe8ab488
A
5503 user_wire, (pmap_t)NULL, 0, NULL);
5504 return kret;
5505}
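
/*
 * Usage sketch (hypothetical, for illustration only): a kernel-internal
 * caller wiring one map page for read/write.  vm_map_wire_external()
 * above differs only in stamping "caller_prot" with a VM tag derived
 * from the kernel backtrace (vm_tag_bt()) before the nested call.
 * Assumes "start" is map-page aligned and that the caller holds a
 * reference on "map"; the function name is illustrative, not part of
 * the exported interface.
 */
static kern_return_t
vm_map_wire_one_page_sketch(
	vm_map_t	map,
	vm_map_offset_t	start)
{
	/* kernel wiring (user_wire == FALSE) of [start, start + one page) */
	return vm_map_wire(map,
			   start,
			   start + VM_MAP_PAGE_SIZE(map),
			   VM_PROT_READ | VM_PROT_WRITE,
			   FALSE);
}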
5506
5507kern_return_t
3e170ce0 5508vm_map_wire_and_extract_external(
fe8ab488
A
5509 vm_map_t map,
5510 vm_map_offset_t start,
3e170ce0 5511 vm_prot_t caller_prot,
fe8ab488
A
5512 boolean_t user_wire,
5513 ppnum_t *physpage_p)
5514{
3e170ce0
A
5515 kern_return_t kret;
5516
5517 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5518 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5519 kret = vm_map_wire_nested(map,
5520 start,
5521 start+VM_MAP_PAGE_SIZE(map),
5522 caller_prot,
5523 user_wire,
5524 (pmap_t)NULL,
5525 0,
5526 physpage_p);
5527 if (kret != KERN_SUCCESS &&
5528 physpage_p != NULL) {
5529 *physpage_p = 0;
5530 }
5531 return kret;
5532}
fe8ab488 5533
3e170ce0
A
5534kern_return_t
5535vm_map_wire_and_extract(
5536 vm_map_t map,
5537 vm_map_offset_t start,
5538 vm_prot_t caller_prot,
5539 boolean_t user_wire,
5540 ppnum_t *physpage_p)
5541{
fe8ab488
A
5542 kern_return_t kret;
5543
5544 kret = vm_map_wire_nested(map,
5545 start,
5546 start+VM_MAP_PAGE_SIZE(map),
3e170ce0 5547 caller_prot,
fe8ab488
A
5548 user_wire,
5549 (pmap_t)NULL,
5550 0,
5551 physpage_p);
5552 if (kret != KERN_SUCCESS &&
5553 physpage_p != NULL) {
5554 *physpage_p = 0;
5555 }
1c79356b
A
5556 return kret;
5557}
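
/*
 * Usage sketch (hypothetical, for illustration only): wire a single map
 * page and retrieve its physical page number.  Both "_and_extract"
 * variants wire exactly one VM_MAP_PAGE_SIZE(map) page starting at
 * "start" and clear *physpage_p on failure; the "_external" flavor
 * additionally tags caller_prot via vm_tag_bt().  The function name is
 * illustrative.
 */
static kern_return_t
vm_map_wire_and_get_ppnum_sketch(
	vm_map_t	map,
	vm_map_offset_t	start,
	ppnum_t		*ppnum_out)
{
	/* kernel wiring of one page; *ppnum_out is 0 if this fails */
	return vm_map_wire_and_extract(map,
				       start,
				       VM_PROT_READ | VM_PROT_WRITE,
				       FALSE,		/* user_wire */
				       ppnum_out);
}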
5558
5559/*
5560 * vm_map_unwire:
5561 *
5562 * Sets the pageability of the specified address range in the target
5563 * as pageable. Regions specified must have been wired previously.
5564 *
5565 * The map must not be locked, but a reference must remain to the map
5566 * throughout the call.
5567 *
 5568 * The kernel will panic on failures. User unwire ignores holes and
 5569 * unwired and in-transition entries to avoid losing memory by leaving
5570 * it unwired.
5571 */
91447636 5572static kern_return_t
1c79356b 5573vm_map_unwire_nested(
39037602
A
5574 vm_map_t map,
5575 vm_map_offset_t start,
5576 vm_map_offset_t end,
1c79356b 5577 boolean_t user_wire,
9bccf70c 5578 pmap_t map_pmap,
91447636 5579 vm_map_offset_t pmap_addr)
1c79356b 5580{
39037602 5581 vm_map_entry_t entry;
1c79356b
A
5582 struct vm_map_entry *first_entry, tmp_entry;
5583 boolean_t need_wakeup;
5584 boolean_t main_map = FALSE;
5585 unsigned int last_timestamp;
5586
5587 vm_map_lock(map);
5588 if(map_pmap == NULL)
5589 main_map = TRUE;
5590 last_timestamp = map->timestamp;
5591
5592 VM_MAP_RANGE_CHECK(map, start, end);
5593 assert(page_aligned(start));
5594 assert(page_aligned(end));
39236c6e
A
5595 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5596 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 5597
2d21ac55
A
5598 if (start == end) {
5599 /* We unwired what the caller asked for: zero pages */
5600 vm_map_unlock(map);
5601 return KERN_SUCCESS;
5602 }
5603
1c79356b
A
5604 if (vm_map_lookup_entry(map, start, &first_entry)) {
5605 entry = first_entry;
2d21ac55
A
5606 /*
5607 * vm_map_clip_start will be done later.
5608 * We don't want to unnest any nested sub maps here !
5609 */
1c79356b
A
5610 }
5611 else {
2d21ac55
A
5612 if (!user_wire) {
5613 panic("vm_map_unwire: start not found");
5614 }
1c79356b
A
5615 /* Start address is not in map. */
5616 vm_map_unlock(map);
5617 return(KERN_INVALID_ADDRESS);
5618 }
5619
b0d623f7
A
5620 if (entry->superpage_size) {
5621 /* superpages are always wired */
5622 vm_map_unlock(map);
5623 return KERN_INVALID_ADDRESS;
5624 }
5625
1c79356b
A
5626 need_wakeup = FALSE;
5627 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5628 if (entry->in_transition) {
5629 /*
5630 * 1)
5631 * Another thread is wiring down this entry. Note
 5632			 * that if it were not for the other thread, we would
5633 * be unwiring an unwired entry. This is not
5634 * permitted. If we wait, we will be unwiring memory
5635 * we did not wire.
5636 *
5637 * 2)
5638 * Another thread is unwiring this entry. We did not
5639 * have a reference to it, because if we did, this
 5640			 * entry would not be getting unwired now.
5641 */
2d21ac55
A
5642 if (!user_wire) {
5643 /*
5644 * XXX FBDP
5645 * This could happen: there could be some
5646 * overlapping vslock/vsunlock operations
5647 * going on.
5648 * We should probably just wait and retry,
5649 * but then we have to be careful that this
5650 * entry could get "simplified" after
5651 * "in_transition" gets unset and before
5652 * we re-lookup the entry, so we would
5653 * have to re-clip the entry to avoid
5654 * re-unwiring what we have already unwired...
5655 * See vm_map_wire_nested().
5656 *
5657 * Or we could just ignore "in_transition"
 5658				 * here and proceed to decrement the wired
5659 * count(s) on this entry. That should be fine
5660 * as long as "wired_count" doesn't drop all
5661 * the way to 0 (and we should panic if THAT
5662 * happens).
5663 */
1c79356b 5664 panic("vm_map_unwire: in_transition entry");
2d21ac55 5665 }
1c79356b
A
5666
5667 entry = entry->vme_next;
5668 continue;
5669 }
5670
2d21ac55 5671 if (entry->is_sub_map) {
91447636
A
5672 vm_map_offset_t sub_start;
5673 vm_map_offset_t sub_end;
5674 vm_map_offset_t local_end;
1c79356b 5675 pmap_t pmap;
2d21ac55 5676
1c79356b
A
5677 vm_map_clip_start(map, entry, start);
5678 vm_map_clip_end(map, entry, end);
5679
3e170ce0 5680 sub_start = VME_OFFSET(entry);
1c79356b 5681 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 5682 sub_end += VME_OFFSET(entry);
1c79356b
A
5683 local_end = entry->vme_end;
5684 if(map_pmap == NULL) {
2d21ac55 5685 if(entry->use_pmap) {
3e170ce0 5686 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 5687 pmap_addr = sub_start;
2d21ac55 5688 } else {
1c79356b 5689 pmap = map->pmap;
9bccf70c 5690 pmap_addr = start;
2d21ac55
A
5691 }
5692 if (entry->wired_count == 0 ||
5693 (user_wire && entry->user_wired_count == 0)) {
5694 if (!user_wire)
5695 panic("vm_map_unwire: entry is unwired");
5696 entry = entry->vme_next;
5697 continue;
5698 }
5699
5700 /*
5701 * Check for holes
5702 * Holes: Next entry should be contiguous unless
5703 * this is the end of the region.
5704 */
5705 if (((entry->vme_end < end) &&
5706 ((entry->vme_next == vm_map_to_entry(map)) ||
5707 (entry->vme_next->vme_start
5708 > entry->vme_end)))) {
5709 if (!user_wire)
5710 panic("vm_map_unwire: non-contiguous region");
1c79356b 5711/*
2d21ac55
A
5712 entry = entry->vme_next;
5713 continue;
1c79356b 5714*/
2d21ac55 5715 }
1c79356b 5716
2d21ac55 5717 subtract_wire_counts(map, entry, user_wire);
1c79356b 5718
2d21ac55
A
5719 if (entry->wired_count != 0) {
5720 entry = entry->vme_next;
5721 continue;
5722 }
1c79356b 5723
2d21ac55
A
5724 entry->in_transition = TRUE;
5725 tmp_entry = *entry;/* see comment in vm_map_wire() */
5726
5727 /*
5728 * We can unlock the map now. The in_transition state
 5730			 * guarantees existence of the entry.
5730 */
5731 vm_map_unlock(map);
3e170ce0 5732 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5733 sub_start, sub_end, user_wire, pmap, pmap_addr);
5734 vm_map_lock(map);
1c79356b 5735
2d21ac55
A
5736 if (last_timestamp+1 != map->timestamp) {
5737 /*
5738 * Find the entry again. It could have been
5739 * clipped or deleted after we unlocked the map.
5740 */
5741 if (!vm_map_lookup_entry(map,
5742 tmp_entry.vme_start,
5743 &first_entry)) {
5744 if (!user_wire)
5745 panic("vm_map_unwire: re-lookup failed");
5746 entry = first_entry->vme_next;
5747 } else
5748 entry = first_entry;
5749 }
5750 last_timestamp = map->timestamp;
1c79356b 5751
1c79356b 5752 /*
2d21ac55
A
5753 * clear transition bit for all constituent entries
5754 * that were in the original entry (saved in
5755 * tmp_entry). Also check for waiters.
5756 */
5757 while ((entry != vm_map_to_entry(map)) &&
5758 (entry->vme_start < tmp_entry.vme_end)) {
5759 assert(entry->in_transition);
5760 entry->in_transition = FALSE;
5761 if (entry->needs_wakeup) {
5762 entry->needs_wakeup = FALSE;
5763 need_wakeup = TRUE;
5764 }
5765 entry = entry->vme_next;
1c79356b 5766 }
2d21ac55 5767 continue;
1c79356b 5768 } else {
2d21ac55 5769 vm_map_unlock(map);
3e170ce0 5770 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5771 sub_start, sub_end, user_wire, map_pmap,
5772 pmap_addr);
5773 vm_map_lock(map);
1c79356b 5774
2d21ac55
A
5775 if (last_timestamp+1 != map->timestamp) {
5776 /*
5777 * Find the entry again. It could have been
5778 * clipped or deleted after we unlocked the map.
5779 */
5780 if (!vm_map_lookup_entry(map,
5781 tmp_entry.vme_start,
5782 &first_entry)) {
5783 if (!user_wire)
5784 panic("vm_map_unwire: re-lookup failed");
5785 entry = first_entry->vme_next;
5786 } else
5787 entry = first_entry;
5788 }
5789 last_timestamp = map->timestamp;
1c79356b
A
5790 }
5791 }
5792
5793
9bccf70c 5794 if ((entry->wired_count == 0) ||
2d21ac55 5795 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
5796 if (!user_wire)
5797 panic("vm_map_unwire: entry is unwired");
5798
5799 entry = entry->vme_next;
5800 continue;
5801 }
2d21ac55 5802
1c79356b 5803 assert(entry->wired_count > 0 &&
2d21ac55 5804 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
5805
5806 vm_map_clip_start(map, entry, start);
5807 vm_map_clip_end(map, entry, end);
5808
5809 /*
5810 * Check for holes
5811 * Holes: Next entry should be contiguous unless
5812 * this is the end of the region.
5813 */
5814 if (((entry->vme_end < end) &&
2d21ac55
A
5815 ((entry->vme_next == vm_map_to_entry(map)) ||
5816 (entry->vme_next->vme_start > entry->vme_end)))) {
1c79356b
A
5817
5818 if (!user_wire)
5819 panic("vm_map_unwire: non-contiguous region");
5820 entry = entry->vme_next;
5821 continue;
5822 }
5823
2d21ac55 5824 subtract_wire_counts(map, entry, user_wire);
1c79356b 5825
9bccf70c 5826 if (entry->wired_count != 0) {
1c79356b
A
5827 entry = entry->vme_next;
5828 continue;
1c79356b
A
5829 }
5830
b0d623f7
A
5831 if(entry->zero_wired_pages) {
5832 entry->zero_wired_pages = FALSE;
5833 }
5834
1c79356b
A
5835 entry->in_transition = TRUE;
5836 tmp_entry = *entry; /* see comment in vm_map_wire() */
5837
5838 /*
5839 * We can unlock the map now. The in_transition state
 5840		 * guarantees existence of the entry.
5841 */
5842 vm_map_unlock(map);
5843 if(map_pmap) {
9bccf70c 5844 vm_fault_unwire(map,
2d21ac55 5845 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 5846 } else {
9bccf70c 5847 vm_fault_unwire(map,
2d21ac55
A
5848 &tmp_entry, FALSE, map->pmap,
5849 tmp_entry.vme_start);
1c79356b
A
5850 }
5851 vm_map_lock(map);
5852
5853 if (last_timestamp+1 != map->timestamp) {
5854 /*
5855 * Find the entry again. It could have been clipped
5856 * or deleted after we unlocked the map.
5857 */
5858 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5859 &first_entry)) {
1c79356b 5860 if (!user_wire)
2d21ac55 5861 panic("vm_map_unwire: re-lookup failed");
1c79356b
A
5862 entry = first_entry->vme_next;
5863 } else
5864 entry = first_entry;
5865 }
5866 last_timestamp = map->timestamp;
5867
5868 /*
5869 * clear transition bit for all constituent entries that
5870 * were in the original entry (saved in tmp_entry). Also
5871 * check for waiters.
5872 */
5873 while ((entry != vm_map_to_entry(map)) &&
5874 (entry->vme_start < tmp_entry.vme_end)) {
5875 assert(entry->in_transition);
5876 entry->in_transition = FALSE;
5877 if (entry->needs_wakeup) {
5878 entry->needs_wakeup = FALSE;
5879 need_wakeup = TRUE;
5880 }
5881 entry = entry->vme_next;
5882 }
5883 }
91447636
A
5884
5885 /*
5886 * We might have fragmented the address space when we wired this
5887 * range of addresses. Attempt to re-coalesce these VM map entries
5888 * with their neighbors now that they're no longer wired.
5889 * Under some circumstances, address space fragmentation can
5890 * prevent VM object shadow chain collapsing, which can cause
5891 * swap space leaks.
5892 */
5893 vm_map_simplify_range(map, start, end);
5894
1c79356b
A
5895 vm_map_unlock(map);
5896 /*
5897 * wake up anybody waiting on entries that we have unwired.
5898 */
5899 if (need_wakeup)
5900 vm_map_entry_wakeup(map);
5901 return(KERN_SUCCESS);
5902
5903}
5904
5905kern_return_t
5906vm_map_unwire(
39037602
A
5907 vm_map_t map,
5908 vm_map_offset_t start,
5909 vm_map_offset_t end,
1c79356b
A
5910 boolean_t user_wire)
5911{
9bccf70c 5912 return vm_map_unwire_nested(map, start, end,
2d21ac55 5913 user_wire, (pmap_t)NULL, 0);
1c79356b
A
5914}
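
/*
 * Usage sketch (hypothetical, for illustration only): undoing a previous
 * kernel wiring of [start, end).  "user_wire" must match the value used
 * when the range was wired, since kernel and user wirings are accounted
 * separately (wired_count vs. user_wired_count).  Assumes the map is
 * unlocked and the caller still holds a reference on it; the function
 * name is illustrative.
 */
static kern_return_t
vm_map_unwire_range_sketch(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* kernel unwire; pairs with a vm_map_wire(..., user_wire == FALSE) */
	return vm_map_unwire(map, start, end, FALSE);
}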
5915
5916
5917/*
5918 * vm_map_entry_delete: [ internal use only ]
5919 *
5920 * Deallocate the given entry from the target map.
5921 */
91447636 5922static void
1c79356b 5923vm_map_entry_delete(
39037602
A
5924 vm_map_t map,
5925 vm_map_entry_t entry)
1c79356b 5926{
39037602
A
5927 vm_map_offset_t s, e;
5928 vm_object_t object;
5929 vm_map_t submap;
1c79356b
A
5930
5931 s = entry->vme_start;
5932 e = entry->vme_end;
5933 assert(page_aligned(s));
5934 assert(page_aligned(e));
39236c6e
A
5935 if (entry->map_aligned == TRUE) {
5936 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5937 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5938 }
1c79356b
A
5939 assert(entry->wired_count == 0);
5940 assert(entry->user_wired_count == 0);
b0d623f7 5941 assert(!entry->permanent);
1c79356b
A
5942
5943 if (entry->is_sub_map) {
5944 object = NULL;
3e170ce0 5945 submap = VME_SUBMAP(entry);
1c79356b
A
5946 } else {
5947 submap = NULL;
3e170ce0 5948 object = VME_OBJECT(entry);
1c79356b
A
5949 }
5950
6d2010ae 5951 vm_map_store_entry_unlink(map, entry);
1c79356b
A
5952 map->size -= e - s;
5953
5954 vm_map_entry_dispose(map, entry);
5955
5956 vm_map_unlock(map);
5957 /*
5958 * Deallocate the object only after removing all
5959 * pmap entries pointing to its pages.
5960 */
5961 if (submap)
5962 vm_map_deallocate(submap);
5963 else
2d21ac55 5964 vm_object_deallocate(object);
1c79356b
A
5965
5966}
5967
5968void
5969vm_map_submap_pmap_clean(
5970 vm_map_t map,
91447636
A
5971 vm_map_offset_t start,
5972 vm_map_offset_t end,
1c79356b 5973 vm_map_t sub_map,
91447636 5974 vm_map_offset_t offset)
1c79356b 5975{
91447636
A
5976 vm_map_offset_t submap_start;
5977 vm_map_offset_t submap_end;
5978 vm_map_size_t remove_size;
1c79356b
A
5979 vm_map_entry_t entry;
5980
5981 submap_end = offset + (end - start);
5982 submap_start = offset;
b7266188
A
5983
5984 vm_map_lock_read(sub_map);
1c79356b 5985 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
2d21ac55 5986
1c79356b
A
5987 remove_size = (entry->vme_end - entry->vme_start);
5988 if(offset > entry->vme_start)
5989 remove_size -= offset - entry->vme_start;
2d21ac55 5990
1c79356b
A
5991
5992 if(submap_end < entry->vme_end) {
5993 remove_size -=
5994 entry->vme_end - submap_end;
5995 }
5996 if(entry->is_sub_map) {
5997 vm_map_submap_pmap_clean(
5998 sub_map,
5999 start,
6000 start + remove_size,
3e170ce0
A
6001 VME_SUBMAP(entry),
6002 VME_OFFSET(entry));
1c79356b 6003 } else {
9bccf70c 6004
316670eb 6005 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
6006 && (VME_OBJECT(entry) != NULL)) {
6007 vm_object_pmap_protect_options(
6008 VME_OBJECT(entry),
6009 (VME_OFFSET(entry) +
6010 offset -
6011 entry->vme_start),
9bccf70c
A
6012 remove_size,
6013 PMAP_NULL,
6014 entry->vme_start,
3e170ce0
A
6015 VM_PROT_NONE,
6016 PMAP_OPTIONS_REMOVE);
9bccf70c
A
6017 } else {
6018 pmap_remove(map->pmap,
2d21ac55
A
6019 (addr64_t)start,
6020 (addr64_t)(start + remove_size));
9bccf70c 6021 }
1c79356b
A
6022 }
6023 }
6024
6025 entry = entry->vme_next;
2d21ac55 6026
1c79356b 6027 while((entry != vm_map_to_entry(sub_map))
2d21ac55 6028 && (entry->vme_start < submap_end)) {
1c79356b
A
6029 remove_size = (entry->vme_end - entry->vme_start);
6030 if(submap_end < entry->vme_end) {
6031 remove_size -= entry->vme_end - submap_end;
6032 }
6033 if(entry->is_sub_map) {
6034 vm_map_submap_pmap_clean(
6035 sub_map,
6036 (start + entry->vme_start) - offset,
6037 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
6038 VME_SUBMAP(entry),
6039 VME_OFFSET(entry));
1c79356b 6040 } else {
316670eb 6041 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
6042 && (VME_OBJECT(entry) != NULL)) {
6043 vm_object_pmap_protect_options(
6044 VME_OBJECT(entry),
6045 VME_OFFSET(entry),
9bccf70c
A
6046 remove_size,
6047 PMAP_NULL,
6048 entry->vme_start,
3e170ce0
A
6049 VM_PROT_NONE,
6050 PMAP_OPTIONS_REMOVE);
9bccf70c
A
6051 } else {
6052 pmap_remove(map->pmap,
2d21ac55
A
6053 (addr64_t)((start + entry->vme_start)
6054 - offset),
6055 (addr64_t)(((start + entry->vme_start)
6056 - offset) + remove_size));
9bccf70c 6057 }
1c79356b
A
6058 }
6059 entry = entry->vme_next;
b7266188
A
6060 }
6061 vm_map_unlock_read(sub_map);
1c79356b
A
6062 return;
6063}
6064
6065/*
6066 * vm_map_delete: [ internal use only ]
6067 *
6068 * Deallocates the given address range from the target map.
6069 * Removes all user wirings. Unwires one kernel wiring if
6070 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
6071 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
6072 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
6073 *
6074 * This routine is called with map locked and leaves map locked.
6075 */
91447636 6076static kern_return_t
1c79356b 6077vm_map_delete(
91447636
A
6078 vm_map_t map,
6079 vm_map_offset_t start,
6080 vm_map_offset_t end,
6081 int flags,
6082 vm_map_t zap_map)
1c79356b
A
6083{
6084 vm_map_entry_t entry, next;
6085 struct vm_map_entry *first_entry, tmp_entry;
39037602
A
6086 vm_map_offset_t s;
6087 vm_object_t object;
1c79356b
A
6088 boolean_t need_wakeup;
6089 unsigned int last_timestamp = ~0; /* unlikely value */
6090 int interruptible;
1c79356b
A
6091
6092 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
2d21ac55 6093 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
6094
6095 /*
6096 * All our DMA I/O operations in IOKit are currently done by
6097 * wiring through the map entries of the task requesting the I/O.
6098 * Because of this, we must always wait for kernel wirings
6099 * to go away on the entries before deleting them.
6100 *
6101 * Any caller who wants to actually remove a kernel wiring
6102 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
6103 * properly remove one wiring instead of blasting through
6104 * them all.
6105 */
6106 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
6107
b0d623f7
A
6108 while(1) {
6109 /*
6110 * Find the start of the region, and clip it
6111 */
6112 if (vm_map_lookup_entry(map, start, &first_entry)) {
6113 entry = first_entry;
fe8ab488
A
6114 if (map == kalloc_map &&
6115 (entry->vme_start != start ||
6116 entry->vme_end != end)) {
6117 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6118 "mismatched entry %p [0x%llx:0x%llx]\n",
6119 map,
6120 (uint64_t)start,
6121 (uint64_t)end,
6122 entry,
6123 (uint64_t)entry->vme_start,
6124 (uint64_t)entry->vme_end);
6125 }
b0d623f7
A
 6126			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
6127 start = SUPERPAGE_ROUND_DOWN(start);
6128 continue;
6129 }
6130 if (start == entry->vme_start) {
6131 /*
6132 * No need to clip. We don't want to cause
6133 * any unnecessary unnesting in this case...
6134 */
6135 } else {
fe8ab488
A
6136 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6137 entry->map_aligned &&
6138 !VM_MAP_PAGE_ALIGNED(
6139 start,
6140 VM_MAP_PAGE_MASK(map))) {
6141 /*
6142 * The entry will no longer be
6143 * map-aligned after clipping
6144 * and the caller said it's OK.
6145 */
6146 entry->map_aligned = FALSE;
6147 }
6148 if (map == kalloc_map) {
6149 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6150 " clipping %p at 0x%llx\n",
6151 map,
6152 (uint64_t)start,
6153 (uint64_t)end,
6154 entry,
6155 (uint64_t)start);
6156 }
b0d623f7
A
6157 vm_map_clip_start(map, entry, start);
6158 }
6159
2d21ac55 6160 /*
b0d623f7
A
6161 * Fix the lookup hint now, rather than each
6162 * time through the loop.
2d21ac55 6163 */
b0d623f7 6164 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 6165 } else {
fe8ab488
A
6166 if (map->pmap == kernel_pmap &&
6167 map->ref_count != 0) {
6168 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6169 "no map entry at 0x%llx\n",
6170 map,
6171 (uint64_t)start,
6172 (uint64_t)end,
6173 (uint64_t)start);
6174 }
b0d623f7 6175 entry = first_entry->vme_next;
2d21ac55 6176 }
b0d623f7 6177 break;
1c79356b 6178 }
b0d623f7
A
6179 if (entry->superpage_size)
6180 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
6181
6182 need_wakeup = FALSE;
6183 /*
6184 * Step through all entries in this region
6185 */
2d21ac55
A
6186 s = entry->vme_start;
6187 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6188 /*
6189 * At this point, we have deleted all the memory entries
6190 * between "start" and "s". We still need to delete
6191 * all memory entries between "s" and "end".
6192 * While we were blocked and the map was unlocked, some
6193 * new memory entries could have been re-allocated between
6194 * "start" and "s" and we don't want to mess with those.
6195 * Some of those entries could even have been re-assembled
6196 * with an entry after "s" (in vm_map_simplify_entry()), so
6197 * we may have to vm_map_clip_start() again.
6198 */
1c79356b 6199
2d21ac55
A
6200 if (entry->vme_start >= s) {
6201 /*
6202 * This entry starts on or after "s"
6203 * so no need to clip its start.
6204 */
6205 } else {
6206 /*
6207 * This entry has been re-assembled by a
6208 * vm_map_simplify_entry(). We need to
6209 * re-clip its start.
6210 */
fe8ab488
A
6211 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6212 entry->map_aligned &&
6213 !VM_MAP_PAGE_ALIGNED(s,
6214 VM_MAP_PAGE_MASK(map))) {
6215 /*
6216 * The entry will no longer be map-aligned
6217 * after clipping and the caller said it's OK.
6218 */
6219 entry->map_aligned = FALSE;
6220 }
6221 if (map == kalloc_map) {
6222 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6223 "clipping %p at 0x%llx\n",
6224 map,
6225 (uint64_t)start,
6226 (uint64_t)end,
6227 entry,
6228 (uint64_t)s);
6229 }
2d21ac55
A
6230 vm_map_clip_start(map, entry, s);
6231 }
6232 if (entry->vme_end <= end) {
6233 /*
6234 * This entry is going away completely, so no need
6235 * to clip and possibly cause an unnecessary unnesting.
6236 */
6237 } else {
fe8ab488
A
6238 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6239 entry->map_aligned &&
6240 !VM_MAP_PAGE_ALIGNED(end,
6241 VM_MAP_PAGE_MASK(map))) {
6242 /*
6243 * The entry will no longer be map-aligned
6244 * after clipping and the caller said it's OK.
6245 */
6246 entry->map_aligned = FALSE;
6247 }
6248 if (map == kalloc_map) {
6249 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6250 "clipping %p at 0x%llx\n",
6251 map,
6252 (uint64_t)start,
6253 (uint64_t)end,
6254 entry,
6255 (uint64_t)end);
6256 }
2d21ac55
A
6257 vm_map_clip_end(map, entry, end);
6258 }
b0d623f7
A
6259
6260 if (entry->permanent) {
6261 panic("attempt to remove permanent VM map entry "
6262 "%p [0x%llx:0x%llx]\n",
6263 entry, (uint64_t) s, (uint64_t) end);
6264 }
6265
6266
1c79356b 6267 if (entry->in_transition) {
9bccf70c
A
6268 wait_result_t wait_result;
6269
1c79356b
A
6270 /*
6271 * Another thread is wiring/unwiring this entry.
6272 * Let the other thread know we are waiting.
6273 */
2d21ac55 6274 assert(s == entry->vme_start);
1c79356b
A
6275 entry->needs_wakeup = TRUE;
6276
6277 /*
6278 * wake up anybody waiting on entries that we have
6279 * already unwired/deleted.
6280 */
6281 if (need_wakeup) {
6282 vm_map_entry_wakeup(map);
6283 need_wakeup = FALSE;
6284 }
6285
9bccf70c 6286 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
6287
6288 if (interruptible &&
9bccf70c 6289 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
6290 /*
6291 * We do not clear the needs_wakeup flag,
6292 * since we cannot tell if we were the only one.
6293 */
6294 return KERN_ABORTED;
9bccf70c 6295 }
1c79356b
A
6296
6297 /*
6298 * The entry could have been clipped or it
6299 * may not exist anymore. Look it up again.
6300 */
6301 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
6302 /*
6303 * User: use the next entry
6304 */
6305 entry = first_entry->vme_next;
2d21ac55 6306 s = entry->vme_start;
1c79356b
A
6307 } else {
6308 entry = first_entry;
0c530ab8 6309 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6310 }
9bccf70c 6311 last_timestamp = map->timestamp;
1c79356b
A
6312 continue;
6313 } /* end in_transition */
6314
6315 if (entry->wired_count) {
2d21ac55
A
6316 boolean_t user_wire;
6317
6318 user_wire = entry->user_wired_count > 0;
6319
1c79356b 6320 /*
b0d623f7 6321 * Remove a kernel wiring if requested
1c79356b 6322 */
b0d623f7 6323 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 6324 entry->wired_count--;
b0d623f7
A
6325 }
6326
6327 /*
6328 * Remove all user wirings for proper accounting
6329 */
6330 if (entry->user_wired_count > 0) {
6331 while (entry->user_wired_count)
6332 subtract_wire_counts(map, entry, user_wire);
6333 }
1c79356b
A
6334
6335 if (entry->wired_count != 0) {
2d21ac55 6336 assert(map != kernel_map);
1c79356b
A
6337 /*
6338 * Cannot continue. Typical case is when
 6339				 * a user thread has physical I/O pending
 6340				 * on this page. Either wait for the
6341 * kernel wiring to go away or return an
6342 * error.
6343 */
6344 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 6345 wait_result_t wait_result;
1c79356b 6346
2d21ac55 6347 assert(s == entry->vme_start);
1c79356b 6348 entry->needs_wakeup = TRUE;
9bccf70c 6349 wait_result = vm_map_entry_wait(map,
2d21ac55 6350 interruptible);
1c79356b
A
6351
6352 if (interruptible &&
2d21ac55 6353 wait_result == THREAD_INTERRUPTED) {
1c79356b 6354 /*
2d21ac55 6355 * We do not clear the
1c79356b
A
6356 * needs_wakeup flag, since we
6357 * cannot tell if we were the
6358 * only one.
2d21ac55 6359 */
1c79356b 6360 return KERN_ABORTED;
9bccf70c 6361 }
1c79356b
A
6362
6363 /*
2d21ac55 6364 * The entry could have been clipped or
1c79356b
A
6365 * it may not exist anymore. Look it
6366 * up again.
2d21ac55 6367 */
1c79356b 6368 if (!vm_map_lookup_entry(map, s,
2d21ac55
A
6369 &first_entry)) {
6370 assert(map != kernel_map);
1c79356b 6371 /*
2d21ac55
A
6372 * User: use the next entry
6373 */
1c79356b 6374 entry = first_entry->vme_next;
2d21ac55 6375 s = entry->vme_start;
1c79356b
A
6376 } else {
6377 entry = first_entry;
0c530ab8 6378 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6379 }
9bccf70c 6380 last_timestamp = map->timestamp;
1c79356b
A
6381 continue;
6382 }
6383 else {
6384 return KERN_FAILURE;
6385 }
6386 }
6387
6388 entry->in_transition = TRUE;
6389 /*
6390 * copy current entry. see comment in vm_map_wire()
6391 */
6392 tmp_entry = *entry;
2d21ac55 6393 assert(s == entry->vme_start);
1c79356b
A
6394
6395 /*
6396 * We can unlock the map now. The in_transition
 6397			 * state guarantees existence of the entry.
6398 */
6399 vm_map_unlock(map);
2d21ac55
A
6400
6401 if (tmp_entry.is_sub_map) {
6402 vm_map_t sub_map;
6403 vm_map_offset_t sub_start, sub_end;
6404 pmap_t pmap;
6405 vm_map_offset_t pmap_addr;
6406
6407
3e170ce0
A
6408 sub_map = VME_SUBMAP(&tmp_entry);
6409 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55
A
6410 sub_end = sub_start + (tmp_entry.vme_end -
6411 tmp_entry.vme_start);
6412 if (tmp_entry.use_pmap) {
6413 pmap = sub_map->pmap;
6414 pmap_addr = tmp_entry.vme_start;
6415 } else {
6416 pmap = map->pmap;
6417 pmap_addr = tmp_entry.vme_start;
6418 }
6419 (void) vm_map_unwire_nested(sub_map,
6420 sub_start, sub_end,
6421 user_wire,
6422 pmap, pmap_addr);
6423 } else {
6424
3e170ce0 6425 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
6426 pmap_protect_options(
6427 map->pmap,
6428 tmp_entry.vme_start,
6429 tmp_entry.vme_end,
6430 VM_PROT_NONE,
6431 PMAP_OPTIONS_REMOVE,
6432 NULL);
6433 }
2d21ac55 6434 vm_fault_unwire(map, &tmp_entry,
3e170ce0 6435 VME_OBJECT(&tmp_entry) == kernel_object,
2d21ac55
A
6436 map->pmap, tmp_entry.vme_start);
6437 }
6438
1c79356b
A
6439 vm_map_lock(map);
6440
6441 if (last_timestamp+1 != map->timestamp) {
6442 /*
6443 * Find the entry again. It could have
6444 * been clipped after we unlocked the map.
6445 */
6446 if (!vm_map_lookup_entry(map, s, &first_entry)){
6447 assert((map != kernel_map) &&
2d21ac55 6448 (!entry->is_sub_map));
1c79356b 6449 first_entry = first_entry->vme_next;
2d21ac55 6450 s = first_entry->vme_start;
1c79356b 6451 } else {
0c530ab8 6452 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6453 }
6454 } else {
0c530ab8 6455 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6456 first_entry = entry;
6457 }
6458
6459 last_timestamp = map->timestamp;
6460
6461 entry = first_entry;
6462 while ((entry != vm_map_to_entry(map)) &&
6463 (entry->vme_start < tmp_entry.vme_end)) {
6464 assert(entry->in_transition);
6465 entry->in_transition = FALSE;
6466 if (entry->needs_wakeup) {
6467 entry->needs_wakeup = FALSE;
6468 need_wakeup = TRUE;
6469 }
6470 entry = entry->vme_next;
6471 }
6472 /*
6473 * We have unwired the entry(s). Go back and
6474 * delete them.
6475 */
6476 entry = first_entry;
6477 continue;
6478 }
6479
6480 /* entry is unwired */
6481 assert(entry->wired_count == 0);
6482 assert(entry->user_wired_count == 0);
6483
2d21ac55
A
6484 assert(s == entry->vme_start);
6485
6486 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6487 /*
6488 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6489 * vm_map_delete(), some map entries might have been
6490 * transferred to a "zap_map", which doesn't have a
6491 * pmap. The original pmap has already been flushed
6492 * in the vm_map_delete() call targeting the original
6493 * map, but when we get to destroying the "zap_map",
6494 * we don't have any pmap to flush, so let's just skip
6495 * all this.
6496 */
6497 } else if (entry->is_sub_map) {
6498 if (entry->use_pmap) {
0c530ab8 6499#ifndef NO_NESTED_PMAP
3e170ce0
A
6500 int pmap_flags;
6501
6502 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6503 /*
6504 * This is the final cleanup of the
6505 * address space being terminated.
6506 * No new mappings are expected and
6507 * we don't really need to unnest the
6508 * shared region (and lose the "global"
6509 * pmap mappings, if applicable).
6510 *
6511 * Tell the pmap layer that we're
6512 * "clean" wrt nesting.
6513 */
6514 pmap_flags = PMAP_UNNEST_CLEAN;
6515 } else {
6516 /*
6517 * We're unmapping part of the nested
6518 * shared region, so we can't keep the
6519 * nested pmap.
6520 */
6521 pmap_flags = 0;
6522 }
6523 pmap_unnest_options(
6524 map->pmap,
6525 (addr64_t)entry->vme_start,
6526 entry->vme_end - entry->vme_start,
6527 pmap_flags);
0c530ab8 6528#endif /* NO_NESTED_PMAP */
316670eb 6529 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
9bccf70c
A
6530 /* clean up parent map/maps */
6531 vm_map_submap_pmap_clean(
6532 map, entry->vme_start,
6533 entry->vme_end,
3e170ce0
A
6534 VME_SUBMAP(entry),
6535 VME_OFFSET(entry));
9bccf70c 6536 }
2d21ac55 6537 } else {
1c79356b
A
6538 vm_map_submap_pmap_clean(
6539 map, entry->vme_start, entry->vme_end,
3e170ce0
A
6540 VME_SUBMAP(entry),
6541 VME_OFFSET(entry));
2d21ac55 6542 }
3e170ce0
A
6543 } else if (VME_OBJECT(entry) != kernel_object &&
6544 VME_OBJECT(entry) != compressor_object) {
6545 object = VME_OBJECT(entry);
39236c6e
A
6546 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6547 vm_object_pmap_protect_options(
3e170ce0 6548 object, VME_OFFSET(entry),
55e303ae
A
6549 entry->vme_end - entry->vme_start,
6550 PMAP_NULL,
6551 entry->vme_start,
39236c6e
A
6552 VM_PROT_NONE,
6553 PMAP_OPTIONS_REMOVE);
3e170ce0 6554 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
39236c6e
A
6555 (map->pmap == kernel_pmap)) {
6556 /* Remove translations associated
6557 * with this range unless the entry
6558 * does not have an object, or
6559 * it's the kernel map or a descendant
6560 * since the platform could potentially
6561 * create "backdoor" mappings invisible
6562 * to the VM. It is expected that
6563 * objectless, non-kernel ranges
6564 * do not have such VM invisible
6565 * translations.
6566 */
6567 pmap_remove_options(map->pmap,
6568 (addr64_t)entry->vme_start,
6569 (addr64_t)entry->vme_end,
6570 PMAP_OPTIONS_REMOVE);
1c79356b
A
6571 }
6572 }
6573
fe8ab488
A
6574 if (entry->iokit_acct) {
6575 /* alternate accounting */
ecc0ceb4
A
6576 DTRACE_VM4(vm_map_iokit_unmapped_region,
6577 vm_map_t, map,
6578 vm_map_offset_t, entry->vme_start,
6579 vm_map_offset_t, entry->vme_end,
6580 int, VME_ALIAS(entry));
fe8ab488
A
6581 vm_map_iokit_unmapped_region(map,
6582 (entry->vme_end -
6583 entry->vme_start));
6584 entry->iokit_acct = FALSE;
6585 }
6586
91447636
A
6587 /*
6588 * All pmap mappings for this map entry must have been
6589 * cleared by now.
6590 */
fe8ab488 6591#if DEBUG
91447636
A
6592 assert(vm_map_pmap_is_empty(map,
6593 entry->vme_start,
6594 entry->vme_end));
fe8ab488 6595#endif /* DEBUG */
91447636 6596
1c79356b 6597 next = entry->vme_next;
fe8ab488
A
6598
6599 if (map->pmap == kernel_pmap &&
6600 map->ref_count != 0 &&
6601 entry->vme_end < end &&
6602 (next == vm_map_to_entry(map) ||
6603 next->vme_start != entry->vme_end)) {
6604 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6605 "hole after %p at 0x%llx\n",
6606 map,
6607 (uint64_t)start,
6608 (uint64_t)end,
6609 entry,
6610 (uint64_t)entry->vme_end);
6611 }
6612
1c79356b
A
6613 s = next->vme_start;
6614 last_timestamp = map->timestamp;
91447636
A
6615
6616 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6617 zap_map != VM_MAP_NULL) {
2d21ac55 6618 vm_map_size_t entry_size;
91447636
A
6619 /*
6620 * The caller wants to save the affected VM map entries
6621 * into the "zap_map". The caller will take care of
6622 * these entries.
6623 */
6624 /* unlink the entry from "map" ... */
6d2010ae 6625 vm_map_store_entry_unlink(map, entry);
91447636 6626 /* ... and add it to the end of the "zap_map" */
6d2010ae 6627 vm_map_store_entry_link(zap_map,
91447636
A
6628 vm_map_last_entry(zap_map),
6629 entry);
2d21ac55
A
6630 entry_size = entry->vme_end - entry->vme_start;
6631 map->size -= entry_size;
6632 zap_map->size += entry_size;
6633 /* we didn't unlock the map, so no timestamp increase */
6634 last_timestamp--;
91447636
A
6635 } else {
6636 vm_map_entry_delete(map, entry);
6637 /* vm_map_entry_delete unlocks the map */
6638 vm_map_lock(map);
6639 }
6640
1c79356b
A
6641 entry = next;
6642
6643 if(entry == vm_map_to_entry(map)) {
6644 break;
6645 }
6646 if (last_timestamp+1 != map->timestamp) {
6647 /*
 6648			 * We are responsible for deleting everything
 6649			 * in the given range. If someone has interfered,
 6650			 * we pick up where we left off; back-fills should
 6651			 * be all right for anyone except vm_map_delete, and
 6652			 * we have to assume that the task has been fully
 6653			 * disabled before we get here.
6654 */
6655 if (!vm_map_lookup_entry(map, s, &entry)){
6656 entry = entry->vme_next;
2d21ac55 6657 s = entry->vme_start;
1c79356b 6658 } else {
2d21ac55 6659 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6660 }
6661 /*
 6662			 * Others can not only allocate behind us; entries can
 6663			 * also be coalesced while we don't hold the map lock.
6664 */
6665 if(entry == vm_map_to_entry(map)) {
6666 break;
6667 }
1c79356b
A
6668 }
6669 last_timestamp = map->timestamp;
6670 }
6671
6672 if (map->wait_for_space)
6673 thread_wakeup((event_t) map);
6674 /*
6675 * wake up anybody waiting on entries that we have already deleted.
6676 */
6677 if (need_wakeup)
6678 vm_map_entry_wakeup(map);
6679
6680 return KERN_SUCCESS;
6681}
6682
6683/*
6684 * vm_map_remove:
6685 *
6686 * Remove the given address range from the target map.
6687 * This is the exported form of vm_map_delete.
6688 */
6689kern_return_t
6690vm_map_remove(
39037602
A
6691 vm_map_t map,
6692 vm_map_offset_t start,
6693 vm_map_offset_t end,
6694 boolean_t flags)
1c79356b 6695{
39037602 6696 kern_return_t result;
9bccf70c 6697
1c79356b
A
6698 vm_map_lock(map);
6699 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
6700 /*
6701 * For the zone_map, the kernel controls the allocation/freeing of memory.
6702 * Any free to the zone_map should be within the bounds of the map and
6703 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6704 * free to the zone_map into a no-op, there is a problem and we should
6705 * panic.
6706 */
6707 if ((map == zone_map) && (start == end))
6708 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
91447636 6709 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 6710 vm_map_unlock(map);
91447636 6711
1c79356b
A
6712 return(result);
6713}
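
/*
 * Usage sketch (hypothetical, for illustration only): removing a range
 * that the kernel wired exactly once.  VM_MAP_REMOVE_KUNWIRE drops that
 * one kernel wiring; without it, vm_map_delete() waits for kernel
 * wirings to go away (VM_MAP_REMOVE_WAIT_FOR_KWIRE is always implied).
 * The function name is illustrative.
 */
static kern_return_t
vm_map_remove_wired_range_sketch(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
}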
6714
39037602
A
6715/*
6716 * vm_map_remove_locked:
6717 *
6718 * Remove the given address range from the target locked map.
6719 * This is the exported form of vm_map_delete.
6720 */
6721kern_return_t
6722vm_map_remove_locked(
6723 vm_map_t map,
6724 vm_map_offset_t start,
6725 vm_map_offset_t end,
6726 boolean_t flags)
6727{
6728 kern_return_t result;
6729
6730 VM_MAP_RANGE_CHECK(map, start, end);
6731 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6732 return(result);
6733}
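
/*
 * Usage sketch (hypothetical, for illustration only): the "_locked"
 * variant is for callers that already hold the map lock, e.g. to make
 * the removal atomic with other map updates.  Assumes the conventional
 * VM_MAP_NO_FLAGS (0) flags value; the function name is illustrative.
 */
static kern_return_t
vm_map_remove_under_lock_sketch(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;

	vm_map_lock(map);
	/* ... other updates that must be atomic with the removal ... */
	kr = vm_map_remove_locked(map, start, end, VM_MAP_NO_FLAGS);
	vm_map_unlock(map);
	return kr;
}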
6734
1c79356b 6735
1c79356b
A
6736/*
6737 * Routine: vm_map_copy_discard
6738 *
6739 * Description:
6740 * Dispose of a map copy object (returned by
6741 * vm_map_copyin).
6742 */
6743void
6744vm_map_copy_discard(
6745 vm_map_copy_t copy)
6746{
1c79356b
A
6747 if (copy == VM_MAP_COPY_NULL)
6748 return;
6749
6750 switch (copy->type) {
6751 case VM_MAP_COPY_ENTRY_LIST:
6752 while (vm_map_copy_first_entry(copy) !=
2d21ac55 6753 vm_map_copy_to_entry(copy)) {
1c79356b
A
6754 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6755
6756 vm_map_copy_entry_unlink(copy, entry);
39236c6e 6757 if (entry->is_sub_map) {
3e170ce0 6758 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 6759 } else {
3e170ce0 6760 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 6761 }
1c79356b
A
6762 vm_map_copy_entry_dispose(copy, entry);
6763 }
6764 break;
6765 case VM_MAP_COPY_OBJECT:
6766 vm_object_deallocate(copy->cpy_object);
6767 break;
1c79356b
A
6768 case VM_MAP_COPY_KERNEL_BUFFER:
6769
6770 /*
6771 * The vm_map_copy_t and possibly the data buffer were
6772 * allocated by a single call to kalloc(), i.e. the
6773 * vm_map_copy_t was not allocated out of the zone.
6774 */
3e170ce0
A
6775 if (copy->size > msg_ool_size_small || copy->offset)
6776 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6777 (long long)copy->size, (long long)copy->offset);
6778 kfree(copy, copy->size + cpy_kdata_hdr_sz);
1c79356b
A
6779 return;
6780 }
91447636 6781 zfree(vm_map_copy_zone, copy);
1c79356b
A
6782}
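
/*
 * Usage sketch (hypothetical, for illustration only): the typical
 * error-path use of vm_map_copy_discard().  If a copy object produced
 * by vm_map_copyin() cannot be consumed (e.g. a later step fails before
 * it is copied out or overwritten), it must be discarded to release its
 * entries and objects.  Assumes the usual vm_map_copyin() interface;
 * the function name is illustrative.
 */
static kern_return_t
vm_map_copyin_then_bail_sketch(
	vm_map_t	 src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	 len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,		/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... suppose a later step fails and "copy" cannot be consumed ... */
	vm_map_copy_discard(copy);
	return KERN_FAILURE;
}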
6783
6784/*
6785 * Routine: vm_map_copy_copy
6786 *
6787 * Description:
6788 * Move the information in a map copy object to
6789 * a new map copy object, leaving the old one
6790 * empty.
6791 *
6792 * This is used by kernel routines that need
6793 * to look at out-of-line data (in copyin form)
6794 * before deciding whether to return SUCCESS.
6795 * If the routine returns FAILURE, the original
6796 * copy object will be deallocated; therefore,
6797 * these routines must make a copy of the copy
6798 * object and leave the original empty so that
6799 * deallocation will not fail.
6800 */
6801vm_map_copy_t
6802vm_map_copy_copy(
6803 vm_map_copy_t copy)
6804{
6805 vm_map_copy_t new_copy;
6806
6807 if (copy == VM_MAP_COPY_NULL)
6808 return VM_MAP_COPY_NULL;
6809
6810 /*
6811 * Allocate a new copy object, and copy the information
6812 * from the old one into it.
6813 */
6814
6815 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 6816 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
6817 *new_copy = *copy;
6818
6819 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6820 /*
6821 * The links in the entry chain must be
6822 * changed to point to the new copy object.
6823 */
6824 vm_map_copy_first_entry(copy)->vme_prev
6825 = vm_map_copy_to_entry(new_copy);
6826 vm_map_copy_last_entry(copy)->vme_next
6827 = vm_map_copy_to_entry(new_copy);
6828 }
6829
6830 /*
6831 * Change the old copy object into one that contains
6832 * nothing to be deallocated.
6833 */
6834 copy->type = VM_MAP_COPY_OBJECT;
6835 copy->cpy_object = VM_OBJECT_NULL;
6836
6837 /*
6838 * Return the new object.
6839 */
6840 return new_copy;
6841}
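
/*
 * Usage sketch (hypothetical, for illustration only) of the pattern
 * described above: a routine that must examine out-of-line data before
 * deciding to succeed first steals the contents into a private copy
 * object, so that the caller's deallocation of the (now empty) original
 * cannot fail.  The function name and the "acceptable" stand-in for a
 * real check are illustrative.
 */
static kern_return_t
vm_map_copy_inspect_sketch(
	vm_map_copy_t	copy,
	boolean_t	acceptable)	/* stands in for a real check */
{
	vm_map_copy_t	my_copy;

	my_copy = vm_map_copy_copy(copy);	/* "copy" is now empty */

	/* ... map or otherwise examine the data behind "my_copy" ... */
	if (!acceptable) {
		vm_map_copy_discard(my_copy);
		return KERN_FAILURE;	/* caller can safely discard "copy" */
	}
	/* ... consume "my_copy" (vm_map_copyout(), copy overwrite, ...) ... */
	vm_map_copy_discard(my_copy);
	return KERN_SUCCESS;
}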
6842
91447636 6843static kern_return_t
1c79356b
A
6844vm_map_overwrite_submap_recurse(
6845 vm_map_t dst_map,
91447636
A
6846 vm_map_offset_t dst_addr,
6847 vm_map_size_t dst_size)
1c79356b 6848{
91447636 6849 vm_map_offset_t dst_end;
1c79356b
A
6850 vm_map_entry_t tmp_entry;
6851 vm_map_entry_t entry;
6852 kern_return_t result;
6853 boolean_t encountered_sub_map = FALSE;
6854
6855
6856
6857 /*
6858 * Verify that the destination is all writeable
6859 * initially. We have to trunc the destination
6860 * address and round the copy size or we'll end up
6861 * splitting entries in strange ways.
6862 */
6863
39236c6e
A
6864 dst_end = vm_map_round_page(dst_addr + dst_size,
6865 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 6866 vm_map_lock(dst_map);
1c79356b
A
6867
6868start_pass_1:
1c79356b
A
6869 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6870 vm_map_unlock(dst_map);
6871 return(KERN_INVALID_ADDRESS);
6872 }
6873
39236c6e
A
6874 vm_map_clip_start(dst_map,
6875 tmp_entry,
6876 vm_map_trunc_page(dst_addr,
6877 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
6878 if (tmp_entry->is_sub_map) {
6879 /* clipping did unnest if needed */
6880 assert(!tmp_entry->use_pmap);
6881 }
1c79356b
A
6882
6883 for (entry = tmp_entry;;) {
6884 vm_map_entry_t next;
6885
6886 next = entry->vme_next;
6887 while(entry->is_sub_map) {
91447636
A
6888 vm_map_offset_t sub_start;
6889 vm_map_offset_t sub_end;
6890 vm_map_offset_t local_end;
1c79356b
A
6891
6892 if (entry->in_transition) {
2d21ac55
A
6893 /*
6894 * Say that we are waiting, and wait for entry.
6895 */
1c79356b
A
6896 entry->needs_wakeup = TRUE;
6897 vm_map_entry_wait(dst_map, THREAD_UNINT);
6898
6899 goto start_pass_1;
6900 }
6901
6902 encountered_sub_map = TRUE;
3e170ce0 6903 sub_start = VME_OFFSET(entry);
1c79356b
A
6904
6905 if(entry->vme_end < dst_end)
6906 sub_end = entry->vme_end;
6907 else
6908 sub_end = dst_end;
6909 sub_end -= entry->vme_start;
3e170ce0 6910 sub_end += VME_OFFSET(entry);
1c79356b
A
6911 local_end = entry->vme_end;
6912 vm_map_unlock(dst_map);
6913
6914 result = vm_map_overwrite_submap_recurse(
3e170ce0 6915 VME_SUBMAP(entry),
2d21ac55
A
6916 sub_start,
6917 sub_end - sub_start);
1c79356b
A
6918
6919 if(result != KERN_SUCCESS)
6920 return result;
6921 if (dst_end <= entry->vme_end)
6922 return KERN_SUCCESS;
6923 vm_map_lock(dst_map);
6924 if(!vm_map_lookup_entry(dst_map, local_end,
6925 &tmp_entry)) {
6926 vm_map_unlock(dst_map);
6927 return(KERN_INVALID_ADDRESS);
6928 }
6929 entry = tmp_entry;
6930 next = entry->vme_next;
6931 }
6932
6933 if ( ! (entry->protection & VM_PROT_WRITE)) {
6934 vm_map_unlock(dst_map);
6935 return(KERN_PROTECTION_FAILURE);
6936 }
6937
6938 /*
6939 * If the entry is in transition, we must wait
6940 * for it to exit that state. Anything could happen
6941 * when we unlock the map, so start over.
6942 */
6943 if (entry->in_transition) {
6944
6945 /*
6946 * Say that we are waiting, and wait for entry.
6947 */
6948 entry->needs_wakeup = TRUE;
6949 vm_map_entry_wait(dst_map, THREAD_UNINT);
6950
6951 goto start_pass_1;
6952 }
6953
6954/*
6955 * our range is contained completely within this map entry
6956 */
6957 if (dst_end <= entry->vme_end) {
6958 vm_map_unlock(dst_map);
6959 return KERN_SUCCESS;
6960 }
6961/*
6962 * check that range specified is contiguous region
6963 */
6964 if ((next == vm_map_to_entry(dst_map)) ||
6965 (next->vme_start != entry->vme_end)) {
6966 vm_map_unlock(dst_map);
6967 return(KERN_INVALID_ADDRESS);
6968 }
6969
6970 /*
6971 * Check for permanent objects in the destination.
6972 */
3e170ce0
A
6973 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6974 ((!VME_OBJECT(entry)->internal) ||
6975 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
6976 if(encountered_sub_map) {
6977 vm_map_unlock(dst_map);
6978 return(KERN_FAILURE);
6979 }
6980 }
6981
6982
6983 entry = next;
6984 }/* for */
6985 vm_map_unlock(dst_map);
6986 return(KERN_SUCCESS);
6987}
6988
6989/*
6990 * Routine: vm_map_copy_overwrite
6991 *
6992 * Description:
6993 * Copy the memory described by the map copy
6994 * object (copy; returned by vm_map_copyin) onto
6995 * the specified destination region (dst_map, dst_addr).
6996 * The destination must be writeable.
6997 *
6998 * Unlike vm_map_copyout, this routine actually
6999 * writes over previously-mapped memory. If the
7000 * previous mapping was to a permanent (user-supplied)
7001 * memory object, it is preserved.
7002 *
7003 * The attributes (protection and inheritance) of the
7004 * destination region are preserved.
7005 *
7006 * If successful, consumes the copy object.
7007 * Otherwise, the caller is responsible for it.
7008 *
7009 * Implementation notes:
7010 * To overwrite aligned temporary virtual memory, it is
7011 * sufficient to remove the previous mapping and insert
7012 * the new copy. This replacement is done either on
7013 * the whole region (if no permanent virtual memory
7014 * objects are embedded in the destination region) or
7015 * in individual map entries.
7016 *
 7017 * To overwrite permanent virtual memory, it is necessary
7018 * to copy each page, as the external memory management
7019 * interface currently does not provide any optimizations.
7020 *
7021 * Unaligned memory also has to be copied. It is possible
7022 * to use 'vm_trickery' to copy the aligned data. This is
7023 * not done but not hard to implement.
7024 *
7025 * Once a page of permanent memory has been overwritten,
7026 * it is impossible to interrupt this function; otherwise,
7027 * the call would be neither atomic nor location-independent.
7028 * The kernel-state portion of a user thread must be
7029 * interruptible.
7030 *
7031 * It may be expensive to forward all requests that might
7032 * overwrite permanent memory (vm_write, vm_copy) to
7033 * uninterruptible kernel threads. This routine may be
7034 * called by interruptible threads; however, success is
7035 * not guaranteed -- if the request cannot be performed
7036 * atomically and interruptibly, an error indication is
7037 * returned.
7038 */
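
/*
 * Usage sketch (hypothetical, for illustration only), assuming the
 * public vm_map_copy_overwrite(dst_map, dst_addr, copy, interruptible)
 * wrapper declared in vm_map.h and the usual vm_map_copyin() interface:
 *
 *	vm_map_copy_t	copy;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	/* not consumed on failure */
 *	return kr;
 *
 * The destination range must already be mapped and writeable; on
 * success the copy object is consumed.
 */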
7039
91447636 7040static kern_return_t
1c79356b 7041vm_map_copy_overwrite_nested(
91447636
A
7042 vm_map_t dst_map,
7043 vm_map_address_t dst_addr,
7044 vm_map_copy_t copy,
7045 boolean_t interruptible,
6d2010ae
A
7046 pmap_t pmap,
7047 boolean_t discard_on_success)
1c79356b 7048{
91447636
A
7049 vm_map_offset_t dst_end;
7050 vm_map_entry_t tmp_entry;
7051 vm_map_entry_t entry;
7052 kern_return_t kr;
7053 boolean_t aligned = TRUE;
7054 boolean_t contains_permanent_objects = FALSE;
7055 boolean_t encountered_sub_map = FALSE;
7056 vm_map_offset_t base_addr;
7057 vm_map_size_t copy_size;
7058 vm_map_size_t total_size;
1c79356b
A
7059
7060
7061 /*
7062 * Check for null copy object.
7063 */
7064
7065 if (copy == VM_MAP_COPY_NULL)
7066 return(KERN_SUCCESS);
7067
7068 /*
7069 * Check for special kernel buffer allocated
7070 * by new_ipc_kmsg_copyin.
7071 */
7072
7073 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0 7074 return(vm_map_copyout_kernel_buffer(
2d21ac55 7075 dst_map, &dst_addr,
39037602 7076 copy, copy->size, TRUE, discard_on_success));
1c79356b
A
7077 }
7078
7079 /*
7080 * Only works for entry lists at the moment. Will
7081 * support page lists later.
7082 */
7083
7084 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7085
7086 if (copy->size == 0) {
6d2010ae
A
7087 if (discard_on_success)
7088 vm_map_copy_discard(copy);
1c79356b
A
7089 return(KERN_SUCCESS);
7090 }
7091
7092 /*
7093 * Verify that the destination is all writeable
7094 * initially. We have to trunc the destination
7095 * address and round the copy size or we'll end up
7096 * splitting entries in strange ways.
7097 */
7098
39236c6e
A
7099 if (!VM_MAP_PAGE_ALIGNED(copy->size,
7100 VM_MAP_PAGE_MASK(dst_map)) ||
7101 !VM_MAP_PAGE_ALIGNED(copy->offset,
7102 VM_MAP_PAGE_MASK(dst_map)) ||
7103 !VM_MAP_PAGE_ALIGNED(dst_addr,
fe8ab488 7104 VM_MAP_PAGE_MASK(dst_map)))
1c79356b
A
7105 {
7106 aligned = FALSE;
39236c6e
A
7107 dst_end = vm_map_round_page(dst_addr + copy->size,
7108 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
7109 } else {
7110 dst_end = dst_addr + copy->size;
7111 }
7112
1c79356b 7113 vm_map_lock(dst_map);
9bccf70c 7114
91447636
A
7115 /* LP64todo - remove this check when vm_map_commpage64()
7116 * no longer has to stuff in a map_entry for the commpage
7117 * above the map's max_offset.
7118 */
7119 if (dst_addr >= dst_map->max_offset) {
7120 vm_map_unlock(dst_map);
7121 return(KERN_INVALID_ADDRESS);
7122 }
7123
9bccf70c 7124start_pass_1:
1c79356b
A
7125 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7126 vm_map_unlock(dst_map);
7127 return(KERN_INVALID_ADDRESS);
7128 }
39236c6e
A
7129 vm_map_clip_start(dst_map,
7130 tmp_entry,
7131 vm_map_trunc_page(dst_addr,
7132 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
7133 for (entry = tmp_entry;;) {
7134 vm_map_entry_t next = entry->vme_next;
7135
7136 while(entry->is_sub_map) {
91447636
A
7137 vm_map_offset_t sub_start;
7138 vm_map_offset_t sub_end;
7139 vm_map_offset_t local_end;
1c79356b
A
7140
7141 if (entry->in_transition) {
7142
2d21ac55
A
7143 /*
7144 * Say that we are waiting, and wait for entry.
7145 */
1c79356b
A
7146 entry->needs_wakeup = TRUE;
7147 vm_map_entry_wait(dst_map, THREAD_UNINT);
7148
7149 goto start_pass_1;
7150 }
7151
7152 local_end = entry->vme_end;
7153 if (!(entry->needs_copy)) {
7154 /* if needs_copy we are a COW submap */
7155 /* in such a case we just replace so */
7156 /* there is no need for the follow- */
7157 /* ing check. */
7158 encountered_sub_map = TRUE;
3e170ce0 7159 sub_start = VME_OFFSET(entry);
1c79356b
A
7160
7161 if(entry->vme_end < dst_end)
7162 sub_end = entry->vme_end;
7163 else
7164 sub_end = dst_end;
7165 sub_end -= entry->vme_start;
3e170ce0 7166 sub_end += VME_OFFSET(entry);
1c79356b
A
7167 vm_map_unlock(dst_map);
7168
7169 kr = vm_map_overwrite_submap_recurse(
3e170ce0 7170 VME_SUBMAP(entry),
1c79356b
A
7171 sub_start,
7172 sub_end - sub_start);
7173 if(kr != KERN_SUCCESS)
7174 return kr;
7175 vm_map_lock(dst_map);
7176 }
7177
7178 if (dst_end <= entry->vme_end)
7179 goto start_overwrite;
7180 if(!vm_map_lookup_entry(dst_map, local_end,
7181 &entry)) {
7182 vm_map_unlock(dst_map);
7183 return(KERN_INVALID_ADDRESS);
7184 }
7185 next = entry->vme_next;
7186 }
7187
7188 if ( ! (entry->protection & VM_PROT_WRITE)) {
7189 vm_map_unlock(dst_map);
7190 return(KERN_PROTECTION_FAILURE);
7191 }
7192
7193 /*
7194 * If the entry is in transition, we must wait
7195 * for it to exit that state. Anything could happen
7196 * when we unlock the map, so start over.
7197 */
7198 if (entry->in_transition) {
7199
7200 /*
7201 * Say that we are waiting, and wait for entry.
7202 */
7203 entry->needs_wakeup = TRUE;
7204 vm_map_entry_wait(dst_map, THREAD_UNINT);
7205
7206 goto start_pass_1;
7207 }
7208
7209/*
7210 * our range is contained completely within this map entry
7211 */
7212 if (dst_end <= entry->vme_end)
7213 break;
7214/*
7215 * check that range specified is contiguous region
7216 */
7217 if ((next == vm_map_to_entry(dst_map)) ||
7218 (next->vme_start != entry->vme_end)) {
7219 vm_map_unlock(dst_map);
7220 return(KERN_INVALID_ADDRESS);
7221 }
7222
7223
7224 /*
7225 * Check for permanent objects in the destination.
7226 */
3e170ce0
A
7227 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7228 ((!VME_OBJECT(entry)->internal) ||
7229 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
7230 contains_permanent_objects = TRUE;
7231 }
7232
7233 entry = next;
7234 }/* for */
7235
7236start_overwrite:
7237 /*
7238 * If there are permanent objects in the destination, then
7239 * the copy cannot be interrupted.
7240 */
7241
7242 if (interruptible && contains_permanent_objects) {
7243 vm_map_unlock(dst_map);
7244 return(KERN_FAILURE); /* XXX */
7245 }
7246
7247 /*
7248 *
7249 * Make a second pass, overwriting the data
7250 * At the beginning of each loop iteration,
7251 * the next entry to be overwritten is "tmp_entry"
7252 * (initially, the value returned from the lookup above),
7253 * and the starting address expected in that entry
7254 * is "start".
7255 */
7256
7257 total_size = copy->size;
7258 if(encountered_sub_map) {
7259 copy_size = 0;
7260 /* re-calculate tmp_entry since we've had the map */
7261 /* unlocked */
7262 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7263 vm_map_unlock(dst_map);
7264 return(KERN_INVALID_ADDRESS);
7265 }
7266 } else {
7267 copy_size = copy->size;
7268 }
7269
7270 base_addr = dst_addr;
7271 while(TRUE) {
7272 /* deconstruct the copy object and do in parts */
 7273		/* only in sub_map, interruptible case */
7274 vm_map_entry_t copy_entry;
91447636
A
7275 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7276 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
1c79356b 7277 int nentries;
91447636 7278 int remaining_entries = 0;
b0d623f7 7279 vm_map_offset_t new_offset = 0;
1c79356b
A
7280
7281 for (entry = tmp_entry; copy_size == 0;) {
7282 vm_map_entry_t next;
7283
7284 next = entry->vme_next;
7285
7286 /* tmp_entry and base address are moved along */
7287 /* each time we encounter a sub-map. Otherwise */
 7288		/* entry can outpace tmp_entry, and the copy_size */
 7289		/* may reflect the distance between them. */
 7290		/* If the current entry is found to be in transition, */
 7291		/* we will start over at the beginning or at the last */
 7292		/* encounter of a submap, as dictated by base_addr; */
 7293		/* we will zero copy_size accordingly. */
7294 if (entry->in_transition) {
7295 /*
7296 * Say that we are waiting, and wait for entry.
7297 */
7298 entry->needs_wakeup = TRUE;
7299 vm_map_entry_wait(dst_map, THREAD_UNINT);
7300
1c79356b 7301 if(!vm_map_lookup_entry(dst_map, base_addr,
2d21ac55 7302 &tmp_entry)) {
1c79356b
A
7303 vm_map_unlock(dst_map);
7304 return(KERN_INVALID_ADDRESS);
7305 }
7306 copy_size = 0;
7307 entry = tmp_entry;
7308 continue;
7309 }
7310 if(entry->is_sub_map) {
91447636
A
7311 vm_map_offset_t sub_start;
7312 vm_map_offset_t sub_end;
7313 vm_map_offset_t local_end;
1c79356b
A
7314
7315 if (entry->needs_copy) {
7316 /* if this is a COW submap */
7317 /* just back the range with a */
7318 /* anonymous entry */
7319 if(entry->vme_end < dst_end)
7320 sub_end = entry->vme_end;
7321 else
7322 sub_end = dst_end;
7323 if(entry->vme_start < base_addr)
7324 sub_start = base_addr;
7325 else
7326 sub_start = entry->vme_start;
7327 vm_map_clip_end(
7328 dst_map, entry, sub_end);
7329 vm_map_clip_start(
7330 dst_map, entry, sub_start);
2d21ac55 7331 assert(!entry->use_pmap);
1c79356b
A
7332 entry->is_sub_map = FALSE;
7333 vm_map_deallocate(
3e170ce0
A
7334 VME_SUBMAP(entry));
7335 VME_SUBMAP_SET(entry, NULL);
1c79356b
A
7336 entry->is_shared = FALSE;
7337 entry->needs_copy = FALSE;
3e170ce0 7338 VME_OFFSET_SET(entry, 0);
2d21ac55
A
7339 /*
7340 * XXX FBDP
7341 * We should propagate the protections
7342 * of the submap entry here instead
7343 * of forcing them to VM_PROT_ALL...
7344 * Or better yet, we should inherit
7345 * the protection of the copy_entry.
7346 */
1c79356b
A
7347 entry->protection = VM_PROT_ALL;
7348 entry->max_protection = VM_PROT_ALL;
7349 entry->wired_count = 0;
7350 entry->user_wired_count = 0;
7351 if(entry->inheritance
2d21ac55
A
7352 == VM_INHERIT_SHARE)
7353 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
7354 continue;
7355 }
7356 /* first take care of any non-sub_map */
7357 /* entries to send */
7358 if(base_addr < entry->vme_start) {
7359 /* stuff to send */
7360 copy_size =
7361 entry->vme_start - base_addr;
7362 break;
7363 }
3e170ce0 7364 sub_start = VME_OFFSET(entry);
1c79356b
A
7365
7366 if(entry->vme_end < dst_end)
7367 sub_end = entry->vme_end;
7368 else
7369 sub_end = dst_end;
7370 sub_end -= entry->vme_start;
3e170ce0 7371 sub_end += VME_OFFSET(entry);
1c79356b
A
7372 local_end = entry->vme_end;
7373 vm_map_unlock(dst_map);
7374 copy_size = sub_end - sub_start;
7375
7376 /* adjust the copy object */
7377 if (total_size > copy_size) {
91447636
A
7378 vm_map_size_t local_size = 0;
7379 vm_map_size_t entry_size;
1c79356b 7380
2d21ac55
A
7381 nentries = 1;
7382 new_offset = copy->offset;
7383 copy_entry = vm_map_copy_first_entry(copy);
7384 while(copy_entry !=
7385 vm_map_copy_to_entry(copy)){
7386 entry_size = copy_entry->vme_end -
7387 copy_entry->vme_start;
7388 if((local_size < copy_size) &&
7389 ((local_size + entry_size)
7390 >= copy_size)) {
7391 vm_map_copy_clip_end(copy,
7392 copy_entry,
7393 copy_entry->vme_start +
7394 (copy_size - local_size));
7395 entry_size = copy_entry->vme_end -
7396 copy_entry->vme_start;
7397 local_size += entry_size;
7398 new_offset += entry_size;
7399 }
7400 if(local_size >= copy_size) {
7401 next_copy = copy_entry->vme_next;
7402 copy_entry->vme_next =
7403 vm_map_copy_to_entry(copy);
7404 previous_prev =
7405 copy->cpy_hdr.links.prev;
7406 copy->cpy_hdr.links.prev = copy_entry;
7407 copy->size = copy_size;
7408 remaining_entries =
7409 copy->cpy_hdr.nentries;
7410 remaining_entries -= nentries;
7411 copy->cpy_hdr.nentries = nentries;
7412 break;
7413 } else {
7414 local_size += entry_size;
7415 new_offset += entry_size;
7416 nentries++;
7417 }
7418 copy_entry = copy_entry->vme_next;
7419 }
1c79356b
A
7420 }
7421
7422 if((entry->use_pmap) && (pmap == NULL)) {
7423 kr = vm_map_copy_overwrite_nested(
3e170ce0 7424 VME_SUBMAP(entry),
1c79356b
A
7425 sub_start,
7426 copy,
7427 interruptible,
3e170ce0 7428 VME_SUBMAP(entry)->pmap,
6d2010ae 7429 TRUE);
1c79356b
A
7430 } else if (pmap != NULL) {
7431 kr = vm_map_copy_overwrite_nested(
3e170ce0 7432 VME_SUBMAP(entry),
1c79356b
A
7433 sub_start,
7434 copy,
6d2010ae
A
7435 interruptible, pmap,
7436 TRUE);
1c79356b
A
7437 } else {
7438 kr = vm_map_copy_overwrite_nested(
3e170ce0 7439 VME_SUBMAP(entry),
1c79356b
A
7440 sub_start,
7441 copy,
7442 interruptible,
6d2010ae
A
7443 dst_map->pmap,
7444 TRUE);
1c79356b
A
7445 }
7446 if(kr != KERN_SUCCESS) {
7447 if(next_copy != NULL) {
2d21ac55
A
7448 copy->cpy_hdr.nentries +=
7449 remaining_entries;
7450 copy->cpy_hdr.links.prev->vme_next =
7451 next_copy;
7452 copy->cpy_hdr.links.prev
7453 = previous_prev;
7454 copy->size = total_size;
1c79356b
A
7455 }
7456 return kr;
7457 }
7458 if (dst_end <= local_end) {
7459 return(KERN_SUCCESS);
7460 }
7461 /* otherwise copy no longer exists, it was */
7462 /* destroyed after successful copy_overwrite */
7463 copy = (vm_map_copy_t)
2d21ac55 7464 zalloc(vm_map_copy_zone);
04b8595b 7465 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 7466 vm_map_copy_first_entry(copy) =
2d21ac55
A
7467 vm_map_copy_last_entry(copy) =
7468 vm_map_copy_to_entry(copy);
1c79356b
A
7469 copy->type = VM_MAP_COPY_ENTRY_LIST;
7470 copy->offset = new_offset;
7471
e2d2fc5c
A
7472 /*
7473 * XXX FBDP
7474 * this does not seem to deal with
7475 * the VM map store (R&B tree)
7476 */
7477
1c79356b
A
7478 total_size -= copy_size;
7479 copy_size = 0;
7480 /* put back remainder of copy in container */
7481 if(next_copy != NULL) {
2d21ac55
A
7482 copy->cpy_hdr.nentries = remaining_entries;
7483 copy->cpy_hdr.links.next = next_copy;
7484 copy->cpy_hdr.links.prev = previous_prev;
7485 copy->size = total_size;
7486 next_copy->vme_prev =
7487 vm_map_copy_to_entry(copy);
7488 next_copy = NULL;
1c79356b
A
7489 }
7490 base_addr = local_end;
7491 vm_map_lock(dst_map);
7492 if(!vm_map_lookup_entry(dst_map,
2d21ac55 7493 local_end, &tmp_entry)) {
1c79356b
A
7494 vm_map_unlock(dst_map);
7495 return(KERN_INVALID_ADDRESS);
7496 }
7497 entry = tmp_entry;
7498 continue;
7499 }
7500 if (dst_end <= entry->vme_end) {
7501 copy_size = dst_end - base_addr;
7502 break;
7503 }
7504
7505 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 7506 (next->vme_start != entry->vme_end)) {
1c79356b
A
7507 vm_map_unlock(dst_map);
7508 return(KERN_INVALID_ADDRESS);
7509 }
7510
7511 entry = next;
7512 }/* for */
7513
7514 next_copy = NULL;
7515 nentries = 1;
7516
7517 /* adjust the copy object */
7518 if (total_size > copy_size) {
91447636
A
7519 vm_map_size_t local_size = 0;
7520 vm_map_size_t entry_size;
1c79356b
A
7521
7522 new_offset = copy->offset;
7523 copy_entry = vm_map_copy_first_entry(copy);
7524 while(copy_entry != vm_map_copy_to_entry(copy)) {
7525 entry_size = copy_entry->vme_end -
2d21ac55 7526 copy_entry->vme_start;
1c79356b 7527 if((local_size < copy_size) &&
2d21ac55
A
7528 ((local_size + entry_size)
7529 >= copy_size)) {
1c79356b 7530 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55
A
7531 copy_entry->vme_start +
7532 (copy_size - local_size));
1c79356b 7533 entry_size = copy_entry->vme_end -
2d21ac55 7534 copy_entry->vme_start;
1c79356b
A
7535 local_size += entry_size;
7536 new_offset += entry_size;
7537 }
7538 if(local_size >= copy_size) {
7539 next_copy = copy_entry->vme_next;
7540 copy_entry->vme_next =
7541 vm_map_copy_to_entry(copy);
7542 previous_prev =
7543 copy->cpy_hdr.links.prev;
7544 copy->cpy_hdr.links.prev = copy_entry;
7545 copy->size = copy_size;
7546 remaining_entries =
7547 copy->cpy_hdr.nentries;
7548 remaining_entries -= nentries;
7549 copy->cpy_hdr.nentries = nentries;
7550 break;
7551 } else {
7552 local_size += entry_size;
7553 new_offset += entry_size;
7554 nentries++;
7555 }
7556 copy_entry = copy_entry->vme_next;
7557 }
7558 }
7559
7560 if (aligned) {
7561 pmap_t local_pmap;
7562
7563 if(pmap)
7564 local_pmap = pmap;
7565 else
7566 local_pmap = dst_map->pmap;
7567
7568 if ((kr = vm_map_copy_overwrite_aligned(
2d21ac55
A
7569 dst_map, tmp_entry, copy,
7570 base_addr, local_pmap)) != KERN_SUCCESS) {
1c79356b
A
7571 if(next_copy != NULL) {
7572 copy->cpy_hdr.nentries +=
2d21ac55 7573 remaining_entries;
1c79356b 7574 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7575 next_copy;
1c79356b 7576 copy->cpy_hdr.links.prev =
2d21ac55 7577 previous_prev;
1c79356b
A
7578 copy->size += copy_size;
7579 }
7580 return kr;
7581 }
7582 vm_map_unlock(dst_map);
7583 } else {
2d21ac55
A
7584 /*
7585 * Performance gain:
7586 *
7587 * if the copy and dst address are misaligned but have the same
7588 * offset within the page, we can copy_not_aligned the
7589 * misaligned parts and copy aligned the rest. If they are
7590 * aligned but len is unaligned, we simply need to copy
7591 * the end bit unaligned. We'll need to split off the misaligned
7592 * bits of the region in this case!
7593 */
7594 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
7595 kr = vm_map_copy_overwrite_unaligned(
7596 dst_map,
7597 tmp_entry,
7598 copy,
7599 base_addr,
7600 discard_on_success);
7601 if (kr != KERN_SUCCESS) {
1c79356b
A
7602 if(next_copy != NULL) {
7603 copy->cpy_hdr.nentries +=
2d21ac55 7604 remaining_entries;
1c79356b 7605 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7606 next_copy;
1c79356b
A
7607 copy->cpy_hdr.links.prev =
7608 previous_prev;
7609 copy->size += copy_size;
7610 }
7611 return kr;
7612 }
7613 }
7614 total_size -= copy_size;
7615 if(total_size == 0)
7616 break;
7617 base_addr += copy_size;
7618 copy_size = 0;
7619 copy->offset = new_offset;
7620 if(next_copy != NULL) {
7621 copy->cpy_hdr.nentries = remaining_entries;
7622 copy->cpy_hdr.links.next = next_copy;
7623 copy->cpy_hdr.links.prev = previous_prev;
7624 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7625 copy->size = total_size;
7626 }
7627 vm_map_lock(dst_map);
7628 while(TRUE) {
7629 if (!vm_map_lookup_entry(dst_map,
2d21ac55 7630 base_addr, &tmp_entry)) {
1c79356b
A
7631 vm_map_unlock(dst_map);
7632 return(KERN_INVALID_ADDRESS);
7633 }
7634 if (tmp_entry->in_transition) {
7635 entry->needs_wakeup = TRUE;
7636 vm_map_entry_wait(dst_map, THREAD_UNINT);
7637 } else {
7638 break;
7639 }
7640 }
39236c6e
A
7641 vm_map_clip_start(dst_map,
7642 tmp_entry,
7643 vm_map_trunc_page(base_addr,
7644 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
7645
7646 entry = tmp_entry;
7647 } /* while */
7648
7649 /*
7650 * Throw away the vm_map_copy object
7651 */
6d2010ae
A
7652 if (discard_on_success)
7653 vm_map_copy_discard(copy);
1c79356b
A
7654
7655 return(KERN_SUCCESS);
7656}/* vm_map_copy_overwrite */
7657
7658kern_return_t
7659vm_map_copy_overwrite(
7660 vm_map_t dst_map,
91447636 7661 vm_map_offset_t dst_addr,
1c79356b
A
7662 vm_map_copy_t copy,
7663 boolean_t interruptible)
7664{
6d2010ae
A
7665 vm_map_size_t head_size, tail_size;
7666 vm_map_copy_t head_copy, tail_copy;
7667 vm_map_offset_t head_addr, tail_addr;
7668 vm_map_entry_t entry;
7669 kern_return_t kr;
7670
7671 head_size = 0;
7672 tail_size = 0;
7673 head_copy = NULL;
7674 tail_copy = NULL;
7675 head_addr = 0;
7676 tail_addr = 0;
7677
7678 if (interruptible ||
7679 copy == VM_MAP_COPY_NULL ||
7680 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7681 /*
7682 * We can't split the "copy" map if we're interruptible
7683 * or if we don't have a "copy" map...
7684 */
7685 blunt_copy:
7686 return vm_map_copy_overwrite_nested(dst_map,
7687 dst_addr,
7688 copy,
7689 interruptible,
7690 (pmap_t) NULL,
7691 TRUE);
7692 }
7693
7694 if (copy->size < 3 * PAGE_SIZE) {
7695 /*
7696 * Too small to bother with optimizing...
7697 */
7698 goto blunt_copy;
7699 }
7700
39236c6e
A
7701 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7702 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
6d2010ae
A
7703 /*
7704 * Incompatible mis-alignment of source and destination...
7705 */
7706 goto blunt_copy;
7707 }
7708
7709 /*
7710 * Proper alignment or identical mis-alignment at the beginning.
7711 * Let's try and do a small unaligned copy first (if needed)
7712 * and then an aligned copy for the rest.
7713 */
7714 if (!page_aligned(dst_addr)) {
7715 head_addr = dst_addr;
39236c6e
A
7716 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7717 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
6d2010ae
A
7718 }
7719 if (!page_aligned(copy->offset + copy->size)) {
7720 /*
7721 * Mis-alignment at the end.
7722 * Do an aligned copy up to the last page and
7723 * then an unaligned copy for the remaining bytes.
7724 */
39236c6e
A
7725 tail_size = ((copy->offset + copy->size) &
7726 VM_MAP_PAGE_MASK(dst_map));
6d2010ae
A
7727 tail_addr = dst_addr + copy->size - tail_size;
7728 }
7729
7730 if (head_size + tail_size == copy->size) {
7731 /*
7732 * It's all unaligned, no optimization possible...
7733 */
7734 goto blunt_copy;
7735 }
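/*
 * Illustrative sketch, not part of the original source (assumes 4KB
 * map pages; all numbers are made up): with dst_addr = 0x1800,
 * copy->offset = 0x5800 and copy->size = 0x2300, the logic above
 * splits the overwrite into
 *
 *	head_size = 0x1000 - (0x5800 & 0xfff) = 0x800  -> [0x1800, 0x2000)
 *	tail_size = (0x5800 + 0x2300) & 0xfff = 0xb00  -> [0x3000, 0x3b00)
 *	aligned middle = 0x2300 - 0x800 - 0xb00
 *		       = 0x1000                        -> [0x2000, 0x3000)
 *
 * Only the middle piece goes through the optimized aligned path;
 * the head and tail are copied unaligned below.
 */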
7736
7737 /*
7738 * Can't optimize if there are any submaps in the
7739 * destination due to the way we free the "copy" map
7740 * progressively in vm_map_copy_overwrite_nested()
7741 * in that case.
7742 */
7743 vm_map_lock_read(dst_map);
7744 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7745 vm_map_unlock_read(dst_map);
7746 goto blunt_copy;
7747 }
7748 for (;
7749 (entry != vm_map_copy_to_entry(copy) &&
7750 entry->vme_start < dst_addr + copy->size);
7751 entry = entry->vme_next) {
7752 if (entry->is_sub_map) {
7753 vm_map_unlock_read(dst_map);
7754 goto blunt_copy;
7755 }
7756 }
7757 vm_map_unlock_read(dst_map);
7758
7759 if (head_size) {
7760 /*
7761 * Unaligned copy of the first "head_size" bytes, to reach
7762 * a page boundary.
7763 */
7764
7765 /*
7766 * Extract "head_copy" out of "copy".
7767 */
7768 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7769 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7770 vm_map_copy_first_entry(head_copy) =
7771 vm_map_copy_to_entry(head_copy);
7772 vm_map_copy_last_entry(head_copy) =
7773 vm_map_copy_to_entry(head_copy);
7774 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7775 head_copy->cpy_hdr.nentries = 0;
7776 head_copy->cpy_hdr.entries_pageable =
7777 copy->cpy_hdr.entries_pageable;
7778 vm_map_store_init(&head_copy->cpy_hdr);
7779
7780 head_copy->offset = copy->offset;
7781 head_copy->size = head_size;
7782
7783 copy->offset += head_size;
7784 copy->size -= head_size;
7785
7786 entry = vm_map_copy_first_entry(copy);
7787 vm_map_copy_clip_end(copy, entry, copy->offset);
7788 vm_map_copy_entry_unlink(copy, entry);
7789 vm_map_copy_entry_link(head_copy,
7790 vm_map_copy_to_entry(head_copy),
7791 entry);
7792
7793 /*
7794 * Do the unaligned copy.
7795 */
7796 kr = vm_map_copy_overwrite_nested(dst_map,
7797 head_addr,
7798 head_copy,
7799 interruptible,
7800 (pmap_t) NULL,
7801 FALSE);
7802 if (kr != KERN_SUCCESS)
7803 goto done;
7804 }
7805
7806 if (tail_size) {
7807 /*
7808 * Extract "tail_copy" out of "copy".
7809 */
7810 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7811 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7812 vm_map_copy_first_entry(tail_copy) =
7813 vm_map_copy_to_entry(tail_copy);
7814 vm_map_copy_last_entry(tail_copy) =
7815 vm_map_copy_to_entry(tail_copy);
7816 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7817 tail_copy->cpy_hdr.nentries = 0;
7818 tail_copy->cpy_hdr.entries_pageable =
7819 copy->cpy_hdr.entries_pageable;
7820 vm_map_store_init(&tail_copy->cpy_hdr);
7821
7822 tail_copy->offset = copy->offset + copy->size - tail_size;
7823 tail_copy->size = tail_size;
7824
7825 copy->size -= tail_size;
7826
7827 entry = vm_map_copy_last_entry(copy);
7828 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7829 entry = vm_map_copy_last_entry(copy);
7830 vm_map_copy_entry_unlink(copy, entry);
7831 vm_map_copy_entry_link(tail_copy,
7832 vm_map_copy_last_entry(tail_copy),
7833 entry);
7834 }
7835
7836 /*
7837 * Copy most (or possibly all) of the data.
7838 */
7839 kr = vm_map_copy_overwrite_nested(dst_map,
7840 dst_addr + head_size,
7841 copy,
7842 interruptible,
7843 (pmap_t) NULL,
7844 FALSE);
7845 if (kr != KERN_SUCCESS) {
7846 goto done;
7847 }
7848
7849 if (tail_size) {
7850 kr = vm_map_copy_overwrite_nested(dst_map,
7851 tail_addr,
7852 tail_copy,
7853 interruptible,
7854 (pmap_t) NULL,
7855 FALSE);
7856 }
7857
7858done:
7859 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7860 if (kr == KERN_SUCCESS) {
7861 /*
7862 * Discard all the copy maps.
7863 */
7864 if (head_copy) {
7865 vm_map_copy_discard(head_copy);
7866 head_copy = NULL;
7867 }
7868 vm_map_copy_discard(copy);
7869 if (tail_copy) {
7870 vm_map_copy_discard(tail_copy);
7871 tail_copy = NULL;
7872 }
7873 } else {
7874 /*
7875 * Re-assemble the original copy map.
7876 */
7877 if (head_copy) {
7878 entry = vm_map_copy_first_entry(head_copy);
7879 vm_map_copy_entry_unlink(head_copy, entry);
7880 vm_map_copy_entry_link(copy,
7881 vm_map_copy_to_entry(copy),
7882 entry);
7883 copy->offset -= head_size;
7884 copy->size += head_size;
7885 vm_map_copy_discard(head_copy);
7886 head_copy = NULL;
7887 }
7888 if (tail_copy) {
7889 entry = vm_map_copy_last_entry(tail_copy);
7890 vm_map_copy_entry_unlink(tail_copy, entry);
7891 vm_map_copy_entry_link(copy,
7892 vm_map_copy_last_entry(copy),
7893 entry);
7894 copy->size += tail_size;
7895 vm_map_copy_discard(tail_copy);
7896 tail_copy = NULL;
7897 }
7898 }
7899 return kr;
1c79356b
A
7900}
7901
7902
7903/*
91447636 7904 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
7905 *
7906 * Description:
7907 * Physically copy unaligned data
7908 *
7909 * Implementation:
7910 * Unaligned parts of pages have to be physically copied. We use
7911 * a modified form of vm_fault_copy (which understands non-aligned
7912 * page offsets and sizes) to do the copy. We attempt to copy as
7913 * much memory in one go as possible; however, vm_fault_copy copies
7914 * within one memory object, so we have to take the smallest of "amount
7915 * left", "source object data size" and "target object data size". With
7916 * unaligned data we don't need to split regions, so the source
7917 * (copy) object should be a single map entry; the target range may,
7918 * however, be split over multiple map entries. In any event we are
7919 * pessimistic about these assumptions.
7920 *
7921 * Assumptions:
7922 * dst_map is locked on entry and is return locked on success,
7923 * unlocked on error.
7924 */
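/*
 * A minimal sketch (not part of the original source) of the
 * per-iteration sizing described above, written in terms of the
 * locals of the function below ("entry", "copy_entry", "start",
 * "src_offset", "amount_left"):
 *
 *	dst_size  = entry->vme_end - start;
 *	src_size  = copy_entry->vme_end -
 *		    (copy_entry->vme_start + src_offset);
 *	copy_size = MIN(MIN(dst_size, src_size), amount_left);
 *
 * i.e. the amount requested from each vm_fault_copy() pass is the
 * smallest of "amount left", "source object data size" and
 * "target object data size".
 */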
7925
91447636 7926static kern_return_t
1c79356b
A
7927vm_map_copy_overwrite_unaligned(
7928 vm_map_t dst_map,
7929 vm_map_entry_t entry,
7930 vm_map_copy_t copy,
39236c6e
A
7931 vm_map_offset_t start,
7932 boolean_t discard_on_success)
1c79356b 7933{
39236c6e
A
7934 vm_map_entry_t copy_entry;
7935 vm_map_entry_t copy_entry_next;
1c79356b
A
7936 vm_map_version_t version;
7937 vm_object_t dst_object;
7938 vm_object_offset_t dst_offset;
7939 vm_object_offset_t src_offset;
7940 vm_object_offset_t entry_offset;
91447636
A
7941 vm_map_offset_t entry_end;
7942 vm_map_size_t src_size,
1c79356b
A
7943 dst_size,
7944 copy_size,
7945 amount_left;
7946 kern_return_t kr = KERN_SUCCESS;
7947
39236c6e
A
7948
7949 copy_entry = vm_map_copy_first_entry(copy);
7950
1c79356b
A
7951 vm_map_lock_write_to_read(dst_map);
7952
91447636 7953 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
7954 amount_left = copy->size;
7955/*
7956 * unaligned so we never clipped this entry, we need the offset into
7957 * the vm_object not just the data.
7958 */
7959 while (amount_left > 0) {
7960
7961 if (entry == vm_map_to_entry(dst_map)) {
7962 vm_map_unlock_read(dst_map);
7963 return KERN_INVALID_ADDRESS;
7964 }
7965
7966 /* "start" must be within the current map entry */
7967 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7968
7969 dst_offset = start - entry->vme_start;
7970
7971 dst_size = entry->vme_end - start;
7972
7973 src_size = copy_entry->vme_end -
7974 (copy_entry->vme_start + src_offset);
7975
7976 if (dst_size < src_size) {
7977/*
7978 * we can only copy dst_size bytes before
7979 * we have to get the next destination entry
7980 */
7981 copy_size = dst_size;
7982 } else {
7983/*
7984 * we can only copy src_size bytes before
7985 * we have to get the next source copy entry
7986 */
7987 copy_size = src_size;
7988 }
7989
7990 if (copy_size > amount_left) {
7991 copy_size = amount_left;
7992 }
7993/*
7994 * Entry needs copy: create a shadow object for the
7995 * copy-on-write region.
7996 */
7997 if (entry->needs_copy &&
2d21ac55 7998 ((entry->protection & VM_PROT_WRITE) != 0))
1c79356b
A
7999 {
8000 if (vm_map_lock_read_to_write(dst_map)) {
8001 vm_map_lock_read(dst_map);
8002 goto RetryLookup;
8003 }
3e170ce0
A
8004 VME_OBJECT_SHADOW(entry,
8005 (vm_map_size_t)(entry->vme_end
8006 - entry->vme_start));
1c79356b
A
8007 entry->needs_copy = FALSE;
8008 vm_map_lock_write_to_read(dst_map);
8009 }
3e170ce0 8010 dst_object = VME_OBJECT(entry);
1c79356b
A
8011/*
8012 * unlike with the virtual (aligned) copy we're going
8013 * to fault on it therefore we need a target object.
8014 */
8015 if (dst_object == VM_OBJECT_NULL) {
8016 if (vm_map_lock_read_to_write(dst_map)) {
8017 vm_map_lock_read(dst_map);
8018 goto RetryLookup;
8019 }
91447636 8020 dst_object = vm_object_allocate((vm_map_size_t)
2d21ac55 8021 entry->vme_end - entry->vme_start);
3e170ce0
A
8022 VME_OBJECT(entry) = dst_object;
8023 VME_OFFSET_SET(entry, 0);
fe8ab488 8024 assert(entry->use_pmap);
1c79356b
A
8025 vm_map_lock_write_to_read(dst_map);
8026 }
8027/*
8028 * Take an object reference and unlock map. The "entry" may
8029 * disappear or change when the map is unlocked.
8030 */
8031 vm_object_reference(dst_object);
8032 version.main_timestamp = dst_map->timestamp;
3e170ce0 8033 entry_offset = VME_OFFSET(entry);
1c79356b
A
8034 entry_end = entry->vme_end;
8035 vm_map_unlock_read(dst_map);
8036/*
8037 * Copy as much as possible in one pass
8038 */
8039 kr = vm_fault_copy(
3e170ce0
A
8040 VME_OBJECT(copy_entry),
8041 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
8042 &copy_size,
8043 dst_object,
8044 entry_offset + dst_offset,
8045 dst_map,
8046 &version,
8047 THREAD_UNINT );
8048
8049 start += copy_size;
8050 src_offset += copy_size;
8051 amount_left -= copy_size;
8052/*
8053 * Release the object reference
8054 */
8055 vm_object_deallocate(dst_object);
8056/*
8057 * If a hard error occurred, return it now
8058 */
8059 if (kr != KERN_SUCCESS)
8060 return kr;
8061
8062 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
2d21ac55 8063 || amount_left == 0)
1c79356b
A
8064 {
8065/*
8066 * all done with this copy entry, dispose.
8067 */
39236c6e
A
8068 copy_entry_next = copy_entry->vme_next;
8069
8070 if (discard_on_success) {
8071 vm_map_copy_entry_unlink(copy, copy_entry);
8072 assert(!copy_entry->is_sub_map);
3e170ce0 8073 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
8074 vm_map_copy_entry_dispose(copy, copy_entry);
8075 }
1c79356b 8076
39236c6e
A
8077 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
8078 amount_left) {
1c79356b
A
8079/*
8080 * not finished copying but ran out of source
8081 */
8082 return KERN_INVALID_ADDRESS;
8083 }
39236c6e
A
8084
8085 copy_entry = copy_entry_next;
8086
1c79356b
A
8087 src_offset = 0;
8088 }
8089
8090 if (amount_left == 0)
8091 return KERN_SUCCESS;
8092
8093 vm_map_lock_read(dst_map);
8094 if (version.main_timestamp == dst_map->timestamp) {
8095 if (start == entry_end) {
8096/*
8097 * destination region is split. Use the version
8098 * information to avoid a lookup in the normal
8099 * case.
8100 */
8101 entry = entry->vme_next;
8102/*
8103 * should be contiguous. Fail if we encounter
8104 * a hole in the destination.
8105 */
8106 if (start != entry->vme_start) {
8107 vm_map_unlock_read(dst_map);
8108 return KERN_INVALID_ADDRESS ;
8109 }
8110 }
8111 } else {
8112/*
8113 * Map version check failed.
8114 * we must lookup the entry because somebody
8115 * might have changed the map behind our backs.
8116 */
2d21ac55 8117 RetryLookup:
1c79356b
A
8118 if (!vm_map_lookup_entry(dst_map, start, &entry))
8119 {
8120 vm_map_unlock_read(dst_map);
8121 return KERN_INVALID_ADDRESS ;
8122 }
8123 }
8124 }/* while */
8125
1c79356b
A
8126 return KERN_SUCCESS;
8127}/* vm_map_copy_overwrite_unaligned */
8128
8129/*
91447636 8130 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
8131 *
8132 * Description:
8133 * Does all the vm_trickery possible for whole pages.
8134 *
8135 * Implementation:
8136 *
8137 * If there are no permanent objects in the destination,
8138 * and the source and destination map entry zones match,
8139 * and the destination map entry is not shared,
8140 * then the map entries can be deleted and replaced
8141 * with those from the copy. The following code is the
8142 * basic idea of what to do, but there are lots of annoying
8143 * little details about getting protection and inheritance
8144 * right. Should add protection, inheritance, and sharing checks
8145 * to the above pass and make sure that no wiring is involved.
8146 */
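/*
 * A minimal sketch (not part of the original source) of the
 * "temporary, unshared" test that selects the fast path below,
 * where the copy entry's object is installed in place of the
 * destination's instead of physically copying pages:
 *
 *	object = VME_OBJECT(entry);
 *	if ((!entry->is_shared &&
 *	     (object == VM_OBJECT_NULL ||
 *	      (object->internal && !object->true_share))) ||
 *	    entry->needs_copy) {
 *		(steal the copy entry's object/offset - fast path)
 *	} else {
 *		(physical copy via vm_fault_copy() - slow path)
 *	}
 */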
8147
e2d2fc5c
A
8148int vm_map_copy_overwrite_aligned_src_not_internal = 0;
8149int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
8150int vm_map_copy_overwrite_aligned_src_large = 0;
8151
91447636 8152static kern_return_t
1c79356b
A
8153vm_map_copy_overwrite_aligned(
8154 vm_map_t dst_map,
8155 vm_map_entry_t tmp_entry,
8156 vm_map_copy_t copy,
91447636 8157 vm_map_offset_t start,
2d21ac55 8158 __unused pmap_t pmap)
1c79356b
A
8159{
8160 vm_object_t object;
8161 vm_map_entry_t copy_entry;
91447636
A
8162 vm_map_size_t copy_size;
8163 vm_map_size_t size;
1c79356b
A
8164 vm_map_entry_t entry;
8165
8166 while ((copy_entry = vm_map_copy_first_entry(copy))
2d21ac55 8167 != vm_map_copy_to_entry(copy))
1c79356b
A
8168 {
8169 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8170
8171 entry = tmp_entry;
fe8ab488
A
8172 if (entry->is_sub_map) {
8173 /* unnested when clipped earlier */
8174 assert(!entry->use_pmap);
8175 }
1c79356b
A
8176 if (entry == vm_map_to_entry(dst_map)) {
8177 vm_map_unlock(dst_map);
8178 return KERN_INVALID_ADDRESS;
8179 }
8180 size = (entry->vme_end - entry->vme_start);
8181 /*
8182 * Make sure that no holes popped up in the
8183 * address map, and that the protection is
8184 * still valid, in case the map was unlocked
8185 * earlier.
8186 */
8187
8188 if ((entry->vme_start != start) || ((entry->is_sub_map)
2d21ac55 8189 && !entry->needs_copy)) {
1c79356b
A
8190 vm_map_unlock(dst_map);
8191 return(KERN_INVALID_ADDRESS);
8192 }
8193 assert(entry != vm_map_to_entry(dst_map));
8194
8195 /*
8196 * Check protection again
8197 */
8198
8199 if ( ! (entry->protection & VM_PROT_WRITE)) {
8200 vm_map_unlock(dst_map);
8201 return(KERN_PROTECTION_FAILURE);
8202 }
8203
8204 /*
8205 * Adjust to source size first
8206 */
8207
8208 if (copy_size < size) {
fe8ab488
A
8209 if (entry->map_aligned &&
8210 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8211 VM_MAP_PAGE_MASK(dst_map))) {
8212 /* no longer map-aligned */
8213 entry->map_aligned = FALSE;
8214 }
1c79356b
A
8215 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8216 size = copy_size;
8217 }
8218
8219 /*
8220 * Adjust to destination size
8221 */
8222
8223 if (size < copy_size) {
8224 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8225 copy_entry->vme_start + size);
1c79356b
A
8226 copy_size = size;
8227 }
8228
8229 assert((entry->vme_end - entry->vme_start) == size);
8230 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8231 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8232
8233 /*
8234 * If the destination contains temporary unshared memory,
8235 * we can perform the copy by throwing it away and
8236 * installing the source data.
8237 */
8238
3e170ce0 8239 object = VME_OBJECT(entry);
1c79356b 8240 if ((!entry->is_shared &&
2d21ac55
A
8241 ((object == VM_OBJECT_NULL) ||
8242 (object->internal && !object->true_share))) ||
1c79356b 8243 entry->needs_copy) {
3e170ce0
A
8244 vm_object_t old_object = VME_OBJECT(entry);
8245 vm_object_offset_t old_offset = VME_OFFSET(entry);
1c79356b
A
8246 vm_object_offset_t offset;
8247
8248 /*
8249 * Ensure that the source and destination aren't
8250 * identical
8251 */
3e170ce0
A
8252 if (old_object == VME_OBJECT(copy_entry) &&
8253 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
8254 vm_map_copy_entry_unlink(copy, copy_entry);
8255 vm_map_copy_entry_dispose(copy, copy_entry);
8256
8257 if (old_object != VM_OBJECT_NULL)
8258 vm_object_deallocate(old_object);
8259
8260 start = tmp_entry->vme_end;
8261 tmp_entry = tmp_entry->vme_next;
8262 continue;
8263 }
8264
e2d2fc5c
A
8265#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8266#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
8267 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8268 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
8269 copy_size <= __TRADEOFF1_COPY_SIZE) {
8270 /*
8271 * Virtual vs. Physical copy tradeoff #1.
8272 *
8273 * Copying only a few pages out of a large
8274 * object: do a physical copy instead of
8275 * a virtual copy, to avoid possibly keeping
8276 * the entire large object alive because of
8277 * those few copy-on-write pages.
8278 */
8279 vm_map_copy_overwrite_aligned_src_large++;
8280 goto slow_copy;
8281 }
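/*
 * Worked example, not part of the original source (assumes 4KB
 * pages): copying 128KB out of a 100MB malloc'ed object satisfies
 * "vo_size >= 64MB && copy_size <= 128KB" above, so we take the
 * physical-copy path; a virtual (copy-on-write) copy would keep the
 * entire 100MB object alive for the sake of 32 resident pages.
 */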
e2d2fc5c 8282
3e170ce0
A
8283 if ((dst_map->pmap != kernel_pmap) &&
8284 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8285 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
ebb1b9f4
A
8286 vm_object_t new_object, new_shadow;
8287
8288 /*
8289 * We're about to map something over a mapping
8290 * established by malloc()...
8291 */
3e170ce0 8292 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
8293 if (new_object != VM_OBJECT_NULL) {
8294 vm_object_lock_shared(new_object);
8295 }
8296 while (new_object != VM_OBJECT_NULL &&
e2d2fc5c
A
8297 !new_object->true_share &&
8298 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
ebb1b9f4
A
8299 new_object->internal) {
8300 new_shadow = new_object->shadow;
8301 if (new_shadow == VM_OBJECT_NULL) {
8302 break;
8303 }
8304 vm_object_lock_shared(new_shadow);
8305 vm_object_unlock(new_object);
8306 new_object = new_shadow;
8307 }
8308 if (new_object != VM_OBJECT_NULL) {
8309 if (!new_object->internal) {
8310 /*
8311 * The new mapping is backed
8312 * by an external object. We
8313 * don't want malloc'ed memory
8314 * to be replaced with such a
8315 * non-anonymous mapping, so
8316 * let's go off the optimized
8317 * path...
8318 */
e2d2fc5c 8319 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
8320 vm_object_unlock(new_object);
8321 goto slow_copy;
8322 }
e2d2fc5c
A
8323 if (new_object->true_share ||
8324 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8325 /*
8326 * Same if there's a "true_share"
8327 * object in the shadow chain, or
8328 * an object with a non-default
8329 * (SYMMETRIC) copy strategy.
8330 */
8331 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8332 vm_object_unlock(new_object);
8333 goto slow_copy;
8334 }
ebb1b9f4
A
8335 vm_object_unlock(new_object);
8336 }
8337 /*
8338 * The new mapping is still backed by
8339 * anonymous (internal) memory, so it's
8340 * OK to substitute it for the original
8341 * malloc() mapping.
8342 */
8343 }
8344
1c79356b
A
8345 if (old_object != VM_OBJECT_NULL) {
8346 if(entry->is_sub_map) {
9bccf70c 8347 if(entry->use_pmap) {
0c530ab8 8348#ifndef NO_NESTED_PMAP
9bccf70c 8349 pmap_unnest(dst_map->pmap,
2d21ac55
A
8350 (addr64_t)entry->vme_start,
8351 entry->vme_end - entry->vme_start);
0c530ab8 8352#endif /* NO_NESTED_PMAP */
316670eb 8353 if(dst_map->mapped_in_other_pmaps) {
9bccf70c
A
8354 /* clean up parent */
8355 /* map/maps */
2d21ac55
A
8356 vm_map_submap_pmap_clean(
8357 dst_map, entry->vme_start,
8358 entry->vme_end,
3e170ce0
A
8359 VME_SUBMAP(entry),
8360 VME_OFFSET(entry));
9bccf70c
A
8361 }
8362 } else {
8363 vm_map_submap_pmap_clean(
8364 dst_map, entry->vme_start,
8365 entry->vme_end,
3e170ce0
A
8366 VME_SUBMAP(entry),
8367 VME_OFFSET(entry));
9bccf70c 8368 }
3e170ce0 8369 vm_map_deallocate(VME_SUBMAP(entry));
9bccf70c 8370 } else {
316670eb 8371 if(dst_map->mapped_in_other_pmaps) {
39236c6e 8372 vm_object_pmap_protect_options(
3e170ce0
A
8373 VME_OBJECT(entry),
8374 VME_OFFSET(entry),
9bccf70c 8375 entry->vme_end
2d21ac55 8376 - entry->vme_start,
9bccf70c
A
8377 PMAP_NULL,
8378 entry->vme_start,
39236c6e
A
8379 VM_PROT_NONE,
8380 PMAP_OPTIONS_REMOVE);
9bccf70c 8381 } else {
39236c6e
A
8382 pmap_remove_options(
8383 dst_map->pmap,
8384 (addr64_t)(entry->vme_start),
8385 (addr64_t)(entry->vme_end),
8386 PMAP_OPTIONS_REMOVE);
9bccf70c 8387 }
1c79356b 8388 vm_object_deallocate(old_object);
9bccf70c 8389 }
1c79356b
A
8390 }
8391
8392 entry->is_sub_map = FALSE;
3e170ce0
A
8393 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8394 object = VME_OBJECT(entry);
1c79356b
A
8395 entry->needs_copy = copy_entry->needs_copy;
8396 entry->wired_count = 0;
8397 entry->user_wired_count = 0;
3e170ce0
A
8398 offset = VME_OFFSET(copy_entry);
8399 VME_OFFSET_SET(entry, offset);
1c79356b
A
8400
8401 vm_map_copy_entry_unlink(copy, copy_entry);
8402 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 8403
1c79356b 8404 /*
2d21ac55 8405 * we could try to push pages into the pmap at this point, BUT
1c79356b
A
8406 * this optimization only saved on average 2 us per page if ALL
8407 * the pages in the source were currently mapped
8408 * and ALL the pages in the dest were touched; if fewer than
8409 * 2/3 of the pages were touched, this optimization actually cost more cycles;
2d21ac55 8410 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
1c79356b
A
8411 */
8412
1c79356b
A
8413 /*
8414 * Set up for the next iteration. The map
8415 * has not been unlocked, so the next
8416 * address should be at the end of this
8417 * entry, and the next map entry should be
8418 * the one following it.
8419 */
8420
8421 start = tmp_entry->vme_end;
8422 tmp_entry = tmp_entry->vme_next;
8423 } else {
8424 vm_map_version_t version;
ebb1b9f4
A
8425 vm_object_t dst_object;
8426 vm_object_offset_t dst_offset;
1c79356b
A
8427 kern_return_t r;
8428
ebb1b9f4 8429 slow_copy:
e2d2fc5c 8430 if (entry->needs_copy) {
3e170ce0
A
8431 VME_OBJECT_SHADOW(entry,
8432 (entry->vme_end -
8433 entry->vme_start));
e2d2fc5c
A
8434 entry->needs_copy = FALSE;
8435 }
8436
3e170ce0
A
8437 dst_object = VME_OBJECT(entry);
8438 dst_offset = VME_OFFSET(entry);
ebb1b9f4 8439
1c79356b
A
8440 /*
8441 * Take an object reference, and record
8442 * the map version information so that the
8443 * map can be safely unlocked.
8444 */
8445
ebb1b9f4
A
8446 if (dst_object == VM_OBJECT_NULL) {
8447 /*
8448 * We would usually have just taken the
8449 * optimized path above if the destination
8450 * object has not been allocated yet. But we
8451 * now disable that optimization if the copy
8452 * entry's object is not backed by anonymous
8453 * memory to avoid replacing malloc'ed
8454 * (i.e. re-usable) anonymous memory with a
8455 * not-so-anonymous mapping.
8456 * So we have to handle this case here and
8457 * allocate a new VM object for this map entry.
8458 */
8459 dst_object = vm_object_allocate(
8460 entry->vme_end - entry->vme_start);
8461 dst_offset = 0;
3e170ce0
A
8462 VME_OBJECT_SET(entry, dst_object);
8463 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 8464 assert(entry->use_pmap);
ebb1b9f4
A
8465
8466 }
8467
1c79356b
A
8468 vm_object_reference(dst_object);
8469
9bccf70c
A
8470 /* account for unlock bumping up timestamp */
8471 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
8472
8473 vm_map_unlock(dst_map);
8474
8475 /*
8476 * Copy as much as possible in one pass
8477 */
8478
8479 copy_size = size;
8480 r = vm_fault_copy(
3e170ce0
A
8481 VME_OBJECT(copy_entry),
8482 VME_OFFSET(copy_entry),
2d21ac55
A
8483 &copy_size,
8484 dst_object,
8485 dst_offset,
8486 dst_map,
8487 &version,
8488 THREAD_UNINT );
1c79356b
A
8489
8490 /*
8491 * Release the object reference
8492 */
8493
8494 vm_object_deallocate(dst_object);
8495
8496 /*
8497 * If a hard error occurred, return it now
8498 */
8499
8500 if (r != KERN_SUCCESS)
8501 return(r);
8502
8503 if (copy_size != 0) {
8504 /*
8505 * Dispose of the copied region
8506 */
8507
8508 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8509 copy_entry->vme_start + copy_size);
1c79356b 8510 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 8511 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
8512 vm_map_copy_entry_dispose(copy, copy_entry);
8513 }
8514
8515 /*
8516 * Pick up in the destination map where we left off.
8517 *
8518 * Use the version information to avoid a lookup
8519 * in the normal case.
8520 */
8521
8522 start += copy_size;
8523 vm_map_lock(dst_map);
e2d2fc5c
A
8524 if (version.main_timestamp == dst_map->timestamp &&
8525 copy_size != 0) {
1c79356b
A
8526 /* We can safely use saved tmp_entry value */
8527
fe8ab488
A
8528 if (tmp_entry->map_aligned &&
8529 !VM_MAP_PAGE_ALIGNED(
8530 start,
8531 VM_MAP_PAGE_MASK(dst_map))) {
8532 /* no longer map-aligned */
8533 tmp_entry->map_aligned = FALSE;
8534 }
1c79356b
A
8535 vm_map_clip_end(dst_map, tmp_entry, start);
8536 tmp_entry = tmp_entry->vme_next;
8537 } else {
8538 /* Must do lookup of tmp_entry */
8539
8540 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8541 vm_map_unlock(dst_map);
8542 return(KERN_INVALID_ADDRESS);
8543 }
fe8ab488
A
8544 if (tmp_entry->map_aligned &&
8545 !VM_MAP_PAGE_ALIGNED(
8546 start,
8547 VM_MAP_PAGE_MASK(dst_map))) {
8548 /* no longer map-aligned */
8549 tmp_entry->map_aligned = FALSE;
8550 }
1c79356b
A
8551 vm_map_clip_start(dst_map, tmp_entry, start);
8552 }
8553 }
8554 }/* while */
8555
8556 return(KERN_SUCCESS);
8557}/* vm_map_copy_overwrite_aligned */
8558
8559/*
91447636 8560 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
8561 *
8562 * Description:
8563 * Copy in data to a kernel buffer from space in the
91447636 8564 * source map. The original space may be optionally
1c79356b
A
8565 * deallocated.
8566 *
8567 * If successful, returns a new copy object.
8568 */
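/*
 * Illustrative sketch, not part of the original source: for a small
 * copy-in (len <= msg_ool_size_small) the data is held inline in a
 * single kalloc block, so no map entries or VM objects are created:
 *
 *	copy = kalloc(cpy_kdata_hdr_sz + len);
 *	[ vm_map_copy header, type KERNEL_BUFFER | len bytes at copy->cpy_kdata ]
 */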
91447636 8569static kern_return_t
1c79356b
A
8570vm_map_copyin_kernel_buffer(
8571 vm_map_t src_map,
91447636
A
8572 vm_map_offset_t src_addr,
8573 vm_map_size_t len,
1c79356b
A
8574 boolean_t src_destroy,
8575 vm_map_copy_t *copy_result)
8576{
91447636 8577 kern_return_t kr;
1c79356b 8578 vm_map_copy_t copy;
b0d623f7
A
8579 vm_size_t kalloc_size;
8580
3e170ce0
A
8581 if (len > msg_ool_size_small)
8582 return KERN_INVALID_ARGUMENT;
1c79356b 8583
3e170ce0
A
8584 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8585
8586 copy = (vm_map_copy_t)kalloc(kalloc_size);
8587 if (copy == VM_MAP_COPY_NULL)
1c79356b 8588 return KERN_RESOURCE_SHORTAGE;
1c79356b
A
8589 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8590 copy->size = len;
8591 copy->offset = 0;
1c79356b 8592
3e170ce0 8593 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636
A
8594 if (kr != KERN_SUCCESS) {
8595 kfree(copy, kalloc_size);
8596 return kr;
1c79356b
A
8597 }
8598 if (src_destroy) {
39236c6e
A
8599 (void) vm_map_remove(
8600 src_map,
8601 vm_map_trunc_page(src_addr,
8602 VM_MAP_PAGE_MASK(src_map)),
8603 vm_map_round_page(src_addr + len,
8604 VM_MAP_PAGE_MASK(src_map)),
8605 (VM_MAP_REMOVE_INTERRUPTIBLE |
8606 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
39037602 8607 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
1c79356b
A
8608 }
8609 *copy_result = copy;
8610 return KERN_SUCCESS;
8611}
8612
8613/*
91447636 8614 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
8615 *
8616 * Description:
8617 * Copy out data from a kernel buffer into space in the
8618 * destination map. The space may optionally be dynamically
8619 * allocated.
8620 *
8621 * If successful, consumes the copy object.
8622 * Otherwise, the caller is responsible for it.
8623 */
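/*
 * A minimal sketch (not part of the original source) of the two
 * copyout paths implemented below:
 *
 *	if (current_thread()->map == map) {
 *		copyout(copy->cpy_kdata, *addr, copy_size);
 *	} else {
 *		(adopt the target's address space for the duration)
 *		oldmap = vm_map_switch(map);
 *		copyout(copy->cpy_kdata, *addr, copy_size);
 *		(void) vm_map_switch(oldmap);
 *	}
 */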
91447636
A
8624static int vm_map_copyout_kernel_buffer_failures = 0;
8625static kern_return_t
1c79356b 8626vm_map_copyout_kernel_buffer(
91447636
A
8627 vm_map_t map,
8628 vm_map_address_t *addr, /* IN/OUT */
8629 vm_map_copy_t copy,
39037602 8630 vm_map_size_t copy_size,
39236c6e
A
8631 boolean_t overwrite,
8632 boolean_t consume_on_success)
1c79356b
A
8633{
8634 kern_return_t kr = KERN_SUCCESS;
91447636 8635 thread_t thread = current_thread();
1c79356b 8636
39037602
A
8637 assert(copy->size == copy_size);
8638
3e170ce0
A
8639 /*
8640 * check for corrupted vm_map_copy structure
8641 */
39037602 8642 if (copy_size > msg_ool_size_small || copy->offset)
3e170ce0
A
8643 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8644 (long long)copy->size, (long long)copy->offset);
8645
1c79356b
A
8646 if (!overwrite) {
8647
8648 /*
8649 * Allocate space in the target map for the data
8650 */
8651 *addr = 0;
8652 kr = vm_map_enter(map,
8653 addr,
39037602 8654 vm_map_round_page(copy_size,
39236c6e 8655 VM_MAP_PAGE_MASK(map)),
91447636
A
8656 (vm_map_offset_t) 0,
8657 VM_FLAGS_ANYWHERE,
1c79356b
A
8658 VM_OBJECT_NULL,
8659 (vm_object_offset_t) 0,
8660 FALSE,
8661 VM_PROT_DEFAULT,
8662 VM_PROT_ALL,
8663 VM_INHERIT_DEFAULT);
8664 if (kr != KERN_SUCCESS)
91447636 8665 return kr;
1c79356b
A
8666 }
8667
8668 /*
8669 * Copyout the data from the kernel buffer to the target map.
8670 */
91447636 8671 if (thread->map == map) {
1c79356b
A
8672
8673 /*
8674 * If the target map is the current map, just do
8675 * the copy.
8676 */
39037602
A
8677 assert((vm_size_t)copy_size == copy_size);
8678 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636 8679 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8680 }
8681 }
8682 else {
8683 vm_map_t oldmap;
8684
8685 /*
8686 * If the target map is another map, assume the
8687 * target's address space identity for the duration
8688 * of the copy.
8689 */
8690 vm_map_reference(map);
8691 oldmap = vm_map_switch(map);
8692
39037602
A
8693 assert((vm_size_t)copy_size == copy_size);
8694 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636
A
8695 vm_map_copyout_kernel_buffer_failures++;
8696 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8697 }
8698
8699 (void) vm_map_switch(oldmap);
8700 vm_map_deallocate(map);
8701 }
8702
91447636
A
8703 if (kr != KERN_SUCCESS) {
8704 /* the copy failed, clean up */
8705 if (!overwrite) {
8706 /*
8707 * Deallocate the space we allocated in the target map.
8708 */
39236c6e
A
8709 (void) vm_map_remove(
8710 map,
8711 vm_map_trunc_page(*addr,
8712 VM_MAP_PAGE_MASK(map)),
8713 vm_map_round_page((*addr +
39037602 8714 vm_map_round_page(copy_size,
39236c6e
A
8715 VM_MAP_PAGE_MASK(map))),
8716 VM_MAP_PAGE_MASK(map)),
8717 VM_MAP_NO_FLAGS);
91447636
A
8718 *addr = 0;
8719 }
8720 } else {
8721 /* copy was successful, discard the copy structure */
39236c6e 8722 if (consume_on_success) {
39037602 8723 kfree(copy, copy_size + cpy_kdata_hdr_sz);
39236c6e 8724 }
91447636 8725 }
1c79356b 8726
91447636 8727 return kr;
1c79356b
A
8728}
8729
8730/*
8731 * Macro: vm_map_copy_insert
8732 *
8733 * Description:
8734 * Link a copy chain ("copy") into a map at the
8735 * specified location (after "where").
8736 * Side effects:
8737 * The copy chain is destroyed.
8738 * Warning:
8739 * The arguments are evaluated multiple times.
8740 */
8741#define vm_map_copy_insert(map, where, copy) \
8742MACRO_BEGIN \
6d2010ae
A
8743 vm_map_store_copy_insert(map, where, copy); \
8744 zfree(vm_map_copy_zone, copy); \
1c79356b
A
8745MACRO_END
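/*
 * Hypothetical example, not in the original source: because the
 * macro above expands "copy" twice (once for the insert, once for
 * the zfree), a call such as
 *
 *	vm_map_copy_insert(dst_map, last, dequeue_pending_copy());
 *
 * (dequeue_pending_copy() being an imaginary function with side
 * effects) would link one copy chain but free a different one.
 * Only pass simple lvalues, as the caller in
 * vm_map_copyout_internal() does:
 *
 *	vm_map_copy_insert(dst_map, last, copy);
 */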
8746
39236c6e
A
8747void
8748vm_map_copy_remap(
8749 vm_map_t map,
8750 vm_map_entry_t where,
8751 vm_map_copy_t copy,
8752 vm_map_offset_t adjustment,
8753 vm_prot_t cur_prot,
8754 vm_prot_t max_prot,
8755 vm_inherit_t inheritance)
8756{
8757 vm_map_entry_t copy_entry, new_entry;
8758
8759 for (copy_entry = vm_map_copy_first_entry(copy);
8760 copy_entry != vm_map_copy_to_entry(copy);
8761 copy_entry = copy_entry->vme_next) {
8762 /* get a new VM map entry for the map */
8763 new_entry = vm_map_entry_create(map,
8764 !map->hdr.entries_pageable);
8765 /* copy the "copy entry" to the new entry */
8766 vm_map_entry_copy(new_entry, copy_entry);
8767 /* adjust "start" and "end" */
8768 new_entry->vme_start += adjustment;
8769 new_entry->vme_end += adjustment;
8770 /* clear some attributes */
8771 new_entry->inheritance = inheritance;
8772 new_entry->protection = cur_prot;
8773 new_entry->max_protection = max_prot;
8774 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8775 /* take an extra reference on the entry's "object" */
8776 if (new_entry->is_sub_map) {
fe8ab488 8777 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
8778 vm_map_lock(VME_SUBMAP(new_entry));
8779 vm_map_reference(VME_SUBMAP(new_entry));
8780 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 8781 } else {
3e170ce0 8782 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
8783 }
8784 /* insert the new entry in the map */
8785 vm_map_store_entry_link(map, where, new_entry);
8786 /* continue inserting the "copy entries" after the new entry */
8787 where = new_entry;
8788 }
8789}
8790
2dced7af 8791
39037602
A
8792/*
8793 * Returns true if *size matches (or is in the range of) copy->size.
8794 * Upon returning true, the *size field is updated with the actual size of the
8795 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
8796 */
2dced7af
A
8797boolean_t
8798vm_map_copy_validate_size(
8799 vm_map_t dst_map,
8800 vm_map_copy_t copy,
39037602 8801 vm_map_size_t *size)
2dced7af
A
8802{
8803 if (copy == VM_MAP_COPY_NULL)
8804 return FALSE;
39037602
A
8805 vm_map_size_t copy_sz = copy->size;
8806 vm_map_size_t sz = *size;
2dced7af
A
8807 switch (copy->type) {
8808 case VM_MAP_COPY_OBJECT:
8809 case VM_MAP_COPY_KERNEL_BUFFER:
39037602 8810 if (sz == copy_sz)
2dced7af
A
8811 return TRUE;
8812 break;
8813 case VM_MAP_COPY_ENTRY_LIST:
8814 /*
8815 * potential page-size rounding prevents us from exactly
8816 * validating this flavor of vm_map_copy, but we can at least
8817 * assert that it's within a range.
8818 */
39037602
A
8819 if (copy_sz >= sz &&
8820 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
8821 *size = copy_sz;
2dced7af 8822 return TRUE;
39037602 8823 }
2dced7af
A
8824 break;
8825 default:
8826 break;
8827 }
8828 return FALSE;
8829}
8830
39037602
A
8831/*
8832 * Routine: vm_map_copyout_size
8833 *
8834 * Description:
8835 * Copy out a copy chain ("copy") into newly-allocated
8836 * space in the destination map. Uses a prevalidated
8837 * size for the copy object (vm_map_copy_validate_size).
8838 *
8839 * If successful, consumes the copy object.
8840 * Otherwise, the caller is responsible for it.
8841 */
8842kern_return_t
8843vm_map_copyout_size(
8844 vm_map_t dst_map,
8845 vm_map_address_t *dst_addr, /* OUT */
8846 vm_map_copy_t copy,
8847 vm_map_size_t copy_size)
8848{
8849 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
8850 TRUE, /* consume_on_success */
8851 VM_PROT_DEFAULT,
8852 VM_PROT_ALL,
8853 VM_INHERIT_DEFAULT);
8854}
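/*
 * A minimal sketch (not part of the original source) of how a
 * hypothetical caller might pair the two routines above;
 * "requested_size" and "addr" are placeholder locals:
 *
 *	vm_map_size_t		size = requested_size;
 *	vm_map_address_t	addr;
 *	kern_return_t		kr;
 *
 *	if (!vm_map_copy_validate_size(dst_map, copy, &size))
 *		return KERN_FAILURE;
 *	kr = vm_map_copyout_size(dst_map, &addr, copy, size);
 */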
2dced7af 8855
1c79356b
A
8856/*
8857 * Routine: vm_map_copyout
8858 *
8859 * Description:
8860 * Copy out a copy chain ("copy") into newly-allocated
8861 * space in the destination map.
8862 *
8863 * If successful, consumes the copy object.
8864 * Otherwise, the caller is responsible for it.
8865 */
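/*
 * A minimal sketch (not part of the original source) of the
 * ownership contract described above; "dst_addr" is a placeholder:
 *
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	(still the caller's on failure)
 *	(on success, "copy" has been consumed and must not be used again)
 */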
8866kern_return_t
8867vm_map_copyout(
91447636
A
8868 vm_map_t dst_map,
8869 vm_map_address_t *dst_addr, /* OUT */
8870 vm_map_copy_t copy)
39236c6e 8871{
39037602
A
8872 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
8873 TRUE, /* consume_on_success */
8874 VM_PROT_DEFAULT,
8875 VM_PROT_ALL,
8876 VM_INHERIT_DEFAULT);
39236c6e
A
8877}
8878
8879kern_return_t
8880vm_map_copyout_internal(
8881 vm_map_t dst_map,
8882 vm_map_address_t *dst_addr, /* OUT */
8883 vm_map_copy_t copy,
39037602 8884 vm_map_size_t copy_size,
39236c6e
A
8885 boolean_t consume_on_success,
8886 vm_prot_t cur_protection,
8887 vm_prot_t max_protection,
8888 vm_inherit_t inheritance)
1c79356b 8889{
91447636
A
8890 vm_map_size_t size;
8891 vm_map_size_t adjustment;
8892 vm_map_offset_t start;
1c79356b
A
8893 vm_object_offset_t vm_copy_start;
8894 vm_map_entry_t last;
1c79356b 8895 vm_map_entry_t entry;
3e170ce0 8896 vm_map_entry_t hole_entry;
1c79356b
A
8897
8898 /*
8899 * Check for null copy object.
8900 */
8901
8902 if (copy == VM_MAP_COPY_NULL) {
8903 *dst_addr = 0;
8904 return(KERN_SUCCESS);
8905 }
8906
39037602
A
8907 if (copy->size != copy_size) {
8908 *dst_addr = 0;
8909 return KERN_FAILURE;
8910 }
8911
1c79356b
A
8912 /*
8913 * Check for special copy object, created
8914 * by vm_map_copyin_object.
8915 */
8916
8917 if (copy->type == VM_MAP_COPY_OBJECT) {
8918 vm_object_t object = copy->cpy_object;
8919 kern_return_t kr;
8920 vm_object_offset_t offset;
8921
91447636 8922 offset = vm_object_trunc_page(copy->offset);
39037602 8923 size = vm_map_round_page((copy_size +
39236c6e
A
8924 (vm_map_size_t)(copy->offset -
8925 offset)),
8926 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8927 *dst_addr = 0;
8928 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 8929 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
1c79356b
A
8930 object, offset, FALSE,
8931 VM_PROT_DEFAULT, VM_PROT_ALL,
8932 VM_INHERIT_DEFAULT);
8933 if (kr != KERN_SUCCESS)
8934 return(kr);
8935 /* Account for non-pagealigned copy object */
91447636 8936 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
39236c6e
A
8937 if (consume_on_success)
8938 zfree(vm_map_copy_zone, copy);
1c79356b
A
8939 return(KERN_SUCCESS);
8940 }
8941
8942 /*
8943 * Check for special kernel buffer allocated
8944 * by new_ipc_kmsg_copyin.
8945 */
8946
8947 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39037602
A
8948 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8949 copy, copy_size, FALSE,
39236c6e 8950 consume_on_success);
1c79356b
A
8951 }
8952
39236c6e 8953
1c79356b
A
8954 /*
8955 * Find space for the data
8956 */
8957
39236c6e
A
8958 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8959 VM_MAP_COPY_PAGE_MASK(copy));
39037602 8960 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
39236c6e 8961 VM_MAP_COPY_PAGE_MASK(copy))
2d21ac55 8962 - vm_copy_start;
1c79356b 8963
39236c6e 8964
2d21ac55 8965StartAgain: ;
1c79356b
A
8966
8967 vm_map_lock(dst_map);
6d2010ae
A
8968 if( dst_map->disable_vmentry_reuse == TRUE) {
8969 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8970 last = entry;
8971 } else {
3e170ce0
A
8972 if (dst_map->holelistenabled) {
8973 hole_entry = (vm_map_entry_t)dst_map->holes_list;
8974
8975 if (hole_entry == NULL) {
8976 /*
8977 * No more space in the map?
8978 */
8979 vm_map_unlock(dst_map);
8980 return(KERN_NO_SPACE);
8981 }
8982
8983 last = hole_entry;
8984 start = last->vme_start;
8985 } else {
8986 assert(first_free_is_valid(dst_map));
8987 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8988 vm_map_min(dst_map) : last->vme_end;
8989 }
39236c6e
A
8990 start = vm_map_round_page(start,
8991 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 8992 }
1c79356b
A
8993
8994 while (TRUE) {
8995 vm_map_entry_t next = last->vme_next;
91447636 8996 vm_map_offset_t end = start + size;
1c79356b
A
8997
8998 if ((end > dst_map->max_offset) || (end < start)) {
8999 if (dst_map->wait_for_space) {
9000 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
9001 assert_wait((event_t) dst_map,
9002 THREAD_INTERRUPTIBLE);
9003 vm_map_unlock(dst_map);
91447636 9004 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
9005 goto StartAgain;
9006 }
9007 }
9008 vm_map_unlock(dst_map);
9009 return(KERN_NO_SPACE);
9010 }
9011
3e170ce0
A
9012 if (dst_map->holelistenabled) {
9013 if (last->vme_end >= end)
9014 break;
9015 } else {
9016 /*
9017 * If there are no more entries, we must win.
9018 *
9019 * OR
9020 *
9021 * If there is another entry, it must be
9022 * after the end of the potential new region.
9023 */
9024
9025 if (next == vm_map_to_entry(dst_map))
9026 break;
9027
9028 if (next->vme_start >= end)
9029 break;
9030 }
1c79356b
A
9031
9032 last = next;
3e170ce0
A
9033
9034 if (dst_map->holelistenabled) {
9035 if (last == (vm_map_entry_t) dst_map->holes_list) {
9036 /*
9037 * Wrapped around
9038 */
9039 vm_map_unlock(dst_map);
9040 return(KERN_NO_SPACE);
9041 }
9042 start = last->vme_start;
9043 } else {
9044 start = last->vme_end;
9045 }
39236c6e
A
9046 start = vm_map_round_page(start,
9047 VM_MAP_PAGE_MASK(dst_map));
9048 }
9049
3e170ce0
A
9050 if (dst_map->holelistenabled) {
9051 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
9052 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
9053 }
9054 }
9055
9056
39236c6e
A
9057 adjustment = start - vm_copy_start;
9058 if (! consume_on_success) {
9059 /*
9060 * We're not allowed to consume "copy", so we'll have to
9061 * copy its map entries into the destination map below.
9062 * No need to re-allocate map entries from the correct
9063 * (pageable or not) zone, since we'll get new map entries
9064 * during the transfer.
9065 * We'll also adjust the map entries's "start" and "end"
9066 * during the transfer, to keep "copy"'s entries consistent
9067 * with its "offset".
9068 */
9069 goto after_adjustments;
1c79356b
A
9070 }
9071
9072 /*
9073 * Since we're going to just drop the map
9074 * entries from the copy into the destination
9075 * map, they must come from the same pool.
9076 */
9077
9078 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
9079 /*
9080 * Mismatches occur when dealing with the default
9081 * pager.
9082 */
9083 zone_t old_zone;
9084 vm_map_entry_t next, new;
9085
9086 /*
9087 * Find the zone that the copies were allocated from
9088 */
7ddcb079 9089
2d21ac55
A
9090 entry = vm_map_copy_first_entry(copy);
9091
9092 /*
9093 * Reinitialize the copy so that vm_map_copy_entry_link
9094 * will work.
9095 */
6d2010ae 9096 vm_map_store_copy_reset(copy, entry);
2d21ac55 9097 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
9098
9099 /*
9100 * Copy each entry.
9101 */
9102 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 9103 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 9104 vm_map_entry_copy_full(new, entry);
fe8ab488
A
9105 assert(!new->iokit_acct);
9106 if (new->is_sub_map) {
9107 /* clr address space specifics */
9108 new->use_pmap = FALSE;
9109 }
2d21ac55
A
9110 vm_map_copy_entry_link(copy,
9111 vm_map_copy_last_entry(copy),
9112 new);
9113 next = entry->vme_next;
7ddcb079 9114 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
9115 zfree(old_zone, entry);
9116 entry = next;
9117 }
1c79356b
A
9118 }
9119
9120 /*
9121 * Adjust the addresses in the copy chain, and
9122 * reset the region attributes.
9123 */
9124
1c79356b
A
9125 for (entry = vm_map_copy_first_entry(copy);
9126 entry != vm_map_copy_to_entry(copy);
9127 entry = entry->vme_next) {
39236c6e
A
9128 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
9129 /*
9130 * We're injecting this copy entry into a map that
9131 * has the standard page alignment, so clear
9132 * "map_aligned" (which might have been inherited
9133 * from the original map entry).
9134 */
9135 entry->map_aligned = FALSE;
9136 }
9137
1c79356b
A
9138 entry->vme_start += adjustment;
9139 entry->vme_end += adjustment;
9140
39236c6e
A
9141 if (entry->map_aligned) {
9142 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
9143 VM_MAP_PAGE_MASK(dst_map)));
9144 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
9145 VM_MAP_PAGE_MASK(dst_map)));
9146 }
9147
1c79356b
A
9148 entry->inheritance = VM_INHERIT_DEFAULT;
9149 entry->protection = VM_PROT_DEFAULT;
9150 entry->max_protection = VM_PROT_ALL;
9151 entry->behavior = VM_BEHAVIOR_DEFAULT;
9152
9153 /*
9154 * If the entry is now wired,
9155 * map the pages into the destination map.
9156 */
9157 if (entry->wired_count != 0) {
39037602 9158 vm_map_offset_t va;
2d21ac55 9159 vm_object_offset_t offset;
39037602 9160 vm_object_t object;
2d21ac55
A
9161 vm_prot_t prot;
9162 int type_of_fault;
1c79356b 9163
3e170ce0
A
9164 object = VME_OBJECT(entry);
9165 offset = VME_OFFSET(entry);
2d21ac55 9166 va = entry->vme_start;
1c79356b 9167
2d21ac55
A
9168 pmap_pageable(dst_map->pmap,
9169 entry->vme_start,
9170 entry->vme_end,
9171 TRUE);
1c79356b 9172
2d21ac55 9173 while (va < entry->vme_end) {
39037602 9174 vm_page_t m;
1c79356b 9175
2d21ac55
A
9176 /*
9177 * Look up the page in the object.
9178 * Assert that the page will be found in the
9179 * top object:
9180 * either
9181 * the object was newly created by
9182 * vm_object_copy_slowly, and has
9183 * copies of all of the pages from
9184 * the source object
9185 * or
9186 * the object was moved from the old
9187 * map entry; because the old map
9188 * entry was wired, all of the pages
9189 * were in the top-level object.
9190 * (XXX not true if we wire pages for
9191 * reading)
9192 */
9193 vm_object_lock(object);
91447636 9194
2d21ac55 9195 m = vm_page_lookup(object, offset);
b0d623f7 9196 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
2d21ac55
A
9197 m->absent)
9198 panic("vm_map_copyout: wiring %p", m);
1c79356b 9199
2d21ac55
A
9200 /*
9201 * ENCRYPTED SWAP:
9202 * The page is assumed to be wired here, so it
9203 * shouldn't be encrypted. Otherwise, we
9204 * couldn't enter it in the page table, since
9205 * we don't want the user to see the encrypted
9206 * data.
9207 */
9208 ASSERT_PAGE_DECRYPTED(m);
1c79356b 9209
2d21ac55 9210 prot = entry->protection;
1c79356b 9211
3e170ce0
A
9212 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9213 prot)
2d21ac55 9214 prot |= VM_PROT_EXECUTE;
1c79356b 9215
2d21ac55 9216 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 9217
6d2010ae 9218 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
fe8ab488 9219 VM_PAGE_WIRED(m), FALSE, FALSE,
3e170ce0 9220 FALSE, VME_ALIAS(entry),
fe8ab488
A
9221 ((entry->iokit_acct ||
9222 (!entry->is_sub_map &&
9223 !entry->use_pmap))
9224 ? PMAP_OPTIONS_ALT_ACCT
9225 : 0),
9226 NULL, &type_of_fault);
1c79356b 9227
2d21ac55 9228 vm_object_unlock(object);
1c79356b 9229
2d21ac55
A
9230 offset += PAGE_SIZE_64;
9231 va += PAGE_SIZE;
1c79356b
A
9232 }
9233 }
9234 }
9235
39236c6e
A
9236after_adjustments:
9237
1c79356b
A
9238 /*
9239 * Correct the page alignment for the result
9240 */
9241
9242 *dst_addr = start + (copy->offset - vm_copy_start);
9243
9244 /*
9245 * Update the hints and the map size
9246 */
9247
39236c6e
A
9248 if (consume_on_success) {
9249 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9250 } else {
9251 SAVE_HINT_MAP_WRITE(dst_map, last);
9252 }
1c79356b
A
9253
9254 dst_map->size += size;
9255
9256 /*
9257 * Link in the copy
9258 */
9259
39236c6e
A
9260 if (consume_on_success) {
9261 vm_map_copy_insert(dst_map, last, copy);
9262 } else {
9263 vm_map_copy_remap(dst_map, last, copy, adjustment,
9264 cur_protection, max_protection,
9265 inheritance);
9266 }
1c79356b
A
9267
9268 vm_map_unlock(dst_map);
9269
9270 /*
9271 * XXX If wiring_required, call vm_map_pageable
9272 */
9273
9274 return(KERN_SUCCESS);
9275}
9276
1c79356b
A
9277/*
9278 * Routine: vm_map_copyin
9279 *
9280 * Description:
2d21ac55
A
9281 * see vm_map_copyin_common. Exported via Unsupported.exports.
9282 *
9283 */
9284
9285#undef vm_map_copyin
9286
9287kern_return_t
9288vm_map_copyin(
9289 vm_map_t src_map,
9290 vm_map_address_t src_addr,
9291 vm_map_size_t len,
9292 boolean_t src_destroy,
9293 vm_map_copy_t *copy_result) /* OUT */
9294{
9295 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9296 FALSE, copy_result, FALSE));
9297}
9298
9299/*
9300 * Routine: vm_map_copyin_common
9301 *
9302 * Description:
1c79356b
A
9303 * Copy the specified region (src_addr, len) from the
9304 * source address space (src_map), possibly removing
9305 * the region from the source address space (src_destroy).
9306 *
9307 * Returns:
9308 * A vm_map_copy_t object (copy_result), suitable for
9309 * insertion into another address space (using vm_map_copyout),
9310 * copying over another address space region (using
9311 * vm_map_copy_overwrite). If the copy is unused, it
9312 * should be destroyed (using vm_map_copy_discard).
9313 *
9314 * In/out conditions:
9315 * The source map should not be locked on entry.
9316 */
9317
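/*
 * A minimal sketch (not part of this file; the helper name and its
 * arguments are hypothetical) of how a kernel-internal caller
 * typically pairs the routines described above: capture a region
 * with vm_map_copyin(), insert it with vm_map_copyout(), and discard
 * the copy if it ends up unused.
 */
static kern_return_t
example_move_region(
	vm_map_t		src_task_map,
	vm_map_t		dst_task_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t		copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	/* capture the source region in a vm_map_copy_t */
	kr = vm_map_copyin(src_task_map, src_addr, len,
			   FALSE,		/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* insert it into the destination address space */
	kr = vm_map_copyout(dst_task_map, &dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* an unused copy must be destroyed */
		vm_map_copy_discard(copy);
	}
	return kr;
}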
9318typedef struct submap_map {
9319 vm_map_t parent_map;
91447636
A
9320 vm_map_offset_t base_start;
9321 vm_map_offset_t base_end;
2d21ac55 9322 vm_map_size_t base_len;
1c79356b
A
9323 struct submap_map *next;
9324} submap_map_t;
9325
9326kern_return_t
9327vm_map_copyin_common(
9328 vm_map_t src_map,
91447636
A
9329 vm_map_address_t src_addr,
9330 vm_map_size_t len,
1c79356b 9331 boolean_t src_destroy,
91447636 9332 __unused boolean_t src_volatile,
1c79356b
A
9333 vm_map_copy_t *copy_result, /* OUT */
9334 boolean_t use_maxprot)
4bd07ac2
A
9335{
9336 int flags;
9337
9338 flags = 0;
9339 if (src_destroy) {
9340 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9341 }
9342 if (use_maxprot) {
9343 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9344 }
9345 return vm_map_copyin_internal(src_map,
9346 src_addr,
9347 len,
9348 flags,
9349 copy_result);
9350}
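/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * the flag-based entry point below lets a caller combine options the
 * boolean interface above cannot express, e.g. maximum-protection
 * semantics together with preservation of purgeability (the
 * combination vm_map_fork_copy() can end up requesting further down
 * in this file).
 */
static kern_return_t
example_copyin_for_fork(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result)
{
	return vm_map_copyin_internal(src_map, src_addr, len,
				      VM_MAP_COPYIN_USE_MAXPROT |
				      VM_MAP_COPYIN_PRESERVE_PURGEABLE,
				      copy_result);
}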
9351kern_return_t
9352vm_map_copyin_internal(
9353 vm_map_t src_map,
9354 vm_map_address_t src_addr,
9355 vm_map_size_t len,
9356 int flags,
9357 vm_map_copy_t *copy_result) /* OUT */
1c79356b 9358{
1c79356b
A
9359 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9360 * in multi-level lookup, this
9361 * entry contains the actual
9362 * vm_object/offset.
9363 */
1c79356b
A
9364 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9365
91447636 9366 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
9367 * where copy is taking place now
9368 */
91447636 9369 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 9370 * copied */
2d21ac55 9371 vm_map_offset_t src_base;
91447636 9372 vm_map_t base_map = src_map;
1c79356b
A
9373 boolean_t map_share=FALSE;
9374 submap_map_t *parent_maps = NULL;
9375
1c79356b 9376 vm_map_copy_t copy; /* Resulting copy */
fe8ab488
A
9377 vm_map_address_t copy_addr;
9378 vm_map_size_t copy_size;
4bd07ac2
A
9379 boolean_t src_destroy;
9380 boolean_t use_maxprot;
39037602 9381 boolean_t preserve_purgeable;
4bd07ac2
A
9382
9383 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9384 return KERN_INVALID_ARGUMENT;
9385 }
9386
9387 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9388 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
39037602
A
9389 preserve_purgeable =
9390 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
1c79356b
A
9391
9392 /*
9393 * Check for copies of zero bytes.
9394 */
9395
9396 if (len == 0) {
9397 *copy_result = VM_MAP_COPY_NULL;
9398 return(KERN_SUCCESS);
9399 }
9400
4a249263
A
9401 /*
9402 * Check that the end address doesn't overflow
9403 */
9404 src_end = src_addr + len;
9405 if (src_end < src_addr)
9406 return KERN_INVALID_ADDRESS;
9407
39037602
A
9408 /*
9409 * Compute (page aligned) start and end of region
9410 */
9411 src_start = vm_map_trunc_page(src_addr,
9412 VM_MAP_PAGE_MASK(src_map));
9413 src_end = vm_map_round_page(src_end,
9414 VM_MAP_PAGE_MASK(src_map));
9415
1c79356b
A
9416 /*
9417 * If the copy is sufficiently small, use a kernel buffer instead
9418 * of making a virtual copy. The theory being that the cost of
9419 * setting up VM (and taking C-O-W faults) dominates the copy costs
9420 * for small regions.
9421 */
4bd07ac2
A
9422 if ((len < msg_ool_size_small) &&
9423 !use_maxprot &&
39037602
A
9424 !preserve_purgeable &&
9425 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
9426 /*
9427 * Since the "msg_ool_size_small" threshold was increased and
9428 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
9429 * address space limits, we revert to doing a virtual copy if the
9430 * copied range goes beyond those limits. Otherwise, mach_vm_read()
9431 * of the commpage would now fail when it used to work.
9432 */
9433 (src_start >= vm_map_min(src_map) &&
9434 src_start < vm_map_max(src_map) &&
9435 src_end >= vm_map_min(src_map) &&
9436 src_end < vm_map_max(src_map)))
2d21ac55
A
9437 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9438 src_destroy, copy_result);
1c79356b 9439
b0d623f7 9440 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 9441
1c79356b
A
9442 /*
9443 * Allocate a header element for the list.
9444 *
9445 * Use the start and end in the header to
9446 * remember the endpoints prior to rounding.
9447 */
9448
9449 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 9450 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 9451 vm_map_copy_first_entry(copy) =
2d21ac55 9452 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
1c79356b
A
9453 copy->type = VM_MAP_COPY_ENTRY_LIST;
9454 copy->cpy_hdr.nentries = 0;
9455 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
9456#if 00
9457 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9458#else
9459 /*
9460 * The copy entries can be broken down for a variety of reasons,
9461 * so we can't guarantee that they will remain map-aligned...
9462 * Will need to adjust the first copy_entry's "vme_start" and
9463 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9464 * rather than the original map's alignment.
9465 */
9466 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9467#endif
1c79356b 9468
6d2010ae
A
9469 vm_map_store_init( &(copy->cpy_hdr) );
9470
1c79356b
A
9471 copy->offset = src_addr;
9472 copy->size = len;
9473
7ddcb079 9474 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b
A
9475
9476#define RETURN(x) \
9477 MACRO_BEGIN \
9478 vm_map_unlock(src_map); \
9bccf70c
A
9479 if(src_map != base_map) \
9480 vm_map_deallocate(src_map); \
1c79356b
A
9481 if (new_entry != VM_MAP_ENTRY_NULL) \
9482 vm_map_copy_entry_dispose(copy,new_entry); \
9483 vm_map_copy_discard(copy); \
9484 { \
91447636 9485 submap_map_t *_ptr; \
1c79356b 9486 \
91447636 9487 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 9488 parent_maps=parent_maps->next; \
91447636
A
9489 if (_ptr->parent_map != base_map) \
9490 vm_map_deallocate(_ptr->parent_map); \
9491 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
9492 } \
9493 } \
9494 MACRO_RETURN(x); \
9495 MACRO_END
9496
9497 /*
9498 * Find the beginning of the region.
9499 */
9500
9501 vm_map_lock(src_map);
9502
fe8ab488
A
9503 /*
9504 * Lookup the original "src_addr" rather than the truncated
9505 * "src_start", in case "src_start" falls in a non-map-aligned
9506 * map entry *before* the map entry that contains "src_addr"...
9507 */
9508 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
1c79356b
A
9509 RETURN(KERN_INVALID_ADDRESS);
9510 if(!tmp_entry->is_sub_map) {
fe8ab488
A
9511 /*
9512 * ... but clip to the map-rounded "src_start" rather than
9513 * "src_addr" to preserve map-alignment. We'll adjust the
9514 * first copy entry at the end, if needed.
9515 */
1c79356b
A
9516 vm_map_clip_start(src_map, tmp_entry, src_start);
9517 }
fe8ab488
A
9518 if (src_start < tmp_entry->vme_start) {
9519 /*
9520 * Move "src_start" up to the start of the
9521 * first map entry to copy.
9522 */
9523 src_start = tmp_entry->vme_start;
9524 }
1c79356b
A
9525 /* set for later submap fix-up */
9526 copy_addr = src_start;
9527
9528 /*
9529 * Go through entries until we get to the end.
9530 */
9531
9532 while (TRUE) {
1c79356b 9533 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 9534 vm_map_size_t src_size; /* Size of source
1c79356b
A
9535 * map entry (in both
9536 * maps)
9537 */
9538
1c79356b
A
9539 vm_object_t src_object; /* Object to copy */
9540 vm_object_offset_t src_offset;
9541
9542 boolean_t src_needs_copy; /* Should source map
9543 * be made read-only
9544 * for copy-on-write?
9545 */
9546
9547 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9548
9549 boolean_t was_wired; /* Was source wired? */
9550 vm_map_version_t version; /* Version before locks
9551 * dropped to make copy
9552 */
9553 kern_return_t result; /* Return value from
9554 * copy_strategically.
9555 */
9556 while(tmp_entry->is_sub_map) {
91447636 9557 vm_map_size_t submap_len;
1c79356b
A
9558 submap_map_t *ptr;
9559
9560 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9561 ptr->next = parent_maps;
9562 parent_maps = ptr;
9563 ptr->parent_map = src_map;
9564 ptr->base_start = src_start;
9565 ptr->base_end = src_end;
9566 submap_len = tmp_entry->vme_end - src_start;
9567 if(submap_len > (src_end-src_start))
9568 submap_len = src_end-src_start;
2d21ac55 9569 ptr->base_len = submap_len;
1c79356b
A
9570
9571 src_start -= tmp_entry->vme_start;
3e170ce0 9572 src_start += VME_OFFSET(tmp_entry);
1c79356b 9573 src_end = src_start + submap_len;
3e170ce0 9574 src_map = VME_SUBMAP(tmp_entry);
1c79356b 9575 vm_map_lock(src_map);
9bccf70c
A
9576			/* keep an outstanding reference for all maps in */
9577			/* the chain of parent maps, except the base map */
9578 vm_map_reference(src_map);
1c79356b
A
9579 vm_map_unlock(ptr->parent_map);
9580 if (!vm_map_lookup_entry(
2d21ac55 9581 src_map, src_start, &tmp_entry))
1c79356b
A
9582 RETURN(KERN_INVALID_ADDRESS);
9583 map_share = TRUE;
9584 if(!tmp_entry->is_sub_map)
2d21ac55 9585 vm_map_clip_start(src_map, tmp_entry, src_start);
1c79356b
A
9586 src_entry = tmp_entry;
9587 }
2d21ac55
A
9588 /* we are now in the lowest level submap... */
9589
3e170ce0
A
9590 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9591 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
9592			/* This is not supported for now. In the future */
9593			/* we will need to detect the phys_contig */
9594			/* condition and then upgrade copy_slowly */
9595			/* to do a physical copy from the device- */
9596			/* memory-based object. We can piggy-back */
9597			/* off of the was_wired boolean to set up */
9598			/* the proper handling. */
0b4e3aa0
A
9599 RETURN(KERN_PROTECTION_FAILURE);
9600 }
1c79356b
A
9601 /*
9602 * Create a new address map entry to hold the result.
9603 * Fill in the fields from the appropriate source entries.
9604 * We must unlock the source map to do this if we need
9605 * to allocate a map entry.
9606 */
9607 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
9608 version.main_timestamp = src_map->timestamp;
9609 vm_map_unlock(src_map);
1c79356b 9610
7ddcb079 9611 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 9612
2d21ac55
A
9613 vm_map_lock(src_map);
9614 if ((version.main_timestamp + 1) != src_map->timestamp) {
9615 if (!vm_map_lookup_entry(src_map, src_start,
9616 &tmp_entry)) {
9617 RETURN(KERN_INVALID_ADDRESS);
9618 }
9619 if (!tmp_entry->is_sub_map)
9620 vm_map_clip_start(src_map, tmp_entry, src_start);
9621 continue; /* restart w/ new tmp_entry */
1c79356b 9622 }
1c79356b
A
9623 }
9624
9625 /*
9626 * Verify that the region can be read.
9627 */
9628 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
2d21ac55 9629 !use_maxprot) ||
1c79356b
A
9630 (src_entry->max_protection & VM_PROT_READ) == 0)
9631 RETURN(KERN_PROTECTION_FAILURE);
9632
9633 /*
9634 * Clip against the endpoints of the entire region.
9635 */
9636
9637 vm_map_clip_end(src_map, src_entry, src_end);
9638
9639 src_size = src_entry->vme_end - src_start;
3e170ce0
A
9640 src_object = VME_OBJECT(src_entry);
9641 src_offset = VME_OFFSET(src_entry);
1c79356b
A
9642 was_wired = (src_entry->wired_count != 0);
9643
9644 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
9645 if (new_entry->is_sub_map) {
9646 /* clr address space specifics */
9647 new_entry->use_pmap = FALSE;
9648 }
1c79356b
A
9649
9650 /*
9651 * Attempt non-blocking copy-on-write optimizations.
9652 */
9653
9654 if (src_destroy &&
9655 (src_object == VM_OBJECT_NULL ||
2d21ac55
A
9656 (src_object->internal && !src_object->true_share
9657 && !map_share))) {
9658 /*
9659 * If we are destroying the source, and the object
9660 * is internal, we can move the object reference
9661 * from the source to the copy. The copy is
9662 * copy-on-write only if the source is.
9663 * We make another reference to the object, because
9664 * destroying the source entry will deallocate it.
9665 */
9666 vm_object_reference(src_object);
1c79356b 9667
2d21ac55
A
9668 /*
9669			 * The copy is always unwired; vm_map_entry_copy()
9670			 * has already set its wired count to zero.
9671 */
1c79356b 9672
2d21ac55 9673 goto CopySuccessful;
1c79356b
A
9674 }
9675
9676
2d21ac55 9677 RestartCopy:
1c79356b 9678 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
3e170ce0 9679 src_object, new_entry, VME_OBJECT(new_entry),
1c79356b 9680 was_wired, 0);
55e303ae 9681 if ((src_object == VM_OBJECT_NULL ||
2d21ac55
A
9682 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9683 vm_object_copy_quickly(
3e170ce0 9684 &VME_OBJECT(new_entry),
2d21ac55
A
9685 src_offset,
9686 src_size,
9687 &src_needs_copy,
9688 &new_entry_needs_copy)) {
1c79356b
A
9689
9690 new_entry->needs_copy = new_entry_needs_copy;
9691
9692 /*
9693 * Handle copy-on-write obligations
9694 */
9695
9696 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
9697 vm_prot_t prot;
9698
9699 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 9700
3e170ce0
A
9701 if (override_nx(src_map, VME_ALIAS(src_entry))
9702 && prot)
0c530ab8 9703 prot |= VM_PROT_EXECUTE;
2d21ac55 9704
55e303ae
A
9705 vm_object_pmap_protect(
9706 src_object,
9707 src_offset,
9708 src_size,
9709 (src_entry->is_shared ?
2d21ac55
A
9710 PMAP_NULL
9711 : src_map->pmap),
55e303ae 9712 src_entry->vme_start,
0c530ab8
A
9713 prot);
9714
3e170ce0 9715 assert(tmp_entry->wired_count == 0);
55e303ae 9716 tmp_entry->needs_copy = TRUE;
1c79356b
A
9717 }
9718
9719 /*
9720 * The map has never been unlocked, so it's safe
9721 * to move to the next entry rather than doing
9722 * another lookup.
9723 */
9724
9725 goto CopySuccessful;
9726 }
9727
1c79356b
A
9728 /*
9729 * Take an object reference, so that we may
9730 * release the map lock(s).
9731 */
9732
9733 assert(src_object != VM_OBJECT_NULL);
9734 vm_object_reference(src_object);
9735
9736 /*
9737 * Record the timestamp for later verification.
9738 * Unlock the map.
9739 */
9740
9741 version.main_timestamp = src_map->timestamp;
9bccf70c 9742 vm_map_unlock(src_map); /* Increments timestamp once! */
1c79356b
A
9743
9744 /*
9745 * Perform the copy
9746 */
9747
9748 if (was_wired) {
55e303ae 9749 CopySlowly:
1c79356b
A
9750 vm_object_lock(src_object);
9751 result = vm_object_copy_slowly(
2d21ac55
A
9752 src_object,
9753 src_offset,
9754 src_size,
9755 THREAD_UNINT,
3e170ce0
A
9756 &VME_OBJECT(new_entry));
9757 VME_OFFSET_SET(new_entry, 0);
1c79356b 9758 new_entry->needs_copy = FALSE;
55e303ae
A
9759
9760 }
9761 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
2d21ac55 9762 (tmp_entry->is_shared || map_share)) {
55e303ae
A
9763 vm_object_t new_object;
9764
2d21ac55 9765 vm_object_lock_shared(src_object);
55e303ae 9766 new_object = vm_object_copy_delayed(
2d21ac55
A
9767 src_object,
9768 src_offset,
9769 src_size,
9770 TRUE);
55e303ae
A
9771 if (new_object == VM_OBJECT_NULL)
9772 goto CopySlowly;
9773
3e170ce0
A
9774 VME_OBJECT_SET(new_entry, new_object);
9775 assert(new_entry->wired_count == 0);
55e303ae 9776 new_entry->needs_copy = TRUE;
fe8ab488
A
9777 assert(!new_entry->iokit_acct);
9778 assert(new_object->purgable == VM_PURGABLE_DENY);
9779 new_entry->use_pmap = TRUE;
55e303ae
A
9780 result = KERN_SUCCESS;
9781
1c79356b 9782 } else {
3e170ce0
A
9783 vm_object_offset_t new_offset;
9784 new_offset = VME_OFFSET(new_entry);
1c79356b 9785 result = vm_object_copy_strategically(src_object,
2d21ac55
A
9786 src_offset,
9787 src_size,
3e170ce0
A
9788 &VME_OBJECT(new_entry),
9789 &new_offset,
2d21ac55 9790 &new_entry_needs_copy);
3e170ce0
A
9791 if (new_offset != VME_OFFSET(new_entry)) {
9792 VME_OFFSET_SET(new_entry, new_offset);
9793 }
1c79356b
A
9794
9795 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
9796 }
9797
39037602
A
9798 if (result == KERN_SUCCESS &&
9799 preserve_purgeable &&
9800 src_object->purgable != VM_PURGABLE_DENY) {
9801 vm_object_t new_object;
9802
9803 new_object = VME_OBJECT(new_entry);
9804 assert(new_object != src_object);
9805 vm_object_lock(new_object);
9806 assert(new_object->ref_count == 1);
9807 assert(new_object->shadow == VM_OBJECT_NULL);
9808 assert(new_object->copy == VM_OBJECT_NULL);
9809 assert(new_object->vo_purgeable_owner == NULL);
9810
9811 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
9812 new_object->true_share = TRUE;
9813 /* start as non-volatile with no owner... */
9814 new_object->purgable = VM_PURGABLE_NONVOLATILE;
9815 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
9816 /* ... and move to src_object's purgeable state */
9817 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
9818 int state;
9819 state = src_object->purgable;
9820 vm_object_purgable_control(
9821 new_object,
9822 VM_PURGABLE_SET_STATE,
9823 &state);
9824 }
9825 vm_object_unlock(new_object);
9826 new_object = VM_OBJECT_NULL;
9827 }
9828
1c79356b
A
9829 if (result != KERN_SUCCESS &&
9830 result != KERN_MEMORY_RESTART_COPY) {
9831 vm_map_lock(src_map);
9832 RETURN(result);
9833 }
9834
9835 /*
9836 * Throw away the extra reference
9837 */
9838
9839 vm_object_deallocate(src_object);
9840
9841 /*
9842 * Verify that the map has not substantially
9843 * changed while the copy was being made.
9844 */
9845
9bccf70c 9846 vm_map_lock(src_map);
1c79356b
A
9847
9848 if ((version.main_timestamp + 1) == src_map->timestamp)
9849 goto VerificationSuccessful;
9850
9851 /*
9852 * Simple version comparison failed.
9853 *
9854 * Retry the lookup and verify that the
9855 * same object/offset are still present.
9856 *
9857 * [Note: a memory manager that colludes with
9858 * the calling task can detect that we have
9859 * cheated. While the map was unlocked, the
9860 * mapping could have been changed and restored.]
9861 */
9862
9863 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 9864 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
9865 vm_object_deallocate(VME_OBJECT(new_entry));
9866 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
fe8ab488
A
9867 assert(!new_entry->iokit_acct);
9868 new_entry->use_pmap = TRUE;
9869 }
1c79356b
A
9870 RETURN(KERN_INVALID_ADDRESS);
9871 }
9872
9873 src_entry = tmp_entry;
9874 vm_map_clip_start(src_map, src_entry, src_start);
9875
91447636
A
9876 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9877 !use_maxprot) ||
9878 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
9879 goto VerificationFailed;
9880
39236c6e 9881 if (src_entry->vme_end < new_entry->vme_end) {
39037602
A
9882 /*
9883 * This entry might have been shortened
9884 * (vm_map_clip_end) or been replaced with
9885 * an entry that ends closer to "src_start"
9886 * than before.
9887 * Adjust "new_entry" accordingly; copying
9888 * less memory would be correct but we also
9889 * redo the copy (see below) if the new entry
9890 * no longer points at the same object/offset.
9891 */
39236c6e
A
9892 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9893 VM_MAP_COPY_PAGE_MASK(copy)));
9894 new_entry->vme_end = src_entry->vme_end;
9895 src_size = new_entry->vme_end - src_start;
39037602
A
9896 } else if (src_entry->vme_end > new_entry->vme_end) {
9897 /*
9898 * This entry might have been extended
9899 * (vm_map_entry_simplify() or coalesce)
9900 * or been replaced with an entry that ends farther
9901 * from "src_start" than before.
9902 *
9903 * We've called vm_object_copy_*() only on
9904 * the previous <start:end> range, so we can't
9905 * just extend new_entry. We have to re-do
9906 * the copy based on the new entry as if it was
9907 * pointing at a different object/offset (see
9908 * "Verification failed" below).
9909 */
39236c6e 9910 }
1c79356b 9911
3e170ce0 9912 if ((VME_OBJECT(src_entry) != src_object) ||
39037602
A
9913 (VME_OFFSET(src_entry) != src_offset) ||
9914 (src_entry->vme_end > new_entry->vme_end)) {
1c79356b
A
9915
9916 /*
9917 * Verification failed.
9918 *
9919 * Start over with this top-level entry.
9920 */
9921
2d21ac55 9922 VerificationFailed: ;
1c79356b 9923
3e170ce0 9924 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
9925 tmp_entry = src_entry;
9926 continue;
9927 }
9928
9929 /*
9930 * Verification succeeded.
9931 */
9932
2d21ac55 9933 VerificationSuccessful: ;
1c79356b
A
9934
9935 if (result == KERN_MEMORY_RESTART_COPY)
9936 goto RestartCopy;
9937
9938 /*
9939 * Copy succeeded.
9940 */
9941
2d21ac55 9942 CopySuccessful: ;
1c79356b
A
9943
9944 /*
9945 * Link in the new copy entry.
9946 */
9947
9948 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9949 new_entry);
9950
9951 /*
9952 * Determine whether the entire region
9953 * has been copied.
9954 */
2d21ac55 9955 src_base = src_start;
1c79356b
A
9956 src_start = new_entry->vme_end;
9957 new_entry = VM_MAP_ENTRY_NULL;
9958 while ((src_start >= src_end) && (src_end != 0)) {
fe8ab488
A
9959 submap_map_t *ptr;
9960
9961 if (src_map == base_map) {
9962 /* back to the top */
1c79356b 9963 break;
fe8ab488
A
9964 }
9965
9966 ptr = parent_maps;
9967 assert(ptr != NULL);
9968 parent_maps = parent_maps->next;
9969
9970 /* fix up the damage we did in that submap */
9971 vm_map_simplify_range(src_map,
9972 src_base,
9973 src_end);
9974
9975 vm_map_unlock(src_map);
9976 vm_map_deallocate(src_map);
9977 vm_map_lock(ptr->parent_map);
9978 src_map = ptr->parent_map;
9979 src_base = ptr->base_start;
9980 src_start = ptr->base_start + ptr->base_len;
9981 src_end = ptr->base_end;
9982 if (!vm_map_lookup_entry(src_map,
9983 src_start,
9984 &tmp_entry) &&
9985 (src_end > src_start)) {
9986 RETURN(KERN_INVALID_ADDRESS);
9987 }
9988 kfree(ptr, sizeof(submap_map_t));
9989 if (parent_maps == NULL)
9990 map_share = FALSE;
9991 src_entry = tmp_entry->vme_prev;
9992 }
9993
9994 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9995 (src_start >= src_addr + len) &&
9996 (src_addr + len != 0)) {
9997 /*
9998 * Stop copying now, even though we haven't reached
9999 * "src_end". We'll adjust the end of the last copy
10000 * entry at the end, if needed.
10001 *
10002			 * If src_map's alignment is different from the
10003 * system's page-alignment, there could be
10004 * extra non-map-aligned map entries between
10005 * the original (non-rounded) "src_addr + len"
10006 * and the rounded "src_end".
10007 * We do not want to copy those map entries since
10008 * they're not part of the copied range.
10009 */
10010 break;
1c79356b 10011 }
fe8ab488 10012
1c79356b
A
10013 if ((src_start >= src_end) && (src_end != 0))
10014 break;
10015
10016 /*
10017 * Verify that there are no gaps in the region
10018 */
10019
10020 tmp_entry = src_entry->vme_next;
fe8ab488 10021 if ((tmp_entry->vme_start != src_start) ||
39236c6e 10022 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 10023 RETURN(KERN_INVALID_ADDRESS);
39236c6e 10024 }
1c79356b
A
10025 }
10026
10027 /*
10028 * If the source should be destroyed, do it now, since the
10029 * copy was successful.
10030 */
10031 if (src_destroy) {
39236c6e
A
10032 (void) vm_map_delete(
10033 src_map,
10034 vm_map_trunc_page(src_addr,
10035 VM_MAP_PAGE_MASK(src_map)),
10036 src_end,
10037 ((src_map == kernel_map) ?
10038 VM_MAP_REMOVE_KUNWIRE :
10039 VM_MAP_NO_FLAGS),
10040 VM_MAP_NULL);
2d21ac55
A
10041 } else {
10042 /* fix up the damage we did in the base map */
39236c6e
A
10043 vm_map_simplify_range(
10044 src_map,
10045 vm_map_trunc_page(src_addr,
10046 VM_MAP_PAGE_MASK(src_map)),
10047 vm_map_round_page(src_end,
10048 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
10049 }
10050
10051 vm_map_unlock(src_map);
10052
39236c6e 10053 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
fe8ab488
A
10054 vm_map_offset_t original_start, original_offset, original_end;
10055
39236c6e
A
10056 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
10057
10058 /* adjust alignment of first copy_entry's "vme_start" */
10059 tmp_entry = vm_map_copy_first_entry(copy);
10060 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10061 vm_map_offset_t adjustment;
fe8ab488
A
10062
10063 original_start = tmp_entry->vme_start;
3e170ce0 10064 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
10065
10066 /* map-align the start of the first copy entry... */
10067 adjustment = (tmp_entry->vme_start -
10068 vm_map_trunc_page(
10069 tmp_entry->vme_start,
10070 VM_MAP_PAGE_MASK(src_map)));
10071 tmp_entry->vme_start -= adjustment;
3e170ce0
A
10072 VME_OFFSET_SET(tmp_entry,
10073 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
10074 copy_addr -= adjustment;
10075 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10076 /* ... adjust for mis-aligned start of copy range */
39236c6e
A
10077 adjustment =
10078 (vm_map_trunc_page(copy->offset,
10079 PAGE_MASK) -
10080 vm_map_trunc_page(copy->offset,
10081 VM_MAP_PAGE_MASK(src_map)));
10082 if (adjustment) {
10083 assert(page_aligned(adjustment));
10084 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10085 tmp_entry->vme_start += adjustment;
3e170ce0
A
10086 VME_OFFSET_SET(tmp_entry,
10087 (VME_OFFSET(tmp_entry) +
10088 adjustment));
39236c6e
A
10089 copy_addr += adjustment;
10090 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10091 }
fe8ab488
A
10092
10093 /*
10094 * Assert that the adjustments haven't exposed
10095 * more than was originally copied...
10096 */
10097 assert(tmp_entry->vme_start >= original_start);
3e170ce0 10098 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
10099 /*
10100 * ... and that it did not adjust outside of a
10101			 * single 16K page.
10102 */
10103 assert(vm_map_trunc_page(tmp_entry->vme_start,
10104 VM_MAP_PAGE_MASK(src_map)) ==
10105 vm_map_trunc_page(original_start,
10106 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
10107 }
10108
10109 /* adjust alignment of last copy_entry's "vme_end" */
10110 tmp_entry = vm_map_copy_last_entry(copy);
10111 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10112 vm_map_offset_t adjustment;
fe8ab488
A
10113
10114 original_end = tmp_entry->vme_end;
10115
10116 /* map-align the end of the last copy entry... */
10117 tmp_entry->vme_end =
10118 vm_map_round_page(tmp_entry->vme_end,
10119 VM_MAP_PAGE_MASK(src_map));
10120 /* ... adjust for mis-aligned end of copy range */
39236c6e
A
10121 adjustment =
10122 (vm_map_round_page((copy->offset +
10123 copy->size),
10124 VM_MAP_PAGE_MASK(src_map)) -
10125 vm_map_round_page((copy->offset +
10126 copy->size),
10127 PAGE_MASK));
10128 if (adjustment) {
10129 assert(page_aligned(adjustment));
10130 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10131 tmp_entry->vme_end -= adjustment;
10132 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10133 }
fe8ab488
A
10134
10135 /*
10136 * Assert that the adjustments haven't exposed
10137 * more than was originally copied...
10138 */
10139 assert(tmp_entry->vme_end <= original_end);
10140 /*
10141 * ... and that it did not adjust outside of a
10142			 * single 16K page.
10143 */
10144 assert(vm_map_round_page(tmp_entry->vme_end,
10145 VM_MAP_PAGE_MASK(src_map)) ==
10146 vm_map_round_page(original_end,
10147 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
10148 }
10149 }
10150
1c79356b
A
10151 /* Fix-up start and end points in copy. This is necessary */
10152 /* when the various entries in the copy object were picked */
10153 /* up from different sub-maps */
10154
10155 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 10156 copy_size = 0; /* compute actual size */
1c79356b 10157 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e
A
10158 assert(VM_MAP_PAGE_ALIGNED(
10159 copy_addr + (tmp_entry->vme_end -
10160 tmp_entry->vme_start),
10161 VM_MAP_COPY_PAGE_MASK(copy)));
10162 assert(VM_MAP_PAGE_ALIGNED(
10163 copy_addr,
10164 VM_MAP_COPY_PAGE_MASK(copy)));
10165
10166 /*
10167 * The copy_entries will be injected directly into the
10168 * destination map and might not be "map aligned" there...
10169 */
10170 tmp_entry->map_aligned = FALSE;
10171
1c79356b
A
10172 tmp_entry->vme_end = copy_addr +
10173 (tmp_entry->vme_end - tmp_entry->vme_start);
10174 tmp_entry->vme_start = copy_addr;
e2d2fc5c 10175 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 10176 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 10177 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
10178 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
10179 }
10180
fe8ab488
A
10181 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
10182 copy_size < copy->size) {
10183 /*
10184 * The actual size of the VM map copy is smaller than what
10185 * was requested by the caller. This must be because some
10186 * PAGE_SIZE-sized pages are missing at the end of the last
10187 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
10188 * The caller might not have been aware of those missing
10189 * pages and might not want to be aware of it, which is
10190 * fine as long as they don't try to access (and crash on)
10191 * those missing pages.
10192 * Let's adjust the size of the "copy", to avoid failing
10193 * in vm_map_copyout() or vm_map_copy_overwrite().
10194 */
10195 assert(vm_map_round_page(copy_size,
10196 VM_MAP_PAGE_MASK(src_map)) ==
10197 vm_map_round_page(copy->size,
10198 VM_MAP_PAGE_MASK(src_map)));
10199 copy->size = copy_size;
10200 }
10201
1c79356b
A
10202 *copy_result = copy;
10203 return(KERN_SUCCESS);
10204
10205#undef RETURN
10206}
10207
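/*
 * A standalone sketch (user-space C, not kernel code) of the page
 * rounding arithmetic behind the alignment fix-ups above: with a
 * 16K map page (mask 0x3fff) and a 4K system page (mask 0xfff),
 * vm_map_trunc_page()/vm_map_round_page() reduce to mask operations.
 * The address used below is hypothetical.
 */
#include <assert.h>
#include <stdint.h>

#define EXAMPLE_TRUNC_PAGE(a, mask)	((a) & ~(uint64_t)(mask))
#define EXAMPLE_ROUND_PAGE(a, mask)	(((a) + (mask)) & ~(uint64_t)(mask))

int
main(void)
{
	uint64_t addr = 0x10000a000ULL;

	/* 16K alignment: truncate down, round up */
	assert(EXAMPLE_TRUNC_PAGE(addr, 0x3fffULL) == 0x100008000ULL);
	assert(EXAMPLE_ROUND_PAGE(addr, 0x3fffULL) == 0x10000c000ULL);
	/* already 4K-aligned, so 4K truncation is a no-op */
	assert(EXAMPLE_TRUNC_PAGE(addr, 0xfffULL) == 0x10000a000ULL);
	return 0;
}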
39236c6e
A
10208kern_return_t
10209vm_map_copy_extract(
10210 vm_map_t src_map,
10211 vm_map_address_t src_addr,
10212 vm_map_size_t len,
10213 vm_map_copy_t *copy_result, /* OUT */
10214 vm_prot_t *cur_prot, /* OUT */
10215 vm_prot_t *max_prot)
10216{
10217 vm_map_offset_t src_start, src_end;
10218 vm_map_copy_t copy;
10219 kern_return_t kr;
10220
10221 /*
10222 * Check for copies of zero bytes.
10223 */
10224
10225 if (len == 0) {
10226 *copy_result = VM_MAP_COPY_NULL;
10227 return(KERN_SUCCESS);
10228 }
10229
10230 /*
10231 * Check that the end address doesn't overflow
10232 */
10233 src_end = src_addr + len;
10234 if (src_end < src_addr)
10235 return KERN_INVALID_ADDRESS;
10236
10237 /*
10238 * Compute (page aligned) start and end of region
10239 */
10240 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
10241 src_end = vm_map_round_page(src_end, PAGE_MASK);
10242
10243 /*
10244 * Allocate a header element for the list.
10245 *
10246 * Use the start and end in the header to
10247 * remember the endpoints prior to rounding.
10248 */
10249
10250 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10251 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
39236c6e
A
10252 vm_map_copy_first_entry(copy) =
10253 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10254 copy->type = VM_MAP_COPY_ENTRY_LIST;
10255 copy->cpy_hdr.nentries = 0;
10256 copy->cpy_hdr.entries_pageable = TRUE;
10257
10258 vm_map_store_init(&copy->cpy_hdr);
10259
10260 copy->offset = 0;
10261 copy->size = len;
10262
10263 kr = vm_map_remap_extract(src_map,
10264 src_addr,
10265 len,
10266 FALSE, /* copy */
10267 &copy->cpy_hdr,
10268 cur_prot,
10269 max_prot,
10270 VM_INHERIT_SHARE,
39037602
A
10271 TRUE, /* pageable */
10272 FALSE); /* same_map */
39236c6e
A
10273 if (kr != KERN_SUCCESS) {
10274 vm_map_copy_discard(copy);
10275 return kr;
10276 }
10277
10278 *copy_result = copy;
10279 return KERN_SUCCESS;
10280}
10281
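/*
 * Illustrative sketch (hypothetical kernel-internal caller, not part
 * of this file): vm_map_copy_extract() reports the protections of
 * the extracted range, which a caller might check before inserting
 * the copy elsewhere.
 */
static kern_return_t
example_extract_readable(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result)
{
	vm_prot_t	cur_prot, max_prot;
	kern_return_t	kr;

	kr = vm_map_copy_extract(src_map, src_addr, len,
				 copy_result, &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS)
		return kr;

	if (!(cur_prot & VM_PROT_READ)) {
		/* this caller requires readability; give the copy back */
		vm_map_copy_discard(*copy_result);
		*copy_result = VM_MAP_COPY_NULL;
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}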
1c79356b
A
10282/*
10283 * vm_map_copyin_object:
10284 *
10285 * Create a copy object from an object.
10286 * Our caller donates an object reference.
10287 */
10288
10289kern_return_t
10290vm_map_copyin_object(
10291 vm_object_t object,
10292 vm_object_offset_t offset, /* offset of region in object */
10293 vm_object_size_t size, /* size of region in object */
10294 vm_map_copy_t *copy_result) /* OUT */
10295{
10296 vm_map_copy_t copy; /* Resulting copy */
10297
10298 /*
10299 * We drop the object into a special copy object
10300 * that contains the object directly.
10301 */
10302
10303 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10304 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
10305 copy->type = VM_MAP_COPY_OBJECT;
10306 copy->cpy_object = object;
1c79356b
A
10307 copy->offset = offset;
10308 copy->size = size;
10309
10310 *copy_result = copy;
10311 return(KERN_SUCCESS);
10312}
10313
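/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * the caller donates its object reference, so the reference taken
 * from vm_object_allocate() below is handed straight to the copy.
 */
static kern_return_t
example_wrap_object(
	vm_object_size_t	size,
	vm_map_copy_t		*copy_result)
{
	vm_object_t	object;

	/* one reference comes back from vm_object_allocate()... */
	object = vm_object_allocate((vm_map_size_t) size);

	/* ...and is donated to the VM_MAP_COPY_OBJECT copy here */
	return vm_map_copyin_object(object, 0, size, copy_result);
}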
91447636 10314static void
1c79356b
A
10315vm_map_fork_share(
10316 vm_map_t old_map,
10317 vm_map_entry_t old_entry,
10318 vm_map_t new_map)
10319{
10320 vm_object_t object;
10321 vm_map_entry_t new_entry;
1c79356b
A
10322
10323 /*
10324 * New sharing code. New map entry
10325 * references original object. Internal
10326 * objects use asynchronous copy algorithm for
10327 * future copies. First make sure we have
10328 * the right object. If we need a shadow,
10329 * or someone else already has one, then
10330 * make a new shadow and share it.
10331 */
10332
3e170ce0 10333 object = VME_OBJECT(old_entry);
1c79356b
A
10334 if (old_entry->is_sub_map) {
10335 assert(old_entry->wired_count == 0);
0c530ab8 10336#ifndef NO_NESTED_PMAP
1c79356b 10337 if(old_entry->use_pmap) {
91447636
A
10338 kern_return_t result;
10339
1c79356b 10340 result = pmap_nest(new_map->pmap,
3e170ce0 10341 (VME_SUBMAP(old_entry))->pmap,
2d21ac55
A
10342 (addr64_t)old_entry->vme_start,
10343 (addr64_t)old_entry->vme_start,
10344 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
10345 if(result)
10346 panic("vm_map_fork_share: pmap_nest failed!");
10347 }
0c530ab8 10348#endif /* NO_NESTED_PMAP */
1c79356b 10349 } else if (object == VM_OBJECT_NULL) {
91447636 10350 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
2d21ac55 10351 old_entry->vme_start));
3e170ce0
A
10352 VME_OFFSET_SET(old_entry, 0);
10353 VME_OBJECT_SET(old_entry, object);
fe8ab488 10354 old_entry->use_pmap = TRUE;
1c79356b
A
10355 assert(!old_entry->needs_copy);
10356 } else if (object->copy_strategy !=
2d21ac55 10357 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
10358
10359 /*
10360 * We are already using an asymmetric
10361 * copy, and therefore we already have
10362 * the right object.
10363 */
10364
10365 assert(! old_entry->needs_copy);
10366 }
10367 else if (old_entry->needs_copy || /* case 1 */
10368 object->shadowed || /* case 2 */
10369 (!object->true_share && /* case 3 */
2d21ac55 10370 !old_entry->is_shared &&
6d2010ae 10371 (object->vo_size >
2d21ac55
A
10372 (vm_map_size_t)(old_entry->vme_end -
10373 old_entry->vme_start)))) {
1c79356b
A
10374
10375 /*
10376 * We need to create a shadow.
10377 * There are three cases here.
10378 * In the first case, we need to
10379 * complete a deferred symmetrical
10380 * copy that we participated in.
10381 * In the second and third cases,
10382 * we need to create the shadow so
10383 * that changes that we make to the
10384 * object do not interfere with
10385 * any symmetrical copies which
10386		 * have occurred (case 2) or which
10387 * might occur (case 3).
10388 *
10389 * The first case is when we had
10390 * deferred shadow object creation
10391 * via the entry->needs_copy mechanism.
10392 * This mechanism only works when
10393 * only one entry points to the source
10394 * object, and we are about to create
10395 * a second entry pointing to the
10396 * same object. The problem is that
10397 * there is no way of mapping from
10398 * an object to the entries pointing
10399 * to it. (Deferred shadow creation
10400		 * works with one entry because it occurs
10401 * at fault time, and we walk from the
10402 * entry to the object when handling
10403 * the fault.)
10404 *
10405 * The second case is when the object
10406 * to be shared has already been copied
10407 * with a symmetric copy, but we point
10408 * directly to the object without
10409 * needs_copy set in our entry. (This
10410 * can happen because different ranges
10411 * of an object can be pointed to by
10412 * different entries. In particular,
10413 * a single entry pointing to an object
10414 * can be split by a call to vm_inherit,
10415 * which, combined with task_create, can
10416 * result in the different entries
10417 * having different needs_copy values.)
10418 * The shadowed flag in the object allows
10419 * us to detect this case. The problem
10420 * with this case is that if this object
10421 * has or will have shadows, then we
10422 * must not perform an asymmetric copy
10423 * of this object, since such a copy
10424 * allows the object to be changed, which
10425 * will break the previous symmetrical
10426 * copies (which rely upon the object
10427 * not changing). In a sense, the shadowed
10428 * flag says "don't change this object".
10429 * We fix this by creating a shadow
10430 * object for this object, and sharing
10431 * that. This works because we are free
10432 * to change the shadow object (and thus
10433 * to use an asymmetric copy strategy);
10434 * this is also semantically correct,
10435 * since this object is temporary, and
10436 * therefore a copy of the object is
10437 * as good as the object itself. (This
10438 * is not true for permanent objects,
10439 * since the pager needs to see changes,
10440 * which won't happen if the changes
10441 * are made to a copy.)
10442 *
10443 * The third case is when the object
10444 * to be shared has parts sticking
10445 * outside of the entry we're working
10446 * with, and thus may in the future
10447 * be subject to a symmetrical copy.
10448 * (This is a preemptive version of
10449 * case 2.)
10450 */
3e170ce0
A
10451 VME_OBJECT_SHADOW(old_entry,
10452 (vm_map_size_t) (old_entry->vme_end -
10453 old_entry->vme_start));
1c79356b
A
10454
10455 /*
10456 * If we're making a shadow for other than
10457 * copy on write reasons, then we have
10458 * to remove write permission.
10459 */
10460
1c79356b
A
10461 if (!old_entry->needs_copy &&
10462 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
10463 vm_prot_t prot;
10464
10465 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10466
3e170ce0 10467 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
0c530ab8 10468 prot |= VM_PROT_EXECUTE;
2d21ac55 10469
316670eb 10470 if (old_map->mapped_in_other_pmaps) {
9bccf70c 10471 vm_object_pmap_protect(
3e170ce0
A
10472 VME_OBJECT(old_entry),
10473 VME_OFFSET(old_entry),
9bccf70c 10474 (old_entry->vme_end -
2d21ac55 10475 old_entry->vme_start),
9bccf70c
A
10476 PMAP_NULL,
10477 old_entry->vme_start,
0c530ab8 10478 prot);
1c79356b 10479 } else {
9bccf70c 10480 pmap_protect(old_map->pmap,
2d21ac55
A
10481 old_entry->vme_start,
10482 old_entry->vme_end,
10483 prot);
1c79356b
A
10484 }
10485 }
10486
10487 old_entry->needs_copy = FALSE;
3e170ce0 10488 object = VME_OBJECT(old_entry);
1c79356b 10489 }
6d2010ae 10490
1c79356b
A
10491
10492 /*
10493 * If object was using a symmetric copy strategy,
10494 * change its copy strategy to the default
10495 * asymmetric copy strategy, which is copy_delay
10496 * in the non-norma case and copy_call in the
10497 * norma case. Bump the reference count for the
10498 * new entry.
10499 */
10500
10501 if(old_entry->is_sub_map) {
3e170ce0
A
10502 vm_map_lock(VME_SUBMAP(old_entry));
10503 vm_map_reference(VME_SUBMAP(old_entry));
10504 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
10505 } else {
10506 vm_object_lock(object);
2d21ac55 10507 vm_object_reference_locked(object);
1c79356b
A
10508 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10509 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10510 }
10511 vm_object_unlock(object);
10512 }
10513
10514 /*
10515 * Clone the entry, using object ref from above.
10516 * Mark both entries as shared.
10517 */
10518
7ddcb079
A
10519 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10520 * map or descendants */
1c79356b
A
10521 vm_map_entry_copy(new_entry, old_entry);
10522 old_entry->is_shared = TRUE;
10523 new_entry->is_shared = TRUE;
39037602
A
10524
10525 /*
10526	 * If the old entry's inheritance is VM_INHERIT_NONE,
10527	 * the new entry is being created for a corpse fork,
10528	 * so remove write permission from the new entry.
10529 */
10530 if (old_entry->inheritance == VM_INHERIT_NONE) {
10531
10532 new_entry->protection &= ~VM_PROT_WRITE;
10533 new_entry->max_protection &= ~VM_PROT_WRITE;
10534 }
1c79356b
A
10535
10536 /*
10537 * Insert the entry into the new map -- we
10538 * know we're inserting at the end of the new
10539 * map.
10540 */
10541
6d2010ae 10542 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
1c79356b
A
10543
10544 /*
10545 * Update the physical map
10546 */
10547
10548 if (old_entry->is_sub_map) {
10549 /* Bill Angell pmap support goes here */
10550 } else {
10551 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
2d21ac55
A
10552 old_entry->vme_end - old_entry->vme_start,
10553 old_entry->vme_start);
1c79356b
A
10554 }
10555}
10556
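/*
 * A condensed restatement (sketch only, not compiled into this file)
 * of the three-way shadow test explained in the long comment above;
 * the helper name is hypothetical and the fields mirror the ones
 * vm_map_fork_share() actually tests.
 */
static boolean_t
example_needs_new_shadow(
	vm_map_entry_t	entry,
	vm_object_t	object,
	vm_map_size_t	entry_size)
{
	return (entry->needs_copy ||		/* case 1: deferred symmetric copy */
		object->shadowed ||		/* case 2: already symmetrically copied */
		(!object->true_share &&		/* case 3: object extends past this entry */
		 !entry->is_shared &&
		 object->vo_size > entry_size));
}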
91447636 10557static boolean_t
1c79356b
A
10558vm_map_fork_copy(
10559 vm_map_t old_map,
10560 vm_map_entry_t *old_entry_p,
39037602
A
10561 vm_map_t new_map,
10562 int vm_map_copyin_flags)
1c79356b
A
10563{
10564 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
10565 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10566 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
10567 vm_map_copy_t copy;
10568 vm_map_entry_t last = vm_map_last_entry(new_map);
10569
10570 vm_map_unlock(old_map);
10571 /*
10572 * Use maxprot version of copyin because we
10573 * care about whether this memory can ever
10574 * be accessed, not just whether it's accessible
10575 * right now.
10576 */
39037602
A
10577 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
10578 if (vm_map_copyin_internal(old_map, start, entry_size,
10579 vm_map_copyin_flags, &copy)
1c79356b
A
10580 != KERN_SUCCESS) {
10581 /*
10582 * The map might have changed while it
10583 * was unlocked, check it again. Skip
10584 * any blank space or permanently
10585 * unreadable region.
10586 */
10587 vm_map_lock(old_map);
10588 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 10589 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
10590 last = last->vme_next;
10591 }
10592 *old_entry_p = last;
10593
10594 /*
10595 * XXX For some error returns, want to
10596 * XXX skip to the next element. Note
10597 * that INVALID_ADDRESS and
10598 * PROTECTION_FAILURE are handled above.
10599 */
10600
10601 return FALSE;
10602 }
10603
10604 /*
10605 * Insert the copy into the new map
10606 */
10607
10608 vm_map_copy_insert(new_map, last, copy);
10609
10610 /*
10611 * Pick up the traversal at the end of
10612 * the copied region.
10613 */
10614
10615 vm_map_lock(old_map);
10616 start += entry_size;
10617 if (! vm_map_lookup_entry(old_map, start, &last)) {
10618 last = last->vme_next;
10619 } else {
2d21ac55
A
10620 if (last->vme_start == start) {
10621 /*
10622 * No need to clip here and we don't
10623 * want to cause any unnecessary
10624 * unnesting...
10625 */
10626 } else {
10627 vm_map_clip_start(old_map, last, start);
10628 }
1c79356b
A
10629 }
10630 *old_entry_p = last;
10631
10632 return TRUE;
10633}
10634
10635/*
10636 * vm_map_fork:
10637 *
10638 * Create and return a new map based on the old
10639 * map, according to the inheritance values on the
39037602 10640 * regions in that map and the options.
1c79356b
A
10641 *
10642 * The source map must not be locked.
10643 */
10644vm_map_t
10645vm_map_fork(
316670eb 10646 ledger_t ledger,
39037602
A
10647 vm_map_t old_map,
10648 int options)
1c79356b 10649{
2d21ac55 10650 pmap_t new_pmap;
1c79356b
A
10651 vm_map_t new_map;
10652 vm_map_entry_t old_entry;
91447636 10653 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
10654 vm_map_entry_t new_entry;
10655 boolean_t src_needs_copy;
10656 boolean_t new_entry_needs_copy;
3e170ce0 10657 boolean_t pmap_is64bit;
39037602
A
10658 int vm_map_copyin_flags;
10659
10660 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
10661 VM_MAP_FORK_PRESERVE_PURGEABLE)) {
10662 /* unsupported option */
10663 return VM_MAP_NULL;
10664 }
1c79356b 10665
3e170ce0 10666 pmap_is64bit =
b0d623f7 10667#if defined(__i386__) || defined(__x86_64__)
3e170ce0 10668 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
b0d623f7 10669#else
316670eb 10670#error Unknown architecture.
b0d623f7 10671#endif
3e170ce0
A
10672
10673 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
2d21ac55 10674
1c79356b
A
10675 vm_map_reference_swap(old_map);
10676 vm_map_lock(old_map);
10677
10678 new_map = vm_map_create(new_pmap,
2d21ac55
A
10679 old_map->min_offset,
10680 old_map->max_offset,
10681 old_map->hdr.entries_pageable);
39037602 10682 vm_commit_pagezero_status(new_map);
39236c6e
A
10683 /* inherit the parent map's page size */
10684 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 10685 for (
2d21ac55
A
10686 old_entry = vm_map_first_entry(old_map);
10687 old_entry != vm_map_to_entry(old_map);
10688 ) {
1c79356b
A
10689
10690 entry_size = old_entry->vme_end - old_entry->vme_start;
10691
10692 switch (old_entry->inheritance) {
10693 case VM_INHERIT_NONE:
39037602
A
10694 /*
10695 * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
10696		 * is not passed or the entry is backed by a device pager.
10697 */
10698 if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
10699 (!old_entry->is_sub_map &&
10700 VME_OBJECT(old_entry) != NULL &&
10701 VME_OBJECT(old_entry)->pager != NULL &&
10702 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
10703 break;
10704 }
10705 /* FALLTHROUGH */
1c79356b
A
10706
10707 case VM_INHERIT_SHARE:
10708 vm_map_fork_share(old_map, old_entry, new_map);
10709 new_size += entry_size;
10710 break;
10711
10712 case VM_INHERIT_COPY:
10713
10714 /*
10715 * Inline the copy_quickly case;
10716 * upon failure, fall back on call
10717 * to vm_map_fork_copy.
10718 */
10719
10720 if(old_entry->is_sub_map)
10721 break;
9bccf70c 10722 if ((old_entry->wired_count != 0) ||
3e170ce0
A
10723 ((VME_OBJECT(old_entry) != NULL) &&
10724 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
10725 goto slow_vm_map_fork_copy;
10726 }
10727
7ddcb079 10728 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b 10729 vm_map_entry_copy(new_entry, old_entry);
fe8ab488
A
10730 if (new_entry->is_sub_map) {
10731 /* clear address space specifics */
10732 new_entry->use_pmap = FALSE;
10733 }
1c79356b
A
10734
10735 if (! vm_object_copy_quickly(
3e170ce0
A
10736 &VME_OBJECT(new_entry),
10737 VME_OFFSET(old_entry),
2d21ac55
A
10738 (old_entry->vme_end -
10739 old_entry->vme_start),
10740 &src_needs_copy,
10741 &new_entry_needs_copy)) {
1c79356b
A
10742 vm_map_entry_dispose(new_map, new_entry);
10743 goto slow_vm_map_fork_copy;
10744 }
10745
10746 /*
10747 * Handle copy-on-write obligations
10748 */
10749
10750 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
10751 vm_prot_t prot;
10752
10753 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10754
3e170ce0
A
10755 if (override_nx(old_map, VME_ALIAS(old_entry))
10756 && prot)
0c530ab8 10757 prot |= VM_PROT_EXECUTE;
2d21ac55 10758
1c79356b 10759 vm_object_pmap_protect(
3e170ce0
A
10760 VME_OBJECT(old_entry),
10761 VME_OFFSET(old_entry),
1c79356b 10762 (old_entry->vme_end -
2d21ac55 10763 old_entry->vme_start),
1c79356b 10764 ((old_entry->is_shared
316670eb 10765 || old_map->mapped_in_other_pmaps)
2d21ac55
A
10766 ? PMAP_NULL :
10767 old_map->pmap),
1c79356b 10768 old_entry->vme_start,
0c530ab8 10769 prot);
1c79356b 10770
3e170ce0 10771 assert(old_entry->wired_count == 0);
1c79356b
A
10772 old_entry->needs_copy = TRUE;
10773 }
10774 new_entry->needs_copy = new_entry_needs_copy;
10775
10776 /*
10777 * Insert the entry at the end
10778 * of the map.
10779 */
10780
6d2010ae 10781 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
1c79356b
A
10782 new_entry);
10783 new_size += entry_size;
10784 break;
10785
10786 slow_vm_map_fork_copy:
39037602
A
10787 vm_map_copyin_flags = 0;
10788 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
10789 vm_map_copyin_flags |=
10790 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
10791 }
10792 if (vm_map_fork_copy(old_map,
10793 &old_entry,
10794 new_map,
10795 vm_map_copyin_flags)) {
1c79356b
A
10796 new_size += entry_size;
10797 }
10798 continue;
10799 }
10800 old_entry = old_entry->vme_next;
10801 }
10802
fe8ab488 10803
1c79356b
A
10804 new_map->size = new_size;
10805 vm_map_unlock(old_map);
10806 vm_map_deallocate(old_map);
10807
10808 return(new_map);
10809}
10810
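/*
 * A user-space sketch (standalone program, not kernel code) of the
 * inheritance values vm_map_fork() consults above: marking a region
 * VM_INHERIT_SHARE routes it through vm_map_fork_share(), so a write
 * in the child remains visible to the parent after fork().
 */
#include <mach/mach.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int
main(void)
{
	vm_address_t	addr = 0;

	if (vm_allocate(mach_task_self(), &addr, vm_page_size,
			VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
		return 1;

	/* the default would be VM_INHERIT_COPY; ask for a shared entry */
	vm_inherit(mach_task_self(), addr, vm_page_size, VM_INHERIT_SHARE);

	*(volatile int *)addr = 0;
	if (fork() == 0) {
		*(volatile int *)addr = 42;	/* child writes through the shared entry */
		_exit(0);
	}
	wait(NULL);
	/* prints 42; with VM_INHERIT_COPY the parent would still see 0 */
	printf("parent sees %d\n", *(volatile int *)addr);
	return 0;
}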
2d21ac55
A
10811/*
10812 * vm_map_exec:
10813 *
10814 * Setup the "new_map" with the proper execution environment according
10815 * to the type of executable (platform, 64bit, chroot environment).
10816 * Map the comm page and shared region, etc...
10817 */
10818kern_return_t
10819vm_map_exec(
10820 vm_map_t new_map,
10821 task_t task,
39037602 10822 boolean_t is64bit,
2d21ac55
A
10823 void *fsroot,
10824 cpu_type_t cpu)
10825{
10826 SHARED_REGION_TRACE_DEBUG(
10827 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
fe8ab488
A
10828 (void *)VM_KERNEL_ADDRPERM(current_task()),
10829 (void *)VM_KERNEL_ADDRPERM(new_map),
10830 (void *)VM_KERNEL_ADDRPERM(task),
10831 (void *)VM_KERNEL_ADDRPERM(fsroot),
10832 cpu));
39037602
A
10833 (void) vm_commpage_enter(new_map, task, is64bit);
10834 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
2d21ac55
A
10835 SHARED_REGION_TRACE_DEBUG(
10836 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
fe8ab488
A
10837 (void *)VM_KERNEL_ADDRPERM(current_task()),
10838 (void *)VM_KERNEL_ADDRPERM(new_map),
10839 (void *)VM_KERNEL_ADDRPERM(task),
10840 (void *)VM_KERNEL_ADDRPERM(fsroot),
10841 cpu));
2d21ac55
A
10842 return KERN_SUCCESS;
10843}
1c79356b
A
10844
10845/*
10846 * vm_map_lookup_locked:
10847 *
10848 * Finds the VM object, offset, and
10849 * protection for a given virtual address in the
10850 * specified map, assuming a page fault of the
10851 * type specified.
10852 *
10853 * Returns the (object, offset, protection) for
10854 * this address, whether it is wired down, and whether
10855 * this map has the only reference to the data in question.
10856 * In order to later verify this lookup, a "version"
10857 * is returned.
10858 *
10859 * The map MUST be locked by the caller and WILL be
10860 * locked on exit. In order to guarantee the
10861 * existence of the returned object, it is returned
10862 * locked.
10863 *
10864 * If a lookup is requested with "write protection"
10865 * specified, the map may be changed to perform virtual
10866 * copying operations, although the data referenced will
10867 * remain the same.
10868 */
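/*
 * A condensed sketch (hypothetical caller, loosely modelled on the
 * fault path; not part of this file) of the lookup/verify protocol
 * described above: look the address up under the read lock, remember
 * the version, drop the locks to do the work, then use
 * vm_map_verify() to confirm the lookup is still valid.
 * OBJECT_LOCK_EXCLUSIVE is assumed to be the lock-type constant the
 * fault code passes for this argument; error handling is reduced to
 * the essentials.
 */
static kern_return_t
example_lookup_and_verify(
	vm_map_t	map,
	vm_map_offset_t	vaddr)
{
	vm_map_version_t		version;
	vm_object_t			object;
	vm_object_offset_t		offset;
	vm_prot_t			prot;
	boolean_t			wired;
	struct vm_object_fault_info	fault_info;
	vm_map_t			real_map;
	kern_return_t			kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE,
				  &version, &object, &offset, &prot,
				  &wired, &fault_info, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	/* the object comes back locked; this sketch only wanted the lookup */
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);

	/* ... work that does not require the map lock ... */

	vm_map_lock_read(map);
	if (!vm_map_verify(map, &version)) {
		/* the map changed underneath us; a real caller would retry */
		vm_map_unlock_read(map);
		return KERN_ABORTED;
	}
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}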
10869kern_return_t
10870vm_map_lookup_locked(
10871 vm_map_t *var_map, /* IN/OUT */
2d21ac55 10872 vm_map_offset_t vaddr,
91447636 10873 vm_prot_t fault_type,
2d21ac55 10874 int object_lock_type,
1c79356b
A
10875 vm_map_version_t *out_version, /* OUT */
10876 vm_object_t *object, /* OUT */
10877 vm_object_offset_t *offset, /* OUT */
10878 vm_prot_t *out_prot, /* OUT */
10879 boolean_t *wired, /* OUT */
2d21ac55 10880 vm_object_fault_info_t fault_info, /* OUT */
91447636 10881 vm_map_t *real_map)
1c79356b
A
10882{
10883 vm_map_entry_t entry;
39037602 10884 vm_map_t map = *var_map;
1c79356b
A
10885 vm_map_t old_map = *var_map;
10886 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
10887 vm_map_offset_t cow_parent_vaddr = 0;
10888 vm_map_offset_t old_start = 0;
10889 vm_map_offset_t old_end = 0;
39037602 10890 vm_prot_t prot;
6d2010ae 10891 boolean_t mask_protections;
fe8ab488 10892 boolean_t force_copy;
6d2010ae
A
10893 vm_prot_t original_fault_type;
10894
10895 /*
10896 * VM_PROT_MASK means that the caller wants us to use "fault_type"
10897 * as a mask against the mapping's actual protections, not as an
10898 * absolute value.
10899 */
10900 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
fe8ab488
A
10901 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10902 fault_type &= VM_PROT_ALL;
6d2010ae 10903 original_fault_type = fault_type;
1c79356b 10904
91447636 10905 *real_map = map;
6d2010ae
A
10906
10907RetryLookup:
10908 fault_type = original_fault_type;
1c79356b
A
10909
10910 /*
10911 * If the map has an interesting hint, try it before calling
10912 * full blown lookup routine.
10913 */
1c79356b 10914 entry = map->hint;
1c79356b
A
10915
10916 if ((entry == vm_map_to_entry(map)) ||
10917 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10918 vm_map_entry_t tmp_entry;
10919
10920 /*
10921 * Entry was either not a valid hint, or the vaddr
10922 * was not contained in the entry, so do a full lookup.
10923 */
10924 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10925 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10926 vm_map_unlock(cow_sub_map_parent);
91447636 10927 if((*real_map != map)
2d21ac55 10928 && (*real_map != cow_sub_map_parent))
91447636 10929 vm_map_unlock(*real_map);
1c79356b
A
10930 return KERN_INVALID_ADDRESS;
10931 }
10932
10933 entry = tmp_entry;
10934 }
10935 if(map == old_map) {
10936 old_start = entry->vme_start;
10937 old_end = entry->vme_end;
10938 }
10939
10940 /*
10941 * Handle submaps. Drop lock on upper map, submap is
10942 * returned locked.
10943 */
10944
10945submap_recurse:
10946 if (entry->is_sub_map) {
91447636
A
10947 vm_map_offset_t local_vaddr;
10948 vm_map_offset_t end_delta;
10949 vm_map_offset_t start_delta;
1c79356b
A
10950 vm_map_entry_t submap_entry;
10951 boolean_t mapped_needs_copy=FALSE;
10952
10953 local_vaddr = vaddr;
10954
39037602
A
10955 if ((entry->use_pmap &&
10956 ! ((fault_type & VM_PROT_WRITE) ||
10957 force_copy))) {
91447636
A
10958 /* if real_map equals map we unlock below */
10959 if ((*real_map != map) &&
2d21ac55 10960 (*real_map != cow_sub_map_parent))
91447636 10961 vm_map_unlock(*real_map);
3e170ce0 10962 *real_map = VME_SUBMAP(entry);
1c79356b
A
10963 }
10964
39037602
A
10965 if(entry->needs_copy &&
10966 ((fault_type & VM_PROT_WRITE) ||
10967 force_copy)) {
1c79356b
A
10968 if (!mapped_needs_copy) {
10969 if (vm_map_lock_read_to_write(map)) {
10970 vm_map_lock_read(map);
99c3a104 10971 *real_map = map;
1c79356b
A
10972 goto RetryLookup;
10973 }
3e170ce0
A
10974 vm_map_lock_read(VME_SUBMAP(entry));
10975 *var_map = VME_SUBMAP(entry);
1c79356b
A
10976 cow_sub_map_parent = map;
10977 /* reset base to map before cow object */
10978 /* this is the map which will accept */
10979 /* the new cow object */
10980 old_start = entry->vme_start;
10981 old_end = entry->vme_end;
10982 cow_parent_vaddr = vaddr;
10983 mapped_needs_copy = TRUE;
10984 } else {
3e170ce0
A
10985 vm_map_lock_read(VME_SUBMAP(entry));
10986 *var_map = VME_SUBMAP(entry);
1c79356b 10987 if((cow_sub_map_parent != map) &&
2d21ac55 10988 (*real_map != map))
1c79356b
A
10989 vm_map_unlock(map);
10990 }
10991 } else {
3e170ce0
A
10992 vm_map_lock_read(VME_SUBMAP(entry));
10993 *var_map = VME_SUBMAP(entry);
1c79356b
A
10994			/* leave the map locked if it is a target */
10995			/* cow sub_map above; otherwise, just */
10996			/* follow the maps down to the object. */
10997			/* Here we unlock, knowing we are not */
10998			/* revisiting the map. */
91447636 10999 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
11000 vm_map_unlock_read(map);
11001 }
11002
99c3a104 11003 map = *var_map;
1c79356b
A
11004
11005 /* calculate the offset in the submap for vaddr */
3e170ce0 11006 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
1c79356b 11007
2d21ac55 11008 RetrySubMap:
1c79356b
A
11009 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
11010 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
11011 vm_map_unlock(cow_sub_map_parent);
11012 }
91447636 11013 if((*real_map != map)
2d21ac55 11014 && (*real_map != cow_sub_map_parent)) {
91447636 11015 vm_map_unlock(*real_map);
1c79356b 11016 }
91447636 11017 *real_map = map;
1c79356b
A
11018 return KERN_INVALID_ADDRESS;
11019 }
2d21ac55 11020
1c79356b
A
11021 /* find the attenuated shadow of the underlying object */
11022 /* on our target map */
11023
11024		/* In English: the submap object may extend beyond the */
11025		/* region mapped by the entry, or may only fill a portion */
11026		/* of it. For our purposes, we only care whether the object */
11027		/* doesn't fill the entry. In this case, the area which will */
11028 /* ultimately be clipped in the top map will only need */
11029 /* to be as big as the portion of the underlying entry */
11030 /* which is mapped */
3e170ce0
A
11031 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
11032 submap_entry->vme_start - VME_OFFSET(entry) : 0;
1c79356b
A
11033
11034 end_delta =
3e170ce0 11035 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
1c79356b 11036 submap_entry->vme_end ?
3e170ce0 11037 0 : (VME_OFFSET(entry) +
2d21ac55
A
11038 (old_end - old_start))
11039 - submap_entry->vme_end;
1c79356b
A
11040
11041 old_start += start_delta;
11042 old_end -= end_delta;
11043
11044 if(submap_entry->is_sub_map) {
11045 entry = submap_entry;
11046 vaddr = local_vaddr;
11047 goto submap_recurse;
11048 }
11049
39037602
A
11050 if (((fault_type & VM_PROT_WRITE) ||
11051 force_copy)
11052 && cow_sub_map_parent) {
1c79356b 11053
2d21ac55
A
11054 vm_object_t sub_object, copy_object;
11055 vm_object_offset_t copy_offset;
91447636
A
11056 vm_map_offset_t local_start;
11057 vm_map_offset_t local_end;
0b4e3aa0 11058 boolean_t copied_slowly = FALSE;
1c79356b
A
11059
11060 if (vm_map_lock_read_to_write(map)) {
11061 vm_map_lock_read(map);
11062 old_start -= start_delta;
11063 old_end += end_delta;
11064 goto RetrySubMap;
11065 }
0b4e3aa0
A
11066
11067
3e170ce0 11068 sub_object = VME_OBJECT(submap_entry);
2d21ac55
A
11069 if (sub_object == VM_OBJECT_NULL) {
11070 sub_object =
1c79356b 11071 vm_object_allocate(
91447636 11072 (vm_map_size_t)
2d21ac55
A
11073 (submap_entry->vme_end -
11074 submap_entry->vme_start));
3e170ce0
A
11075 VME_OBJECT_SET(submap_entry, sub_object);
11076 VME_OFFSET_SET(submap_entry, 0);
1c79356b
A
11077 }
11078 local_start = local_vaddr -
2d21ac55 11079 (cow_parent_vaddr - old_start);
1c79356b 11080 local_end = local_vaddr +
2d21ac55 11081 (old_end - cow_parent_vaddr);
1c79356b
A
11082 vm_map_clip_start(map, submap_entry, local_start);
11083 vm_map_clip_end(map, submap_entry, local_end);
fe8ab488
A
11084 if (submap_entry->is_sub_map) {
11085 /* unnesting was done when clipping */
11086 assert(!submap_entry->use_pmap);
11087 }
1c79356b
A
11088
11089 /* This is the COW case, lets connect */
11090 /* an entry in our space to the underlying */
11091 /* object in the submap, bypassing the */
11092 /* submap. */
0b4e3aa0
A
11093
11094
2d21ac55 11095 if(submap_entry->wired_count != 0 ||
4a3eedf9
A
11096 (sub_object->copy_strategy ==
11097 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
11098 vm_object_lock(sub_object);
11099 vm_object_copy_slowly(sub_object,
3e170ce0 11100 VME_OFFSET(submap_entry),
2d21ac55
A
11101 (submap_entry->vme_end -
11102 submap_entry->vme_start),
11103 FALSE,
11104 &copy_object);
11105 copied_slowly = TRUE;
0b4e3aa0 11106 } else {
2d21ac55 11107
0b4e3aa0 11108 /* set up shadow object */
2d21ac55 11109 copy_object = sub_object;
39037602
A
11110 vm_object_lock(sub_object);
11111 vm_object_reference_locked(sub_object);
2d21ac55 11112 sub_object->shadowed = TRUE;
39037602
A
11113 vm_object_unlock(sub_object);
11114
3e170ce0 11115 assert(submap_entry->wired_count == 0);
0b4e3aa0 11116 submap_entry->needs_copy = TRUE;
0c530ab8
A
11117
11118 prot = submap_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11119
3e170ce0
A
11120 if (override_nx(old_map,
11121 VME_ALIAS(submap_entry))
11122 && prot)
0c530ab8 11123 prot |= VM_PROT_EXECUTE;
2d21ac55 11124
0b4e3aa0 11125 vm_object_pmap_protect(
2d21ac55 11126 sub_object,
3e170ce0 11127 VME_OFFSET(submap_entry),
1c79356b 11128 submap_entry->vme_end -
2d21ac55 11129 submap_entry->vme_start,
9bccf70c 11130 (submap_entry->is_shared
316670eb 11131 || map->mapped_in_other_pmaps) ?
2d21ac55 11132 PMAP_NULL : map->pmap,
1c79356b 11133 submap_entry->vme_start,
0c530ab8 11134 prot);
0b4e3aa0 11135 }
1c79356b 11136
2d21ac55
A
11137 /*
11138 * Adjust the fault offset to the submap entry.
11139 */
11140 copy_offset = (local_vaddr -
11141 submap_entry->vme_start +
3e170ce0 11142 VME_OFFSET(submap_entry));
1c79356b
A
11143
11144 /* This works differently than the */
11145 /* normal submap case. We go back */
11146 /* to the parent of the cow map and */
11147 /* clip out the target portion of */
11148 /* the sub_map, substituting the */
11149 /* new copy object. */
11150
11151 vm_map_unlock(map);
11152 local_start = old_start;
11153 local_end = old_end;
11154 map = cow_sub_map_parent;
11155 *var_map = cow_sub_map_parent;
11156 vaddr = cow_parent_vaddr;
11157 cow_sub_map_parent = NULL;
11158
2d21ac55
A
11159 if(!vm_map_lookup_entry(map,
11160 vaddr, &entry)) {
11161 vm_object_deallocate(
11162 copy_object);
11163 vm_map_lock_write_to_read(map);
11164 return KERN_INVALID_ADDRESS;
11165 }
11166
11167 /* clip out the portion of space */
11168 /* mapped by the sub map which */
11169 /* corresponds to the underlying */
11170 /* object */
11171
11172 /*
11173 * Clip (and unnest) the smallest nested chunk
11174 * possible around the faulting address...
11175 */
11176 local_start = vaddr & ~(pmap_nesting_size_min - 1);
11177 local_end = local_start + pmap_nesting_size_min;
11178 /*
11179 * ... but don't go beyond the "old_start" to "old_end"
11180 * range, to avoid spanning over another VM region
11181 * with a possibly different VM object and/or offset.
11182 */
11183 if (local_start < old_start) {
11184 local_start = old_start;
11185 }
11186 if (local_end > old_end) {
11187 local_end = old_end;
11188 }
11189 /*
11190 * Adjust copy_offset to the start of the range.
11191 */
11192 copy_offset -= (vaddr - local_start);
11193
1c79356b
A
11194 vm_map_clip_start(map, entry, local_start);
11195 vm_map_clip_end(map, entry, local_end);
fe8ab488
A
11196 if (entry->is_sub_map) {
11197 /* unnesting was done when clipping */
11198 assert(!entry->use_pmap);
11199 }
1c79356b
A
11200
11201 /* substitute copy object for */
11202 /* shared map entry */
3e170ce0 11203 vm_map_deallocate(VME_SUBMAP(entry));
fe8ab488 11204 assert(!entry->iokit_acct);
1c79356b 11205 entry->is_sub_map = FALSE;
fe8ab488 11206 entry->use_pmap = TRUE;
3e170ce0 11207 VME_OBJECT_SET(entry, copy_object);
1c79356b 11208
2d21ac55
A
11209 /* propagate the submap entry's protections */
11210 entry->protection |= submap_entry->protection;
11211 entry->max_protection |= submap_entry->max_protection;
11212
0b4e3aa0 11213 if(copied_slowly) {
3e170ce0 11214 VME_OFFSET_SET(entry, local_start - old_start);
0b4e3aa0
A
11215 entry->needs_copy = FALSE;
11216 entry->is_shared = FALSE;
11217 } else {
3e170ce0
A
11218 VME_OFFSET_SET(entry, copy_offset);
11219 assert(entry->wired_count == 0);
0b4e3aa0
A
11220 entry->needs_copy = TRUE;
11221 if(entry->inheritance == VM_INHERIT_SHARE)
11222 entry->inheritance = VM_INHERIT_COPY;
11223 if (map != old_map)
11224 entry->is_shared = TRUE;
11225 }
1c79356b 11226 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 11227 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
11228
11229 vm_map_lock_write_to_read(map);
11230 } else {
11231 if((cow_sub_map_parent)
2d21ac55
A
11232 && (cow_sub_map_parent != *real_map)
11233 && (cow_sub_map_parent != map)) {
1c79356b
A
11234 vm_map_unlock(cow_sub_map_parent);
11235 }
11236 entry = submap_entry;
11237 vaddr = local_vaddr;
11238 }
11239 }
11240
11241 /*
11242 * Check whether this task is allowed to have
11243 * this page.
11244 */
2d21ac55 11245
6601e61a 11246 prot = entry->protection;
0c530ab8 11247
3e170ce0 11248 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
0c530ab8 11249 /*
2d21ac55 11250 * HACK -- if not a stack, then allow execution
0c530ab8
A
11251 */
11252 prot |= VM_PROT_EXECUTE;
2d21ac55
A
11253 }
11254
6d2010ae
A
11255 if (mask_protections) {
11256 fault_type &= prot;
11257 if (fault_type == VM_PROT_NONE) {
11258 goto protection_failure;
11259 }
11260 }
39037602
A
11261 if ((fault_type & prot) != fault_type) {
11262
6d2010ae 11263 protection_failure:
2d21ac55
A
11264 if (*real_map != map) {
11265 vm_map_unlock(*real_map);
0c530ab8
A
11266 }
11267 *real_map = map;
11268
11269 if ((fault_type & VM_PROT_EXECUTE) && prot)
2d21ac55 11270 log_stack_execution_failure((addr64_t)vaddr, prot);
0c530ab8 11271
2d21ac55 11272 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 11273 return KERN_PROTECTION_FAILURE;
1c79356b
A
11274 }
11275
11276 /*
11277 * If this page is not pageable, we have to get
11278 * it for all possible accesses.
11279 */
11280
91447636
A
11281 *wired = (entry->wired_count != 0);
11282 if (*wired)
0c530ab8 11283 fault_type = prot;
1c79356b
A
11284
11285 /*
11286 * If the entry was copy-on-write, we either ...
11287 */
11288
11289 if (entry->needs_copy) {
11290 /*
11291 * If we want to write the page, we may as well
11292 * handle that now since we've got the map locked.
11293 *
11294 * If we don't need to write the page, we just
11295 * demote the permissions allowed.
11296 */
11297
fe8ab488 11298 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
1c79356b
A
11299 /*
11300 * Make a new object, and place it in the
11301 * object chain. Note that no new references
11302 * have appeared -- one just moved from the
11303 * map to the new object.
11304 */
11305
11306 if (vm_map_lock_read_to_write(map)) {
11307 vm_map_lock_read(map);
11308 goto RetryLookup;
11309 }
39037602
A
11310
11311 if (VME_OBJECT(entry)->shadowed == FALSE) {
11312 vm_object_lock(VME_OBJECT(entry));
11313 VME_OBJECT(entry)->shadowed = TRUE;
11314 vm_object_unlock(VME_OBJECT(entry));
11315 }
3e170ce0
A
11316 VME_OBJECT_SHADOW(entry,
11317 (vm_map_size_t) (entry->vme_end -
11318 entry->vme_start));
1c79356b 11319 entry->needs_copy = FALSE;
39037602 11320
1c79356b
A
11321 vm_map_lock_write_to_read(map);
11322 }
39037602 11323 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
1c79356b
A
11324 /*
11325 * We're attempting to read a copy-on-write
11326 * page -- don't allow writes.
11327 */
11328
11329 prot &= (~VM_PROT_WRITE);
11330 }
11331 }
11332
11333 /*
11334 * Create an object if necessary.
11335 */
3e170ce0 11336 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
1c79356b
A
11337
11338 if (vm_map_lock_read_to_write(map)) {
11339 vm_map_lock_read(map);
11340 goto RetryLookup;
11341 }
11342
3e170ce0
A
11343 VME_OBJECT_SET(entry,
11344 vm_object_allocate(
11345 (vm_map_size_t)(entry->vme_end -
11346 entry->vme_start)));
11347 VME_OFFSET_SET(entry, 0);
1c79356b
A
11348 vm_map_lock_write_to_read(map);
11349 }
11350
11351 /*
11352 * Return the object/offset from this entry. If the entry
11353 * was copy-on-write or empty, it has been fixed up. Also
11354 * return the protection.
11355 */
11356
3e170ce0
A
11357 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11358 *object = VME_OBJECT(entry);
1c79356b 11359 *out_prot = prot;
2d21ac55
A
11360
11361 if (fault_info) {
11362 fault_info->interruptible = THREAD_UNINT; /* for now... */
11363 /* ... the caller will change "interruptible" if needed */
11364 fault_info->cluster_size = 0;
3e170ce0 11365 fault_info->user_tag = VME_ALIAS(entry);
fe8ab488
A
11366 fault_info->pmap_options = 0;
11367 if (entry->iokit_acct ||
11368 (!entry->is_sub_map && !entry->use_pmap)) {
11369 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11370 }
2d21ac55 11371 fault_info->behavior = entry->behavior;
3e170ce0
A
11372 fault_info->lo_offset = VME_OFFSET(entry);
11373 fault_info->hi_offset =
11374 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
2d21ac55 11375 fault_info->no_cache = entry->no_cache;
b0d623f7 11376 fault_info->stealth = FALSE;
6d2010ae 11377 fault_info->io_sync = FALSE;
3e170ce0
A
11378 if (entry->used_for_jit ||
11379 entry->vme_resilient_codesign) {
11380 fault_info->cs_bypass = TRUE;
11381 } else {
11382 fault_info->cs_bypass = FALSE;
11383 }
0b4c1975 11384 fault_info->mark_zf_absent = FALSE;
316670eb 11385 fault_info->batch_pmap_op = FALSE;
2d21ac55 11386 }
1c79356b
A
11387
11388 /*
11389 * Lock the object to prevent it from disappearing
11390 */
2d21ac55
A
11391 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11392 vm_object_lock(*object);
11393 else
11394 vm_object_lock_shared(*object);
11395
1c79356b
A
11396 /*
11397 * Save the version number
11398 */
11399
11400 out_version->main_timestamp = map->timestamp;
11401
11402 return KERN_SUCCESS;
11403}
11404
11405
11406/*
11407 * vm_map_verify:
11408 *
11409 * Verifies that the map in question has not changed
11410 * since the given version. If successful, the map
11411 * will not change until vm_map_verify_done() is called.
11412 */
11413boolean_t
11414vm_map_verify(
39037602
A
11415 vm_map_t map,
11416 vm_map_version_t *version) /* REF */
1c79356b
A
11417{
11418 boolean_t result;
11419
11420 vm_map_lock_read(map);
11421 result = (map->timestamp == version->main_timestamp);
11422
11423 if (!result)
11424 vm_map_unlock_read(map);
11425
11426 return(result);
11427}
11428
11429/*
11430 * vm_map_verify_done:
11431 *
11432 * Releases locks acquired by a vm_map_verify.
11433 *
11434 * This is now a macro in vm/vm_map.h. It does a
11435 * vm_map_unlock_read on the map.
11436 */
11437
11438
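For orientation, the sketch below shows how a caller is expected to combine vm_map_lookup_locked() (above) with vm_map_verify()/vm_map_verify_done(): look the address up under the map read lock, remember the returned version, drop the locks for any blocking work, then re-validate before relying on the result. This is an illustrative condensation loosely modeled on the fault path, not actual xnu code; the function name, the NULL fault_info, and the simplified error handling are mine.

/*
 * Illustrative sketch only -- not part of vm_map.c.
 */
static kern_return_t
example_lookup_and_verify(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,			/* fault_info */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}

	/* keep the object alive, then drop every lock so we may block */
	vm_object_reference_locked(object);
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);

	/* ... resolve the page at (object, offset), honoring "prot" and */
	/* "wired"; this may block, so the map can change under us ...   */

	if (!vm_map_verify(map, &version)) {
		/* map changed while unlocked: caller must redo the lookup */
		vm_object_deallocate(object);
		return KERN_ABORTED;
	}
	/* vm_map_verify() left the map read-locked and unchanged; enter */
	/* the translation here, then release via vm_map_verify_done().  */
	vm_map_verify_done(map, &version);
	vm_object_deallocate(object);
	return KERN_SUCCESS;
}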
91447636
A
11439/*
11440 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11441 * Goes away after regular vm_region_recurse function migrates to
11442 * 64 bits
11443 * vm_region_recurse: A form of vm_region which follows the
11444 * submaps in a target map
11445 *
11446 */
11447
39037602
A
11448#if DEVELOPMENT || DEBUG
11449int vm_region_footprint = 0;
11450#endif /* DEVELOPMENT || DEBUG */
11451
91447636
A
11452kern_return_t
11453vm_map_region_recurse_64(
11454 vm_map_t map,
11455 vm_map_offset_t *address, /* IN/OUT */
11456 vm_map_size_t *size, /* OUT */
11457 natural_t *nesting_depth, /* IN/OUT */
11458 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11459 mach_msg_type_number_t *count) /* IN/OUT */
11460{
39236c6e 11461 mach_msg_type_number_t original_count;
91447636
A
11462 vm_region_extended_info_data_t extended;
11463 vm_map_entry_t tmp_entry;
11464 vm_map_offset_t user_address;
11465 unsigned int user_max_depth;
11466
11467 /*
11468 * "curr_entry" is the VM map entry preceding or including the
11469 * address we're looking for.
11470 * "curr_map" is the map or sub-map containing "curr_entry".
6d2010ae
A
11471 * "curr_address" is the equivalent of the top map's "user_address"
11472 * in the current map.
91447636
A
11473 * "curr_offset" is the cumulated offset of "curr_map" in the
11474 * target task's address space.
11475 * "curr_depth" is the depth of "curr_map" in the chain of
11476 * sub-maps.
6d2010ae
A
11477 *
11478 * "curr_max_below" and "curr_max_above" limit the range (around
11479 * "curr_address") we should take into account in the current (sub)map.
11480 * They limit the range to what's visible through the map entries
11481 * we've traversed from the top map to the current map.
11482
91447636
A
11483 */
11484 vm_map_entry_t curr_entry;
6d2010ae 11485 vm_map_address_t curr_address;
91447636
A
11486 vm_map_offset_t curr_offset;
11487 vm_map_t curr_map;
11488 unsigned int curr_depth;
6d2010ae
A
11489 vm_map_offset_t curr_max_below, curr_max_above;
11490 vm_map_offset_t curr_skip;
91447636
A
11491
11492 /*
11493 * "next_" is the same as "curr_" but for the VM region immediately
11494 * after the address we're looking for. We need to keep track of this
11495 * too because we want to return info about that region if the
11496 * address we're looking for is not mapped.
11497 */
11498 vm_map_entry_t next_entry;
11499 vm_map_offset_t next_offset;
6d2010ae 11500 vm_map_offset_t next_address;
91447636
A
11501 vm_map_t next_map;
11502 unsigned int next_depth;
6d2010ae
A
11503 vm_map_offset_t next_max_below, next_max_above;
11504 vm_map_offset_t next_skip;
91447636 11505
2d21ac55
A
11506 boolean_t look_for_pages;
11507 vm_region_submap_short_info_64_t short_info;
11508
91447636
A
11509 if (map == VM_MAP_NULL) {
11510 /* no address space to work on */
11511 return KERN_INVALID_ARGUMENT;
11512 }
11513
39236c6e
A
11514
11515 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11516 /*
11517 * "info" structure is not big enough and
11518 * would overflow
11519 */
11520 return KERN_INVALID_ARGUMENT;
11521 }
11522
11523 original_count = *count;
11524
11525 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11526 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11527 look_for_pages = FALSE;
11528 short_info = (vm_region_submap_short_info_64_t) submap_info;
11529 submap_info = NULL;
2d21ac55
A
11530 } else {
11531 look_for_pages = TRUE;
39236c6e 11532 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 11533 short_info = NULL;
39236c6e
A
11534
11535 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11536 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11537 }
91447636 11538 }
39236c6e 11539
91447636
A
11540 user_address = *address;
11541 user_max_depth = *nesting_depth;
11542
3e170ce0
A
11543 if (not_in_kdp) {
11544 vm_map_lock_read(map);
11545 }
11546
11547recurse_again:
91447636
A
11548 curr_entry = NULL;
11549 curr_map = map;
6d2010ae 11550 curr_address = user_address;
91447636 11551 curr_offset = 0;
6d2010ae 11552 curr_skip = 0;
91447636 11553 curr_depth = 0;
6d2010ae
A
11554 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11555 curr_max_below = curr_address;
91447636
A
11556
11557 next_entry = NULL;
11558 next_map = NULL;
6d2010ae 11559 next_address = 0;
91447636 11560 next_offset = 0;
6d2010ae 11561 next_skip = 0;
91447636 11562 next_depth = 0;
6d2010ae
A
11563 next_max_above = (vm_map_offset_t) -1;
11564 next_max_below = (vm_map_offset_t) -1;
91447636 11565
91447636
A
11566 for (;;) {
11567 if (vm_map_lookup_entry(curr_map,
6d2010ae 11568 curr_address,
91447636
A
11569 &tmp_entry)) {
11570 /* tmp_entry contains the address we're looking for */
11571 curr_entry = tmp_entry;
11572 } else {
6d2010ae 11573 vm_map_offset_t skip;
91447636
A
11574 /*
11575 * The address is not mapped. "tmp_entry" is the
11576 * map entry preceding the address. We want the next
11577 * one, if it exists.
11578 */
11579 curr_entry = tmp_entry->vme_next;
6d2010ae 11580
91447636 11581 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae
A
11582 (curr_entry->vme_start >=
11583 curr_address + curr_max_above)) {
91447636
A
11584 /* no next entry at this level: stop looking */
11585 if (not_in_kdp) {
11586 vm_map_unlock_read(curr_map);
11587 }
11588 curr_entry = NULL;
11589 curr_map = NULL;
3e170ce0 11590 curr_skip = 0;
91447636
A
11591 curr_offset = 0;
11592 curr_depth = 0;
6d2010ae
A
11593 curr_max_above = 0;
11594 curr_max_below = 0;
91447636
A
11595 break;
11596 }
6d2010ae
A
11597
11598 /* adjust current address and offset */
11599 skip = curr_entry->vme_start - curr_address;
11600 curr_address = curr_entry->vme_start;
3e170ce0 11601 curr_skip += skip;
6d2010ae
A
11602 curr_offset += skip;
11603 curr_max_above -= skip;
11604 curr_max_below = 0;
91447636
A
11605 }
11606
11607 /*
11608 * Is the next entry at this level closer to the address (or
11609 * deeper in the submap chain) than the one we had
11610 * so far ?
11611 */
11612 tmp_entry = curr_entry->vme_next;
11613 if (tmp_entry == vm_map_to_entry(curr_map)) {
11614 /* no next entry at this level */
6d2010ae
A
11615 } else if (tmp_entry->vme_start >=
11616 curr_address + curr_max_above) {
91447636
A
11617 /*
11618 * tmp_entry is beyond the scope of what we mapped of
11619 * this submap in the upper level: ignore it.
11620 */
11621 } else if ((next_entry == NULL) ||
11622 (tmp_entry->vme_start + curr_offset <=
11623 next_entry->vme_start + next_offset)) {
11624 /*
11625 * We didn't have a "next_entry" or this one is
11626 * closer to the address we're looking for:
11627 * use this "tmp_entry" as the new "next_entry".
11628 */
11629 if (next_entry != NULL) {
11630 /* unlock the last "next_map" */
11631 if (next_map != curr_map && not_in_kdp) {
11632 vm_map_unlock_read(next_map);
11633 }
11634 }
11635 next_entry = tmp_entry;
11636 next_map = curr_map;
91447636 11637 next_depth = curr_depth;
6d2010ae
A
11638 next_address = next_entry->vme_start;
11639 next_skip = curr_skip;
3e170ce0 11640 next_skip += (next_address - curr_address);
6d2010ae
A
11641 next_offset = curr_offset;
11642 next_offset += (next_address - curr_address);
11643 next_max_above = MIN(next_max_above, curr_max_above);
11644 next_max_above = MIN(next_max_above,
11645 next_entry->vme_end - next_address);
11646 next_max_below = MIN(next_max_below, curr_max_below);
11647 next_max_below = MIN(next_max_below,
11648 next_address - next_entry->vme_start);
91447636
A
11649 }
11650
6d2010ae
A
11651 /*
11652 * "curr_max_{above,below}" allow us to keep track of the
11653 * portion of the submap that is actually mapped at this level:
11654 * the rest of that submap is irrelevant to us, since it's not
11655 * mapped here.
11656 * The relevant portion of the map starts at
3e170ce0 11657 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
6d2010ae
A
11658 */
11659 curr_max_above = MIN(curr_max_above,
11660 curr_entry->vme_end - curr_address);
11661 curr_max_below = MIN(curr_max_below,
11662 curr_address - curr_entry->vme_start);
11663
91447636
A
11664 if (!curr_entry->is_sub_map ||
11665 curr_depth >= user_max_depth) {
11666 /*
11667 * We hit a leaf map or we reached the maximum depth
11668 * we could, so stop looking. Keep the current map
11669 * locked.
11670 */
11671 break;
11672 }
11673
11674 /*
11675 * Get down to the next submap level.
11676 */
11677
11678 /*
11679 * Lock the next level and unlock the current level,
11680 * unless we need to keep it locked to access the "next_entry"
11681 * later.
11682 */
11683 if (not_in_kdp) {
3e170ce0 11684 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
11685 }
11686 if (curr_map == next_map) {
11687 /* keep "next_map" locked in case we need it */
11688 } else {
11689 /* release this map */
b0d623f7
A
11690 if (not_in_kdp)
11691 vm_map_unlock_read(curr_map);
91447636
A
11692 }
11693
11694 /*
11695 * Adjust the offset. "curr_entry" maps the submap
11696 * at relative address "curr_entry->vme_start" in the
3e170ce0 11697 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
11698 * bytes of the submap.
11699 * "curr_offset" always represents the offset of a virtual
11700 * address in the curr_map relative to the absolute address
11701 * space (i.e. the top-level VM map).
11702 */
11703 curr_offset +=
3e170ce0 11704 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 11705 curr_address = user_address + curr_offset;
91447636 11706 /* switch to the submap */
3e170ce0 11707 curr_map = VME_SUBMAP(curr_entry);
91447636 11708 curr_depth++;
91447636
A
11709 curr_entry = NULL;
11710 }
11711
11712 if (curr_entry == NULL) {
11713 /* no VM region contains the address... */
39037602
A
11714#if DEVELOPMENT || DEBUG
11715 if (vm_region_footprint && /* we want footprint numbers */
11716 look_for_pages && /* & we want page counts */
11717 next_entry == NULL && /* & there are no more regions */
11718 /* & we haven't already provided our fake region: */
11719 user_address == vm_map_last_entry(map)->vme_end) {
11720 ledger_amount_t nonvol, nonvol_compressed;
11721 /*
11722 * Add a fake memory region to account for
11723 * purgeable memory that counts towards this
11724 * task's memory footprint, i.e. the resident and
11725 * compressed pages of non-volatile objects
11726 * owned by that task.
11727 */
11728 ledger_get_balance(
11729 map->pmap->ledger,
11730 task_ledgers.purgeable_nonvolatile,
11731 &nonvol);
11732 ledger_get_balance(
11733 map->pmap->ledger,
11734 task_ledgers.purgeable_nonvolatile_compressed,
11735 &nonvol_compressed);
11736 if (nonvol + nonvol_compressed == 0) {
11737 /* no purgeable memory usage to report */
11738 return KERN_FAILURE;
11739 }
11740 /* fake region to show nonvolatile footprint */
11741 submap_info->protection = VM_PROT_DEFAULT;
11742 submap_info->max_protection = VM_PROT_DEFAULT;
11743 submap_info->inheritance = VM_INHERIT_DEFAULT;
11744 submap_info->offset = 0;
11745 submap_info->user_tag = 0;
11746 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
11747 submap_info->pages_shared_now_private = 0;
11748 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
11749 submap_info->pages_dirtied = submap_info->pages_resident;
11750 submap_info->ref_count = 1;
11751 submap_info->shadow_depth = 0;
11752 submap_info->external_pager = 0;
11753 submap_info->share_mode = SM_PRIVATE;
11754 submap_info->is_submap = 0;
11755 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
11756 submap_info->object_id = 0x11111111;
11757 submap_info->user_wired_count = 0;
11758 submap_info->pages_reusable = 0;
11759 *nesting_depth = 0;
11760 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
11761 *address = user_address;
11762 return KERN_SUCCESS;
11763 }
11764#endif /* DEVELOPMENT || DEBUG */
91447636
A
11765 if (next_entry == NULL) {
11766 /* ... and no VM region follows it either */
11767 return KERN_INVALID_ADDRESS;
11768 }
11769 /* ... gather info about the next VM region */
11770 curr_entry = next_entry;
11771 curr_map = next_map; /* still locked ... */
6d2010ae
A
11772 curr_address = next_address;
11773 curr_skip = next_skip;
91447636
A
11774 curr_offset = next_offset;
11775 curr_depth = next_depth;
6d2010ae
A
11776 curr_max_above = next_max_above;
11777 curr_max_below = next_max_below;
91447636
A
11778 } else {
11779 /* we won't need "next_entry" after all */
11780 if (next_entry != NULL) {
11781 /* release "next_map" */
11782 if (next_map != curr_map && not_in_kdp) {
11783 vm_map_unlock_read(next_map);
11784 }
11785 }
11786 }
11787 next_entry = NULL;
11788 next_map = NULL;
11789 next_offset = 0;
6d2010ae 11790 next_skip = 0;
91447636 11791 next_depth = 0;
6d2010ae
A
11792 next_max_below = -1;
11793 next_max_above = -1;
91447636 11794
3e170ce0
A
11795 if (curr_entry->is_sub_map &&
11796 curr_depth < user_max_depth) {
11797 /*
11798 * We're not as deep as we could be: we must have
11799 * gone back up after not finding anything mapped
11800 * below the original top-level map entry's range.
11801 * Let's move "curr_address" forward and recurse again.
11802 */
11803 user_address = curr_address;
11804 goto recurse_again;
11805 }
11806
91447636 11807 *nesting_depth = curr_depth;
6d2010ae
A
11808 *size = curr_max_above + curr_max_below;
11809 *address = user_address + curr_skip - curr_max_below;
91447636 11810
b0d623f7
A
11811// LP64todo: all the current tools are 32bit, obviously never worked for 64b
11812// so probably should be a real 32b ID vs. ptr.
11813// Current users just check for equality
39236c6e 11814#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 11815
2d21ac55 11816 if (look_for_pages) {
3e170ce0
A
11817 submap_info->user_tag = VME_ALIAS(curr_entry);
11818 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11819 submap_info->protection = curr_entry->protection;
11820 submap_info->inheritance = curr_entry->inheritance;
11821 submap_info->max_protection = curr_entry->max_protection;
11822 submap_info->behavior = curr_entry->behavior;
11823 submap_info->user_wired_count = curr_entry->user_wired_count;
11824 submap_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11825 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11826 } else {
3e170ce0
A
11827 short_info->user_tag = VME_ALIAS(curr_entry);
11828 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11829 short_info->protection = curr_entry->protection;
11830 short_info->inheritance = curr_entry->inheritance;
11831 short_info->max_protection = curr_entry->max_protection;
11832 short_info->behavior = curr_entry->behavior;
11833 short_info->user_wired_count = curr_entry->user_wired_count;
11834 short_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11835 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11836 }
91447636
A
11837
11838 extended.pages_resident = 0;
11839 extended.pages_swapped_out = 0;
11840 extended.pages_shared_now_private = 0;
11841 extended.pages_dirtied = 0;
39236c6e 11842 extended.pages_reusable = 0;
91447636
A
11843 extended.external_pager = 0;
11844 extended.shadow_depth = 0;
3e170ce0
A
11845 extended.share_mode = SM_EMPTY;
11846 extended.ref_count = 0;
91447636
A
11847
11848 if (not_in_kdp) {
11849 if (!curr_entry->is_sub_map) {
6d2010ae
A
11850 vm_map_offset_t range_start, range_end;
11851 range_start = MAX((curr_address - curr_max_below),
11852 curr_entry->vme_start);
11853 range_end = MIN((curr_address + curr_max_above),
11854 curr_entry->vme_end);
91447636 11855 vm_map_region_walk(curr_map,
6d2010ae 11856 range_start,
91447636 11857 curr_entry,
3e170ce0 11858 (VME_OFFSET(curr_entry) +
6d2010ae
A
11859 (range_start -
11860 curr_entry->vme_start)),
11861 range_end - range_start,
2d21ac55 11862 &extended,
39236c6e 11863 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
11864 if (extended.external_pager &&
11865 extended.ref_count == 2 &&
11866 extended.share_mode == SM_SHARED) {
2d21ac55 11867 extended.share_mode = SM_PRIVATE;
91447636 11868 }
91447636
A
11869 } else {
11870 if (curr_entry->use_pmap) {
2d21ac55 11871 extended.share_mode = SM_TRUESHARED;
91447636 11872 } else {
2d21ac55 11873 extended.share_mode = SM_PRIVATE;
91447636 11874 }
3e170ce0 11875 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
91447636
A
11876 }
11877 }
11878
2d21ac55
A
11879 if (look_for_pages) {
11880 submap_info->pages_resident = extended.pages_resident;
11881 submap_info->pages_swapped_out = extended.pages_swapped_out;
11882 submap_info->pages_shared_now_private =
11883 extended.pages_shared_now_private;
11884 submap_info->pages_dirtied = extended.pages_dirtied;
11885 submap_info->external_pager = extended.external_pager;
11886 submap_info->shadow_depth = extended.shadow_depth;
11887 submap_info->share_mode = extended.share_mode;
11888 submap_info->ref_count = extended.ref_count;
39236c6e
A
11889
11890 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11891 submap_info->pages_reusable = extended.pages_reusable;
11892 }
2d21ac55
A
11893 } else {
11894 short_info->external_pager = extended.external_pager;
11895 short_info->shadow_depth = extended.shadow_depth;
11896 short_info->share_mode = extended.share_mode;
11897 short_info->ref_count = extended.ref_count;
11898 }
91447636
A
11899
11900 if (not_in_kdp) {
11901 vm_map_unlock_read(curr_map);
11902 }
11903
11904 return KERN_SUCCESS;
11905}
11906
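The user-space entry point for this walk is mach_vm_region_recurse(). Below is a minimal, self-contained sketch of the usual loop for enumerating a task's regions while descending into submaps (error handling trimmed; this illustrates the interface above, it is not code from xnu):

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
	mach_vm_address_t		addr = 0;
	mach_vm_size_t			size = 0;
	natural_t			depth = 0;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(mach_task_self(), &addr, &size,
					   &depth,
					   (vm_region_recurse_info_t)&info,
					   &count) != KERN_SUCCESS)
			break;			/* KERN_INVALID_ADDRESS: done */
		if (info.is_submap) {
			/* re-query the same address, one nesting level deeper */
			depth++;
			continue;
		}
		printf("0x%016llx-0x%016llx depth %u prot %d/%d resident %u\n",
		       (unsigned long long)addr,
		       (unsigned long long)(addr + size),
		       depth, info.protection, info.max_protection,
		       info.pages_resident);
		addr += size;
	}
	return 0;
}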
1c79356b
A
11907/*
11908 * vm_region:
11909 *
11910 * User call to obtain information about a region in
11911 * a task's address map. Currently, only one flavor is
11912 * supported.
11913 *
11914 * XXX The reserved and behavior fields cannot be filled
11915 * in until the vm merge from the IK is completed, and
11916 * vm_reserve is implemented.
1c79356b
A
11917 */
11918
11919kern_return_t
91447636 11920vm_map_region(
1c79356b 11921 vm_map_t map,
91447636
A
11922 vm_map_offset_t *address, /* IN/OUT */
11923 vm_map_size_t *size, /* OUT */
1c79356b
A
11924 vm_region_flavor_t flavor, /* IN */
11925 vm_region_info_t info, /* OUT */
91447636
A
11926 mach_msg_type_number_t *count, /* IN/OUT */
11927 mach_port_t *object_name) /* OUT */
1c79356b
A
11928{
11929 vm_map_entry_t tmp_entry;
1c79356b 11930 vm_map_entry_t entry;
91447636 11931 vm_map_offset_t start;
1c79356b
A
11932
11933 if (map == VM_MAP_NULL)
11934 return(KERN_INVALID_ARGUMENT);
11935
11936 switch (flavor) {
91447636 11937
1c79356b 11938 case VM_REGION_BASIC_INFO:
2d21ac55 11939 /* legacy for old 32-bit objects info */
1c79356b 11940 {
2d21ac55 11941 vm_region_basic_info_t basic;
91447636 11942
2d21ac55
A
11943 if (*count < VM_REGION_BASIC_INFO_COUNT)
11944 return(KERN_INVALID_ARGUMENT);
1c79356b 11945
2d21ac55
A
11946 basic = (vm_region_basic_info_t) info;
11947 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 11948
2d21ac55 11949 vm_map_lock_read(map);
1c79356b 11950
2d21ac55
A
11951 start = *address;
11952 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11953 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11954 vm_map_unlock_read(map);
11955 return(KERN_INVALID_ADDRESS);
11956 }
11957 } else {
11958 entry = tmp_entry;
1c79356b 11959 }
1c79356b 11960
2d21ac55 11961 start = entry->vme_start;
1c79356b 11962
3e170ce0 11963 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
11964 basic->protection = entry->protection;
11965 basic->inheritance = entry->inheritance;
11966 basic->max_protection = entry->max_protection;
11967 basic->behavior = entry->behavior;
11968 basic->user_wired_count = entry->user_wired_count;
11969 basic->reserved = entry->is_sub_map;
11970 *address = start;
11971 *size = (entry->vme_end - start);
91447636 11972
2d21ac55
A
11973 if (object_name) *object_name = IP_NULL;
11974 if (entry->is_sub_map) {
11975 basic->shared = FALSE;
11976 } else {
11977 basic->shared = entry->is_shared;
11978 }
91447636 11979
2d21ac55
A
11980 vm_map_unlock_read(map);
11981 return(KERN_SUCCESS);
91447636
A
11982 }
11983
11984 case VM_REGION_BASIC_INFO_64:
11985 {
2d21ac55 11986 vm_region_basic_info_64_t basic;
91447636 11987
2d21ac55
A
11988 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11989 return(KERN_INVALID_ARGUMENT);
11990
11991 basic = (vm_region_basic_info_64_t) info;
11992 *count = VM_REGION_BASIC_INFO_COUNT_64;
11993
11994 vm_map_lock_read(map);
11995
11996 start = *address;
11997 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11998 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11999 vm_map_unlock_read(map);
12000 return(KERN_INVALID_ADDRESS);
12001 }
12002 } else {
12003 entry = tmp_entry;
12004 }
91447636 12005
2d21ac55 12006 start = entry->vme_start;
91447636 12007
3e170ce0 12008 basic->offset = VME_OFFSET(entry);
2d21ac55
A
12009 basic->protection = entry->protection;
12010 basic->inheritance = entry->inheritance;
12011 basic->max_protection = entry->max_protection;
12012 basic->behavior = entry->behavior;
12013 basic->user_wired_count = entry->user_wired_count;
12014 basic->reserved = entry->is_sub_map;
12015 *address = start;
12016 *size = (entry->vme_end - start);
91447636 12017
2d21ac55
A
12018 if (object_name) *object_name = IP_NULL;
12019 if (entry->is_sub_map) {
12020 basic->shared = FALSE;
12021 } else {
12022 basic->shared = entry->is_shared;
91447636 12023 }
2d21ac55
A
12024
12025 vm_map_unlock_read(map);
12026 return(KERN_SUCCESS);
1c79356b
A
12027 }
12028 case VM_REGION_EXTENDED_INFO:
2d21ac55
A
12029 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
12030 return(KERN_INVALID_ARGUMENT);
39236c6e
A
12031 /*fallthru*/
12032 case VM_REGION_EXTENDED_INFO__legacy:
12033 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
12034 return KERN_INVALID_ARGUMENT;
12035
12036 {
12037 vm_region_extended_info_t extended;
12038 mach_msg_type_number_t original_count;
1c79356b 12039
2d21ac55 12040 extended = (vm_region_extended_info_t) info;
1c79356b 12041
2d21ac55 12042 vm_map_lock_read(map);
1c79356b 12043
2d21ac55
A
12044 start = *address;
12045 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12046 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12047 vm_map_unlock_read(map);
12048 return(KERN_INVALID_ADDRESS);
12049 }
12050 } else {
12051 entry = tmp_entry;
1c79356b 12052 }
2d21ac55 12053 start = entry->vme_start;
1c79356b 12054
2d21ac55 12055 extended->protection = entry->protection;
3e170ce0 12056 extended->user_tag = VME_ALIAS(entry);
2d21ac55
A
12057 extended->pages_resident = 0;
12058 extended->pages_swapped_out = 0;
12059 extended->pages_shared_now_private = 0;
12060 extended->pages_dirtied = 0;
12061 extended->external_pager = 0;
12062 extended->shadow_depth = 0;
1c79356b 12063
39236c6e
A
12064 original_count = *count;
12065 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
12066 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
12067 } else {
12068 extended->pages_reusable = 0;
12069 *count = VM_REGION_EXTENDED_INFO_COUNT;
12070 }
12071
3e170ce0 12072 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 12073
2d21ac55
A
12074 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
12075 extended->share_mode = SM_PRIVATE;
1c79356b 12076
2d21ac55
A
12077 if (object_name)
12078 *object_name = IP_NULL;
12079 *address = start;
12080 *size = (entry->vme_end - start);
1c79356b 12081
2d21ac55
A
12082 vm_map_unlock_read(map);
12083 return(KERN_SUCCESS);
1c79356b
A
12084 }
12085 case VM_REGION_TOP_INFO:
12086 {
2d21ac55 12087 vm_region_top_info_t top;
1c79356b 12088
2d21ac55
A
12089 if (*count < VM_REGION_TOP_INFO_COUNT)
12090 return(KERN_INVALID_ARGUMENT);
1c79356b 12091
2d21ac55
A
12092 top = (vm_region_top_info_t) info;
12093 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 12094
2d21ac55 12095 vm_map_lock_read(map);
1c79356b 12096
2d21ac55
A
12097 start = *address;
12098 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12099 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12100 vm_map_unlock_read(map);
12101 return(KERN_INVALID_ADDRESS);
12102 }
12103 } else {
12104 entry = tmp_entry;
1c79356b 12105
2d21ac55
A
12106 }
12107 start = entry->vme_start;
1c79356b 12108
2d21ac55
A
12109 top->private_pages_resident = 0;
12110 top->shared_pages_resident = 0;
1c79356b 12111
2d21ac55 12112 vm_map_region_top_walk(entry, top);
1c79356b 12113
2d21ac55
A
12114 if (object_name)
12115 *object_name = IP_NULL;
12116 *address = start;
12117 *size = (entry->vme_end - start);
1c79356b 12118
2d21ac55
A
12119 vm_map_unlock_read(map);
12120 return(KERN_SUCCESS);
1c79356b
A
12121 }
12122 default:
2d21ac55 12123 return(KERN_INVALID_ARGUMENT);
1c79356b
A
12124 }
12125}
12126
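From user space the VM_REGION_BASIC_INFO_64 flavor is reached through mach_vm_region(); as the handler above shows, the kernel may slide *address forward to the start of the containing (or next) entry. A small illustrative sketch (the helper name is mine, not xnu's):

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
show_region(mach_vm_address_t where)
{
	mach_vm_address_t		addr = where;
	mach_vm_size_t			size = 0;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name = MACH_PORT_NULL;
	kern_return_t			kr;

	kr = mach_vm_region(mach_task_self(), &addr, &size,
			    VM_REGION_BASIC_INFO_64,
			    (vm_region_info_t)&info, &count, &object_name);
	if (kr != KERN_SUCCESS) {
		printf("mach_vm_region: %s\n", mach_error_string(kr));
		return;
	}
	printf("region 0x%llx-0x%llx prot %d max %d shared %d offset 0x%llx\n",
	       (unsigned long long)addr, (unsigned long long)(addr + size),
	       info.protection, info.max_protection, info.shared,
	       (unsigned long long)info.offset);
}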
b0d623f7
A
12127#define OBJ_RESIDENT_COUNT(obj, entry_size) \
12128 MIN((entry_size), \
12129 ((obj)->all_reusable ? \
12130 (obj)->wired_page_count : \
12131 (obj)->resident_page_count - (obj)->reusable_page_count))
2d21ac55 12132
0c530ab8 12133void
91447636
A
12134vm_map_region_top_walk(
12135 vm_map_entry_t entry,
12136 vm_region_top_info_t top)
1c79356b 12137{
1c79356b 12138
3e170ce0 12139 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
12140 top->share_mode = SM_EMPTY;
12141 top->ref_count = 0;
12142 top->obj_id = 0;
12143 return;
1c79356b 12144 }
2d21ac55 12145
91447636 12146 {
2d21ac55
A
12147 struct vm_object *obj, *tmp_obj;
12148 int ref_count;
12149 uint32_t entry_size;
1c79356b 12150
b0d623f7 12151 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 12152
3e170ce0 12153 obj = VME_OBJECT(entry);
1c79356b 12154
2d21ac55
A
12155 vm_object_lock(obj);
12156
12157 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12158 ref_count--;
12159
b0d623f7 12160 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55
A
12161 if (obj->shadow) {
12162 if (ref_count == 1)
b0d623f7
A
12163 top->private_pages_resident =
12164 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55 12165 else
b0d623f7
A
12166 top->shared_pages_resident =
12167 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12168 top->ref_count = ref_count;
12169 top->share_mode = SM_COW;
91447636 12170
2d21ac55
A
12171 while ((tmp_obj = obj->shadow)) {
12172 vm_object_lock(tmp_obj);
12173 vm_object_unlock(obj);
12174 obj = tmp_obj;
1c79356b 12175
2d21ac55
A
12176 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12177 ref_count--;
1c79356b 12178
b0d623f7
A
12179 assert(obj->reusable_page_count <= obj->resident_page_count);
12180 top->shared_pages_resident +=
12181 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12182 top->ref_count += ref_count - 1;
12183 }
1c79356b 12184 } else {
6d2010ae
A
12185 if (entry->superpage_size) {
12186 top->share_mode = SM_LARGE_PAGE;
12187 top->shared_pages_resident = 0;
12188 top->private_pages_resident = entry_size;
12189 } else if (entry->needs_copy) {
2d21ac55 12190 top->share_mode = SM_COW;
b0d623f7
A
12191 top->shared_pages_resident =
12192 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12193 } else {
12194 if (ref_count == 1 ||
12195 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
12196 top->share_mode = SM_PRIVATE;
39236c6e
A
12197 top->private_pages_resident =
12198 OBJ_RESIDENT_COUNT(obj,
12199 entry_size);
2d21ac55
A
12200 } else {
12201 top->share_mode = SM_SHARED;
b0d623f7
A
12202 top->shared_pages_resident =
12203 OBJ_RESIDENT_COUNT(obj,
12204 entry_size);
2d21ac55
A
12205 }
12206 }
12207 top->ref_count = ref_count;
1c79356b 12208 }
b0d623f7 12209 /* XXX K64: obj_id will be truncated */
39236c6e 12210 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 12211
2d21ac55 12212 vm_object_unlock(obj);
1c79356b 12213 }
91447636
A
12214}
12215
0c530ab8 12216void
91447636
A
12217vm_map_region_walk(
12218 vm_map_t map,
2d21ac55
A
12219 vm_map_offset_t va,
12220 vm_map_entry_t entry,
91447636
A
12221 vm_object_offset_t offset,
12222 vm_object_size_t range,
2d21ac55 12223 vm_region_extended_info_t extended,
39236c6e
A
12224 boolean_t look_for_pages,
12225 mach_msg_type_number_t count)
91447636 12226{
39037602
A
12227 struct vm_object *obj, *tmp_obj;
12228 vm_map_offset_t last_offset;
12229 int i;
12230 int ref_count;
91447636
A
12231 struct vm_object *shadow_object;
12232 int shadow_depth;
12233
3e170ce0 12234 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 12235 (entry->is_sub_map) ||
3e170ce0 12236 (VME_OBJECT(entry)->phys_contiguous &&
6d2010ae 12237 !entry->superpage_size)) {
2d21ac55
A
12238 extended->share_mode = SM_EMPTY;
12239 extended->ref_count = 0;
12240 return;
1c79356b 12241 }
6d2010ae
A
12242
12243 if (entry->superpage_size) {
12244 extended->shadow_depth = 0;
12245 extended->share_mode = SM_LARGE_PAGE;
12246 extended->ref_count = 1;
12247 extended->external_pager = 0;
12248 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
12249 extended->shadow_depth = 0;
12250 return;
12251 }
12252
39037602 12253 obj = VME_OBJECT(entry);
2d21ac55 12254
39037602 12255 vm_object_lock(obj);
2d21ac55 12256
39037602
A
12257 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12258 ref_count--;
2d21ac55 12259
39037602
A
12260 if (look_for_pages) {
12261 for (last_offset = offset + range;
12262 offset < last_offset;
12263 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
12264#if DEVELOPMENT || DEBUG
12265 if (vm_region_footprint) {
12266 if (obj->purgable != VM_PURGABLE_DENY) {
12267 /* alternate accounting */
12268 } else if (entry->iokit_acct) {
12269 /* alternate accounting */
12270 extended->pages_resident++;
12271 extended->pages_dirtied++;
12272 } else {
12273 int disp;
12274
12275 disp = 0;
12276 pmap_query_page_info(map->pmap, va, &disp);
12277 if (disp & PMAP_QUERY_PAGE_PRESENT) {
12278 extended->pages_resident++;
12279 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
12280 extended->pages_reusable++;
12281 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
12282 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
12283 /* alternate accounting */
12284 } else {
12285 extended->pages_dirtied++;
12286 }
12287 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
12288 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
12289 /* alternate accounting */
12290 } else {
12291 extended->pages_swapped_out++;
12292 }
b0d623f7 12293 }
2d21ac55 12294 }
39037602 12295 continue;
2d21ac55 12296 }
39037602
A
12297#endif /* DEVELOPMENT || DEBUG */
12298 vm_map_region_look_for_page(map, va, obj,
12299 offset, ref_count,
12300 0, extended, count);
2d21ac55 12301 }
39037602
A
12302#if DEVELOPMENT || DEBUG
12303 if (vm_region_footprint) {
12304 goto collect_object_info;
12305 }
12306#endif /* DEVELOPMENT || DEBUG */
12307 } else {
12308#if DEVELOPMENT || DEBUG
12309 collect_object_info:
12310#endif /* DEVELOPMENT || DEBUG */
12311 shadow_object = obj->shadow;
12312 shadow_depth = 0;
2d21ac55 12313
39037602
A
12314 if ( !(obj->pager_trusted) && !(obj->internal))
12315 extended->external_pager = 1;
12316
12317 if (shadow_object != VM_OBJECT_NULL) {
12318 vm_object_lock(shadow_object);
12319 for (;
12320 shadow_object != VM_OBJECT_NULL;
12321 shadow_depth++) {
12322 vm_object_t next_shadow;
12323
12324 if ( !(shadow_object->pager_trusted) &&
12325 !(shadow_object->internal))
12326 extended->external_pager = 1;
12327
12328 next_shadow = shadow_object->shadow;
12329 if (next_shadow) {
12330 vm_object_lock(next_shadow);
12331 }
12332 vm_object_unlock(shadow_object);
12333 shadow_object = next_shadow;
2d21ac55 12334 }
91447636 12335 }
39037602
A
12336 extended->shadow_depth = shadow_depth;
12337 }
1c79356b 12338
39037602
A
12339 if (extended->shadow_depth || entry->needs_copy)
12340 extended->share_mode = SM_COW;
12341 else {
12342 if (ref_count == 1)
12343 extended->share_mode = SM_PRIVATE;
12344 else {
12345 if (obj->true_share)
12346 extended->share_mode = SM_TRUESHARED;
12347 else
12348 extended->share_mode = SM_SHARED;
2d21ac55 12349 }
39037602
A
12350 }
12351 extended->ref_count = ref_count - extended->shadow_depth;
12352
12353 for (i = 0; i < extended->shadow_depth; i++) {
12354 if ((tmp_obj = obj->shadow) == 0)
12355 break;
12356 vm_object_lock(tmp_obj);
2d21ac55 12357 vm_object_unlock(obj);
1c79356b 12358
39037602
A
12359 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
12360 ref_count--;
12361
12362 extended->ref_count += ref_count;
12363 obj = tmp_obj;
12364 }
12365 vm_object_unlock(obj);
91447636 12366
39037602
A
12367 if (extended->share_mode == SM_SHARED) {
12368 vm_map_entry_t cur;
12369 vm_map_entry_t last;
12370 int my_refs;
91447636 12371
39037602
A
12372 obj = VME_OBJECT(entry);
12373 last = vm_map_to_entry(map);
12374 my_refs = 0;
91447636 12375
39037602
A
12376 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12377 ref_count--;
12378 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
12379 my_refs += vm_map_region_count_obj_refs(cur, obj);
12380
12381 if (my_refs == ref_count)
12382 extended->share_mode = SM_PRIVATE_ALIASED;
12383 else if (my_refs > 1)
12384 extended->share_mode = SM_SHARED_ALIASED;
91447636 12385 }
1c79356b
A
12386}
12387
1c79356b 12388
91447636
A
12389/* object is locked on entry and locked on return */
12390
12391
12392static void
12393vm_map_region_look_for_page(
12394 __unused vm_map_t map,
2d21ac55
A
12395 __unused vm_map_offset_t va,
12396 vm_object_t object,
12397 vm_object_offset_t offset,
91447636
A
12398 int max_refcnt,
12399 int depth,
39236c6e
A
12400 vm_region_extended_info_t extended,
12401 mach_msg_type_number_t count)
1c79356b 12402{
39037602
A
12403 vm_page_t p;
12404 vm_object_t shadow;
12405 int ref_count;
12406 vm_object_t caller_object;
12407
91447636
A
12408 shadow = object->shadow;
12409 caller_object = object;
1c79356b 12410
91447636
A
12411
12412 while (TRUE) {
1c79356b 12413
91447636 12414 if ( !(object->pager_trusted) && !(object->internal))
2d21ac55 12415 extended->external_pager = 1;
1c79356b 12416
91447636
A
12417 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12418 if (shadow && (max_refcnt == 1))
12419 extended->pages_shared_now_private++;
1c79356b 12420
39236c6e 12421 if (!p->fictitious &&
39037602 12422 (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
91447636 12423 extended->pages_dirtied++;
39236c6e 12424 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
39037602 12425 if (p->reusable || object->all_reusable) {
39236c6e
A
12426 extended->pages_reusable++;
12427 }
12428 }
1c79356b 12429
39236c6e 12430 extended->pages_resident++;
91447636
A
12431
12432 if(object != caller_object)
2d21ac55 12433 vm_object_unlock(object);
91447636
A
12434
12435 return;
1c79356b 12436 }
39236c6e
A
12437 if (object->internal &&
12438 object->alive &&
12439 !object->terminating &&
12440 object->pager_ready) {
12441
39037602
A
12442 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
12443 == VM_EXTERNAL_STATE_EXISTS) {
12444 /* the pager has that page */
12445 extended->pages_swapped_out++;
12446 if (object != caller_object)
12447 vm_object_unlock(object);
12448 return;
2d21ac55 12449 }
1c79356b 12450 }
2d21ac55 12451
91447636 12452 if (shadow) {
2d21ac55 12453 vm_object_lock(shadow);
1c79356b 12454
91447636
A
12455 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12456 ref_count--;
1c79356b 12457
91447636
A
12458 if (++depth > extended->shadow_depth)
12459 extended->shadow_depth = depth;
1c79356b 12460
91447636
A
12461 if (ref_count > max_refcnt)
12462 max_refcnt = ref_count;
12463
12464 if(object != caller_object)
2d21ac55 12465 vm_object_unlock(object);
91447636 12466
6d2010ae 12467 offset = offset + object->vo_shadow_offset;
91447636
A
12468 object = shadow;
12469 shadow = object->shadow;
12470 continue;
1c79356b 12471 }
91447636 12472 if(object != caller_object)
2d21ac55 12473 vm_object_unlock(object);
91447636
A
12474 break;
12475 }
12476}
1c79356b 12477
91447636
A
12478static int
12479vm_map_region_count_obj_refs(
12480 vm_map_entry_t entry,
12481 vm_object_t object)
12482{
39037602
A
12483 int ref_count;
12484 vm_object_t chk_obj;
12485 vm_object_t tmp_obj;
1c79356b 12486
3e170ce0 12487 if (VME_OBJECT(entry) == 0)
2d21ac55 12488 return(0);
1c79356b 12489
91447636 12490 if (entry->is_sub_map)
2d21ac55 12491 return(0);
91447636 12492 else {
2d21ac55 12493 ref_count = 0;
1c79356b 12494
3e170ce0 12495 chk_obj = VME_OBJECT(entry);
2d21ac55 12496 vm_object_lock(chk_obj);
1c79356b 12497
2d21ac55
A
12498 while (chk_obj) {
12499 if (chk_obj == object)
12500 ref_count++;
12501 tmp_obj = chk_obj->shadow;
12502 if (tmp_obj)
12503 vm_object_lock(tmp_obj);
12504 vm_object_unlock(chk_obj);
1c79356b 12505
2d21ac55
A
12506 chk_obj = tmp_obj;
12507 }
1c79356b 12508 }
91447636 12509 return(ref_count);
1c79356b
A
12510}
12511
12512
12513/*
91447636
A
12514 * Routine: vm_map_simplify
12515 *
12516 * Description:
12517 * Attempt to simplify the map representation in
12518 * the vicinity of the given starting address.
12519 * Note:
12520 * This routine is intended primarily to keep the
12521 * kernel maps more compact -- they generally don't
12522 * benefit from the "expand a map entry" technology
12523 * at allocation time because the adjacent entry
12524 * is often wired down.
1c79356b 12525 */
91447636
A
12526void
12527vm_map_simplify_entry(
12528 vm_map_t map,
12529 vm_map_entry_t this_entry)
1c79356b 12530{
91447636 12531 vm_map_entry_t prev_entry;
1c79356b 12532
91447636 12533 counter(c_vm_map_simplify_entry_called++);
1c79356b 12534
91447636 12535 prev_entry = this_entry->vme_prev;
1c79356b 12536
91447636 12537 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 12538 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 12539
91447636 12540 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 12541
2d21ac55 12542 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
12543 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12544 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
91447636 12545 prev_entry->vme_start))
3e170ce0 12546 == VME_OFFSET(this_entry)) &&
1c79356b 12547
fe8ab488
A
12548 (prev_entry->behavior == this_entry->behavior) &&
12549 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
12550 (prev_entry->protection == this_entry->protection) &&
12551 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
12552 (prev_entry->inheritance == this_entry->inheritance) &&
12553 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 12554 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 12555 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
12556 (prev_entry->permanent == this_entry->permanent) &&
12557 (prev_entry->map_aligned == this_entry->map_aligned) &&
12558 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12559 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12560 /* from_reserved_zone: OK if that field doesn't match */
12561 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0
A
12562 (prev_entry->vme_resilient_codesign ==
12563 this_entry->vme_resilient_codesign) &&
12564 (prev_entry->vme_resilient_media ==
12565 this_entry->vme_resilient_media) &&
fe8ab488 12566
91447636
A
12567 (prev_entry->wired_count == this_entry->wired_count) &&
12568 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 12569
39037602 12570 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
91447636
A
12571 (prev_entry->in_transition == FALSE) &&
12572 (this_entry->in_transition == FALSE) &&
12573 (prev_entry->needs_wakeup == FALSE) &&
12574 (this_entry->needs_wakeup == FALSE) &&
12575 (prev_entry->is_shared == FALSE) &&
fe8ab488
A
12576 (this_entry->is_shared == FALSE) &&
12577 (prev_entry->superpage_size == FALSE) &&
12578 (this_entry->superpage_size == FALSE)
2d21ac55 12579 ) {
316670eb 12580 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 12581 assert(prev_entry->vme_start < this_entry->vme_end);
39236c6e
A
12582 if (prev_entry->map_aligned)
12583 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12584 VM_MAP_PAGE_MASK(map)));
91447636 12585 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
12586 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12587
12588 if (map->holelistenabled) {
12589 vm_map_store_update_first_free(map, this_entry, TRUE);
12590 }
12591
2d21ac55 12592 if (prev_entry->is_sub_map) {
3e170ce0 12593 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 12594 } else {
3e170ce0 12595 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 12596 }
91447636 12597 vm_map_entry_dispose(map, prev_entry);
0c530ab8 12598 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 12599 counter(c_vm_map_simplified++);
1c79356b 12600 }
91447636 12601}
1c79356b 12602
91447636
A
12603void
12604vm_map_simplify(
12605 vm_map_t map,
12606 vm_map_offset_t start)
12607{
12608 vm_map_entry_t this_entry;
1c79356b 12609
91447636
A
12610 vm_map_lock(map);
12611 if (vm_map_lookup_entry(map, start, &this_entry)) {
12612 vm_map_simplify_entry(map, this_entry);
12613 vm_map_simplify_entry(map, this_entry->vme_next);
12614 }
12615 counter(c_vm_map_simplify_called++);
12616 vm_map_unlock(map);
12617}
1c79356b 12618
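To make the long predicate in vm_map_simplify_entry() easier to digest, here is its core restated over a toy entry type: two entries coalesce only if they are virtually adjacent, back the same object at contiguous offsets, and agree on every attribute the kernel compares (the real test above also checks wiring, aliases, JIT and resilience flags, and more). The struct and names below are hypothetical, purely for illustration:

#include <stdint.h>
#include <stdbool.h>

struct toy_entry {
	uint64_t	 start, end;	/* [start, end) in the address space */
	const void	*object;	/* backing VM object */
	uint64_t	 offset;	/* object offset that "start" maps to */
	int		 prot, max_prot, inheritance;
};

static bool
toy_entries_coalesce(const struct toy_entry *prev, const struct toy_entry *cur)
{
	return prev->end == cur->start &&		/* virtually adjacent */
	    prev->object == cur->object &&		/* same backing object */
	    prev->offset + (prev->end - prev->start)
		== cur->offset &&			/* contiguous in the object */
	    prev->prot == cur->prot &&
	    prev->max_prot == cur->max_prot &&
	    prev->inheritance == cur->inheritance;	/* identical attributes */
}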
91447636
A
12619static void
12620vm_map_simplify_range(
12621 vm_map_t map,
12622 vm_map_offset_t start,
12623 vm_map_offset_t end)
12624{
12625 vm_map_entry_t entry;
1c79356b 12626
91447636
A
12627 /*
12628 * The map should be locked (for "write") by the caller.
12629 */
1c79356b 12630
91447636
A
12631 if (start >= end) {
12632 /* invalid address range */
12633 return;
12634 }
1c79356b 12635
39236c6e
A
12636 start = vm_map_trunc_page(start,
12637 VM_MAP_PAGE_MASK(map));
12638 end = vm_map_round_page(end,
12639 VM_MAP_PAGE_MASK(map));
2d21ac55 12640
91447636
A
12641 if (!vm_map_lookup_entry(map, start, &entry)) {
12642 /* "start" is not mapped and "entry" ends before "start" */
12643 if (entry == vm_map_to_entry(map)) {
12644 /* start with first entry in the map */
12645 entry = vm_map_first_entry(map);
12646 } else {
12647 /* start with next entry */
12648 entry = entry->vme_next;
12649 }
12650 }
12651
12652 while (entry != vm_map_to_entry(map) &&
12653 entry->vme_start <= end) {
12654 /* try and coalesce "entry" with its previous entry */
12655 vm_map_simplify_entry(map, entry);
12656 entry = entry->vme_next;
12657 }
12658}
1c79356b 12659
1c79356b 12660
91447636
A
12661/*
12662 * Routine: vm_map_machine_attribute
12663 * Purpose:
12664 * Provide machine-specific attributes to mappings,
12665 * such as cacheability etc. for machines that provide
12666 * them. NUMA architectures and machines with big/strange
12667 * caches will use this.
12668 * Note:
12669 * Responsibilities for locking and checking are handled here,
12670 * everything else in the pmap module. If any non-volatile
12671 * information must be kept, the pmap module should handle
12672 * it itself. [This assumes that attributes do not
12673 * need to be inherited, which seems ok to me]
12674 */
12675kern_return_t
12676vm_map_machine_attribute(
12677 vm_map_t map,
12678 vm_map_offset_t start,
12679 vm_map_offset_t end,
12680 vm_machine_attribute_t attribute,
12681 vm_machine_attribute_val_t* value) /* IN/OUT */
12682{
12683 kern_return_t ret;
12684 vm_map_size_t sync_size;
12685 vm_map_entry_t entry;
12686
12687 if (start < vm_map_min(map) || end > vm_map_max(map))
12688 return KERN_INVALID_ADDRESS;
1c79356b 12689
91447636
A
12690 /* Figure how much memory we need to flush (in page increments) */
12691 sync_size = end - start;
1c79356b 12692
91447636
A
12693 vm_map_lock(map);
12694
12695 if (attribute != MATTR_CACHE) {
12696 /* If we don't have to find physical addresses, we */
12697 /* don't have to do an explicit traversal here. */
12698 ret = pmap_attribute(map->pmap, start, end-start,
12699 attribute, value);
12700 vm_map_unlock(map);
12701 return ret;
12702 }
1c79356b 12703
91447636 12704 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 12705
91447636
A
12706 while(sync_size) {
12707 if (vm_map_lookup_entry(map, start, &entry)) {
12708 vm_map_size_t sub_size;
12709 if((entry->vme_end - start) > sync_size) {
12710 sub_size = sync_size;
12711 sync_size = 0;
12712 } else {
12713 sub_size = entry->vme_end - start;
2d21ac55 12714 sync_size -= sub_size;
91447636
A
12715 }
12716 if(entry->is_sub_map) {
12717 vm_map_offset_t sub_start;
12718 vm_map_offset_t sub_end;
1c79356b 12719
91447636 12720 sub_start = (start - entry->vme_start)
3e170ce0 12721 + VME_OFFSET(entry);
91447636
A
12722 sub_end = sub_start + sub_size;
12723 vm_map_machine_attribute(
3e170ce0 12724 VME_SUBMAP(entry),
91447636
A
12725 sub_start,
12726 sub_end,
12727 attribute, value);
12728 } else {
3e170ce0 12729 if (VME_OBJECT(entry)) {
91447636
A
12730 vm_page_t m;
12731 vm_object_t object;
12732 vm_object_t base_object;
12733 vm_object_t last_object;
12734 vm_object_offset_t offset;
12735 vm_object_offset_t base_offset;
12736 vm_map_size_t range;
12737 range = sub_size;
12738 offset = (start - entry->vme_start)
3e170ce0 12739 + VME_OFFSET(entry);
91447636 12740 base_offset = offset;
3e170ce0 12741 object = VME_OBJECT(entry);
91447636
A
12742 base_object = object;
12743 last_object = NULL;
1c79356b 12744
91447636 12745 vm_object_lock(object);
1c79356b 12746
91447636
A
12747 while (range) {
12748 m = vm_page_lookup(
12749 object, offset);
1c79356b 12750
91447636
A
12751 if (m && !m->fictitious) {
12752 ret =
2d21ac55 12753 pmap_attribute_cache_sync(
39037602 12754 VM_PAGE_GET_PHYS_PAGE(m),
2d21ac55
A
12755 PAGE_SIZE,
12756 attribute, value);
91447636
A
12757
12758 } else if (object->shadow) {
6d2010ae 12759 offset = offset + object->vo_shadow_offset;
91447636
A
12760 last_object = object;
12761 object = object->shadow;
12762 vm_object_lock(last_object->shadow);
12763 vm_object_unlock(last_object);
12764 continue;
12765 }
12766 range -= PAGE_SIZE;
1c79356b 12767
91447636
A
12768 if (base_object != object) {
12769 vm_object_unlock(object);
12770 vm_object_lock(base_object);
12771 object = base_object;
12772 }
12773 /* Bump to the next page */
12774 base_offset += PAGE_SIZE;
12775 offset = base_offset;
12776 }
12777 vm_object_unlock(object);
12778 }
12779 }
12780 start += sub_size;
12781 } else {
12782 vm_map_unlock(map);
12783 return KERN_FAILURE;
12784 }
12785
1c79356b 12786 }
e5568f75 12787
91447636 12788 vm_map_unlock(map);
e5568f75 12789
91447636
A
12790 return ret;
12791}
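/*
 * Illustrative sketch (not compiled): one plausible way user space can
 * reach the MATTR_CACHE path above is the Mach vm_machine_attribute()
 * call.  The header names and the MATTR_VAL_CACHE_FLUSH value are
 * assumptions here; check <mach/vm_attributes.h> on the target system.
 */
#if 0
#include <mach/mach.h>
#include <mach/vm_attributes.h>

static kern_return_t
flush_caches_for_range(vm_address_t addr, vm_size_t size)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	/* Ask the kernel to flush/sync caches for [addr, addr + size). */
	return vm_machine_attribute(mach_task_self(), addr, size,
				    MATTR_CACHE, &value);
}
#endif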
e5568f75 12792
91447636
A
12793/*
12794 * vm_map_behavior_set:
12795 *
12796 * Sets the paging reference behavior of the specified address
12797 * range in the target map. Paging reference behavior affects
12798 * how pagein operations resulting from faults on the map will be
12799 * clustered.
12800 */
12801kern_return_t
12802vm_map_behavior_set(
12803 vm_map_t map,
12804 vm_map_offset_t start,
12805 vm_map_offset_t end,
12806 vm_behavior_t new_behavior)
12807{
39037602 12808 vm_map_entry_t entry;
91447636 12809 vm_map_entry_t temp_entry;
e5568f75 12810
91447636 12811 XPR(XPR_VM_MAP,
2d21ac55 12812 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 12813 map, start, end, new_behavior, 0);
e5568f75 12814
6d2010ae
A
12815 if (start > end ||
12816 start < vm_map_min(map) ||
12817 end > vm_map_max(map)) {
12818 return KERN_NO_SPACE;
12819 }
12820
91447636 12821 switch (new_behavior) {
b0d623f7
A
12822
12823 /*
12824 * This first block of behaviors all set a persistent state on the specified
12825 * memory range. All we have to do here is to record the desired behavior
12826 * in the vm_map_entry_t's.
12827 */
12828
91447636
A
12829 case VM_BEHAVIOR_DEFAULT:
12830 case VM_BEHAVIOR_RANDOM:
12831 case VM_BEHAVIOR_SEQUENTIAL:
12832 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
12833 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12834 vm_map_lock(map);
12835
12836 /*
12837 * The entire address range must be valid for the map.
12838 * Note that vm_map_range_check() does a
12839 * vm_map_lookup_entry() internally and returns the
12840 * entry containing the start of the address range if
12841 * the entire range is valid.
12842 */
12843 if (vm_map_range_check(map, start, end, &temp_entry)) {
12844 entry = temp_entry;
12845 vm_map_clip_start(map, entry, start);
12846 }
12847 else {
12848 vm_map_unlock(map);
12849 return(KERN_INVALID_ADDRESS);
12850 }
12851
12852 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12853 vm_map_clip_end(map, entry, end);
fe8ab488
A
12854 if (entry->is_sub_map) {
12855 assert(!entry->use_pmap);
12856 }
b0d623f7
A
12857
12858 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12859 entry->zero_wired_pages = TRUE;
12860 } else {
12861 entry->behavior = new_behavior;
12862 }
12863 entry = entry->vme_next;
12864 }
12865
12866 vm_map_unlock(map);
91447636 12867 break;
b0d623f7
A
12868
12869 /*
12870 * The rest of these are different from the above in that they cause
12871 * an immediate action to take place as opposed to setting a behavior that
12872 * affects future actions.
12873 */
12874
91447636 12875 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
12876 return vm_map_willneed(map, start, end);
12877
91447636 12878 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
12879 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12880
12881 case VM_BEHAVIOR_FREE:
12882 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12883
12884 case VM_BEHAVIOR_REUSABLE:
12885 return vm_map_reusable_pages(map, start, end);
12886
12887 case VM_BEHAVIOR_REUSE:
12888 return vm_map_reuse_pages(map, start, end);
12889
12890 case VM_BEHAVIOR_CAN_REUSE:
12891 return vm_map_can_reuse(map, start, end);
12892
3e170ce0
A
12893#if MACH_ASSERT
12894 case VM_BEHAVIOR_PAGEOUT:
12895 return vm_map_pageout(map, start, end);
12896#endif /* MACH_ASSERT */
12897
1c79356b 12898 default:
91447636 12899 return(KERN_INVALID_ARGUMENT);
1c79356b 12900 }
1c79356b 12901
b0d623f7
A
12902 return(KERN_SUCCESS);
12903}
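/*
 * Illustrative sketch (not compiled): from user space these behaviors
 * are normally set through madvise(2), whose advice values map onto the
 * VM_BEHAVIOR_* cases above (MADV_SEQUENTIAL, MADV_RANDOM,
 * MADV_WILLNEED, MADV_DONTNEED, ...).  A minimal example, assuming a
 * page-aligned mapping obtained from mmap(2):
 */
#if 0
#include <sys/mman.h>
#include <stddef.h>

static int
advise_sequential(void *addr, size_t len)
{
	/* Hint that the range will be touched sequentially. */
	return madvise(addr, len, MADV_SEQUENTIAL);
}
#endif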
12904
12905
12906/*
12907 * Internals for madvise(MADV_WILLNEED) system call.
12908 *
12909 * The present implementation is to do a read-ahead if the mapping corresponds
12910 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12911 * and basically ignore the "advice" (which we are always free to do).
12912 */
12913
12914
12915static kern_return_t
12916vm_map_willneed(
12917 vm_map_t map,
12918 vm_map_offset_t start,
12919 vm_map_offset_t end
12920)
12921{
12922 vm_map_entry_t entry;
12923 vm_object_t object;
12924 memory_object_t pager;
12925 struct vm_object_fault_info fault_info;
12926 kern_return_t kr;
12927 vm_object_size_t len;
12928 vm_object_offset_t offset;
1c79356b 12929
91447636 12930 /*
b0d623f7
A
12931 * Fill in static values in fault_info. Several fields get ignored by the code
12932 * we call, but we'll fill them in anyway since uninitialized fields are bad
12933 * when it comes to future backwards compatibility.
91447636 12934 */
b0d623f7
A
12935
12936 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12937 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12938 fault_info.no_cache = FALSE; /* ignored value */
12939 fault_info.stealth = TRUE;
6d2010ae
A
12940 fault_info.io_sync = FALSE;
12941 fault_info.cs_bypass = FALSE;
0b4c1975 12942 fault_info.mark_zf_absent = FALSE;
316670eb 12943 fault_info.batch_pmap_op = FALSE;
b0d623f7
A
12944
12945 /*
12946 * The MADV_WILLNEED operation doesn't require any changes to the
12947 * vm_map_entry_t's, so the read lock is sufficient.
12948 */
12949
12950 vm_map_lock_read(map);
12951
12952 /*
12953 * The madvise semantics require that the address range be fully
12954 * allocated with no holes. Otherwise, we're required to return
12955 * an error.
12956 */
12957
6d2010ae
A
12958 if (! vm_map_range_check(map, start, end, &entry)) {
12959 vm_map_unlock_read(map);
12960 return KERN_INVALID_ADDRESS;
12961 }
b0d623f7 12962
6d2010ae
A
12963 /*
12964 * Examine each vm_map_entry_t in the range.
12965 */
12966 for (; entry != vm_map_to_entry(map) && start < end; ) {
12967
b0d623f7 12968 /*
6d2010ae
A
12969 * The first time through, the start address could be anywhere
12970 * within the vm_map_entry we found. So adjust the offset to
12971 * correspond. After that, the offset will always be zero to
12972 * correspond to the beginning of the current vm_map_entry.
b0d623f7 12973 */
3e170ce0 12974 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 12975
6d2010ae
A
12976 /*
12977 * Set the length so we don't go beyond the end of the
12978 * map_entry or beyond the end of the range we were given.
12979 * This range could also span multiple map entries, all of which
12980 * map different files, so make sure we only do the right amount
12981 * of I/O for each object. Note that it's possible for there
12982 * to be multiple map entries all referring to the same object
12983 * but with different page permissions, but it's not worth
12984 * trying to optimize that case.
12985 */
12986 len = MIN(entry->vme_end - start, end - start);
b0d623f7 12987
6d2010ae
A
12988 if ((vm_size_t) len != len) {
12989 /* 32-bit overflow */
12990 len = (vm_size_t) (0 - PAGE_SIZE);
12991 }
12992 fault_info.cluster_size = (vm_size_t) len;
12993 fault_info.lo_offset = offset;
12994 fault_info.hi_offset = offset + len;
3e170ce0 12995 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
12996 fault_info.pmap_options = 0;
12997 if (entry->iokit_acct ||
12998 (!entry->is_sub_map && !entry->use_pmap)) {
12999 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13000 }
b0d623f7 13001
6d2010ae
A
13002 /*
13003 * If there's no read permission to this mapping, then just
13004 * skip it.
13005 */
13006 if ((entry->protection & VM_PROT_READ) == 0) {
13007 entry = entry->vme_next;
13008 start = entry->vme_start;
13009 continue;
13010 }
b0d623f7 13011
6d2010ae
A
13012 /*
13013 * Find the file object backing this map entry. If there is
13014 * none, then we simply ignore the "will need" advice for this
13015 * entry and go on to the next one.
13016 */
13017 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
13018 entry = entry->vme_next;
13019 start = entry->vme_start;
13020 continue;
13021 }
b0d623f7 13022
6d2010ae
A
13023 /*
13024 * The data_request() could take a long time, so let's
13025 * release the map lock to avoid blocking other threads.
13026 */
13027 vm_map_unlock_read(map);
b0d623f7 13028
6d2010ae
A
13029 vm_object_paging_begin(object);
13030 pager = object->pager;
13031 vm_object_unlock(object);
b0d623f7 13032
6d2010ae
A
13033 /*
13034 * Get the data from the object asynchronously.
13035 *
13036 * Note that memory_object_data_request() places limits on the
13037 * amount of I/O it will do. Regardless of the len we
fe8ab488 13038 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
6d2010ae
A
13039 * silently truncates the len to that size. This isn't
13040 * necessarily bad since madvise shouldn't really be used to
13041 * page in unlimited amounts of data. Other Unix variants
13042 * limit the willneed case as well. If this turns out to be an
13043 * issue for developers, then we can always adjust the policy
13044 * here and still be backwards compatible since this is all
13045 * just "advice".
13046 */
13047 kr = memory_object_data_request(
13048 pager,
13049 offset + object->paging_offset,
13050 0, /* ignored */
13051 VM_PROT_READ,
13052 (memory_object_fault_info_t)&fault_info);
b0d623f7 13053
6d2010ae
A
13054 vm_object_lock(object);
13055 vm_object_paging_end(object);
13056 vm_object_unlock(object);
b0d623f7 13057
6d2010ae
A
13058 /*
13059 * If we couldn't do the I/O for some reason, just give up on
13060 * the madvise. We still return success to the user since
13061 * madvise isn't supposed to fail when the advice can't be
13062 * taken.
13063 */
13064 if (kr != KERN_SUCCESS) {
13065 return KERN_SUCCESS;
13066 }
b0d623f7 13067
6d2010ae
A
13068 start += len;
13069 if (start >= end) {
13070 /* done */
13071 return KERN_SUCCESS;
13072 }
b0d623f7 13073
6d2010ae
A
13074 /* look up next entry */
13075 vm_map_lock_read(map);
13076 if (! vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 13077 /*
6d2010ae 13078 * There's a new hole in the address range.
b0d623f7 13079 */
6d2010ae
A
13080 vm_map_unlock_read(map);
13081 return KERN_INVALID_ADDRESS;
b0d623f7 13082 }
6d2010ae 13083 }
b0d623f7
A
13084
13085 vm_map_unlock_read(map);
6d2010ae 13086 return KERN_SUCCESS;
b0d623f7
A
13087}
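/*
 * Illustrative sketch (not compiled): the code above backs
 * madvise(MADV_WILLNEED) for file-backed mappings.  A hedged user-space
 * example that maps a file read-only and asks for asynchronous
 * read-ahead; the helper name is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_and_prefetch(const char *path, size_t len)
{
	int	fd;
	void	*p;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return MAP_FAILED;
	p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (p != MAP_FAILED) {
		/* Advice only: a failure here is not fatal. */
		(void) madvise(p, len, MADV_WILLNEED);
	}
	return p;
}
#endif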
13088
13089static boolean_t
13090vm_map_entry_is_reusable(
13091 vm_map_entry_t entry)
13092{
3e170ce0
A
13093 /* Only user map entries */
13094
b0d623f7
A
13095 vm_object_t object;
13096
2dced7af
A
13097 if (entry->is_sub_map) {
13098 return FALSE;
13099 }
13100
3e170ce0 13101 switch (VME_ALIAS(entry)) {
39236c6e
A
13102 case VM_MEMORY_MALLOC:
13103 case VM_MEMORY_MALLOC_SMALL:
13104 case VM_MEMORY_MALLOC_LARGE:
13105 case VM_MEMORY_REALLOC:
13106 case VM_MEMORY_MALLOC_TINY:
13107 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
13108 case VM_MEMORY_MALLOC_LARGE_REUSED:
13109 /*
13110 * This is a malloc() memory region: check if it's still
13111 * in its original state and can be re-used for more
13112 * malloc() allocations.
13113 */
13114 break;
13115 default:
13116 /*
13117 * Not a malloc() memory region: let the caller decide if
13118 * it's re-usable.
13119 */
13120 return TRUE;
13121 }
13122
b0d623f7
A
13123 if (entry->is_shared ||
13124 entry->is_sub_map ||
13125 entry->in_transition ||
13126 entry->protection != VM_PROT_DEFAULT ||
13127 entry->max_protection != VM_PROT_ALL ||
13128 entry->inheritance != VM_INHERIT_DEFAULT ||
13129 entry->no_cache ||
13130 entry->permanent ||
39236c6e 13131 entry->superpage_size != FALSE ||
b0d623f7
A
13132 entry->zero_wired_pages ||
13133 entry->wired_count != 0 ||
13134 entry->user_wired_count != 0) {
13135 return FALSE;
91447636 13136 }
b0d623f7 13137
3e170ce0 13138 object = VME_OBJECT(entry);
b0d623f7
A
13139 if (object == VM_OBJECT_NULL) {
13140 return TRUE;
13141 }
316670eb
A
13142 if (
13143#if 0
13144 /*
13145 * Let's proceed even if the VM object is potentially
13146 * shared.
13147 * We check for this later when processing the actual
13148 * VM pages, so the contents will be safe if shared.
13149 *
13150 * But we can still mark this memory region as "reusable" to
13151 * acknowledge that the caller did let us know that the memory
13152 * could be re-used and should not be penalized for holding
13153 * on to it. This allows its "resident size" to not include
13154 * the reusable range.
13155 */
13156 object->ref_count == 1 &&
13157#endif
b0d623f7
A
13158 object->wired_page_count == 0 &&
13159 object->copy == VM_OBJECT_NULL &&
13160 object->shadow == VM_OBJECT_NULL &&
13161 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
13162 object->internal &&
13163 !object->true_share &&
6d2010ae 13164 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
b0d623f7
A
13165 !object->code_signed) {
13166 return TRUE;
1c79356b 13167 }
b0d623f7
A
13168 return FALSE;
13169
13170
13171}
1c79356b 13172
b0d623f7
A
13173static kern_return_t
13174vm_map_reuse_pages(
13175 vm_map_t map,
13176 vm_map_offset_t start,
13177 vm_map_offset_t end)
13178{
13179 vm_map_entry_t entry;
13180 vm_object_t object;
13181 vm_object_offset_t start_offset, end_offset;
13182
13183 /*
13184 * The MADV_REUSE operation doesn't require any changes to the
13185 * vm_map_entry_t's, so the read lock is sufficient.
13186 */
0b4e3aa0 13187
b0d623f7 13188 vm_map_lock_read(map);
3e170ce0 13189 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 13190
b0d623f7
A
13191 /*
13192 * The madvise semantics require that the address range be fully
13193 * allocated with no holes. Otherwise, we're required to return
13194 * an error.
13195 */
13196
13197 if (!vm_map_range_check(map, start, end, &entry)) {
13198 vm_map_unlock_read(map);
13199 vm_page_stats_reusable.reuse_pages_failure++;
13200 return KERN_INVALID_ADDRESS;
1c79356b 13201 }
91447636 13202
b0d623f7
A
13203 /*
13204 * Examine each vm_map_entry_t in the range.
13205 */
13206 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13207 entry = entry->vme_next) {
13208 /*
13209 * Sanity check on the VM map entry.
13210 */
13211 if (! vm_map_entry_is_reusable(entry)) {
13212 vm_map_unlock_read(map);
13213 vm_page_stats_reusable.reuse_pages_failure++;
13214 return KERN_INVALID_ADDRESS;
13215 }
13216
13217 /*
13218 * The first time through, the start address could be anywhere
13219 * within the vm_map_entry we found. So adjust the offset to
13220 * correspond.
13221 */
13222 if (entry->vme_start < start) {
13223 start_offset = start - entry->vme_start;
13224 } else {
13225 start_offset = 0;
13226 }
13227 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
13228 start_offset += VME_OFFSET(entry);
13229 end_offset += VME_OFFSET(entry);
b0d623f7 13230
2dced7af 13231 assert(!entry->is_sub_map);
3e170ce0 13232 object = VME_OBJECT(entry);
b0d623f7
A
13233 if (object != VM_OBJECT_NULL) {
13234 vm_object_lock(object);
13235 vm_object_reuse_pages(object, start_offset, end_offset,
13236 TRUE);
13237 vm_object_unlock(object);
13238 }
13239
3e170ce0 13240 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
13241 /*
13242 * XXX
13243 * We do not hold the VM map exclusively here.
13244 * The "alias" field is not that critical, so it's
13245 * safe to update it here, as long as it is the only
13246 * one that can be modified while holding the VM map
13247 * "shared".
13248 */
3e170ce0 13249 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
13250 }
13251 }
13252
13253 vm_map_unlock_read(map);
13254 vm_page_stats_reusable.reuse_pages_success++;
13255 return KERN_SUCCESS;
1c79356b
A
13256}
13257
1c79356b 13258
b0d623f7
A
13259static kern_return_t
13260vm_map_reusable_pages(
13261 vm_map_t map,
13262 vm_map_offset_t start,
13263 vm_map_offset_t end)
13264{
13265 vm_map_entry_t entry;
13266 vm_object_t object;
13267 vm_object_offset_t start_offset, end_offset;
3e170ce0 13268 vm_map_offset_t pmap_offset;
b0d623f7
A
13269
13270 /*
13271 * The MADV_REUSABLE operation doesn't require any changes to the
13272 * vm_map_entry_t's, so the read lock is sufficient.
13273 */
13274
13275 vm_map_lock_read(map);
3e170ce0 13276 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
13277
13278 /*
13279 * The madvise semantics require that the address range be fully
13280 * allocated with no holes. Otherwise, we're required to return
13281 * an error.
13282 */
13283
13284 if (!vm_map_range_check(map, start, end, &entry)) {
13285 vm_map_unlock_read(map);
13286 vm_page_stats_reusable.reusable_pages_failure++;
13287 return KERN_INVALID_ADDRESS;
13288 }
13289
13290 /*
13291 * Examine each vm_map_entry_t in the range.
13292 */
13293 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13294 entry = entry->vme_next) {
13295 int kill_pages = 0;
13296
13297 /*
13298 * Sanity check on the VM map entry.
13299 */
13300 if (! vm_map_entry_is_reusable(entry)) {
13301 vm_map_unlock_read(map);
13302 vm_page_stats_reusable.reusable_pages_failure++;
13303 return KERN_INVALID_ADDRESS;
13304 }
13305
39037602
A
13306 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
13307 /* not writable: can't discard contents */
13308 vm_map_unlock_read(map);
13309 vm_page_stats_reusable.reusable_nonwritable++;
13310 vm_page_stats_reusable.reusable_pages_failure++;
13311 return KERN_PROTECTION_FAILURE;
13312 }
13313
b0d623f7
A
13314 /*
13315 * The first time through, the start address could be anywhere
13316 * within the vm_map_entry we found. So adjust the offset to
13317 * correspond.
13318 */
13319 if (entry->vme_start < start) {
13320 start_offset = start - entry->vme_start;
3e170ce0 13321 pmap_offset = start;
b0d623f7
A
13322 } else {
13323 start_offset = 0;
3e170ce0 13324 pmap_offset = entry->vme_start;
b0d623f7
A
13325 }
13326 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
13327 start_offset += VME_OFFSET(entry);
13328 end_offset += VME_OFFSET(entry);
b0d623f7 13329
2dced7af 13330 assert(!entry->is_sub_map);
3e170ce0 13331 object = VME_OBJECT(entry);
b0d623f7
A
13332 if (object == VM_OBJECT_NULL)
13333 continue;
13334
13335
13336 vm_object_lock(object);
39037602
A
13337 if (((object->ref_count == 1) ||
13338 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
13339 object->copy == VM_OBJECT_NULL)) &&
13340 object->shadow == VM_OBJECT_NULL &&
fe8ab488
A
13341 /*
13342 * "iokit_acct" entries are billed for their virtual size
13343 * (rather than for their resident pages only), so they
13344 * wouldn't benefit from making pages reusable, and it
13345 * would be hard to keep track of pages that are both
39037602
A
13346 * "iokit_acct" and "reusable" in the pmap stats and
13347 * ledgers.
fe8ab488
A
13348 */
13349 !(entry->iokit_acct ||
39037602
A
13350 (!entry->is_sub_map && !entry->use_pmap))) {
13351 if (object->ref_count != 1) {
13352 vm_page_stats_reusable.reusable_shared++;
13353 }
b0d623f7 13354 kill_pages = 1;
39037602 13355 } else {
b0d623f7 13356 kill_pages = -1;
39037602 13357 }
b0d623f7
A
13358 if (kill_pages != -1) {
13359 vm_object_deactivate_pages(object,
13360 start_offset,
13361 end_offset - start_offset,
13362 kill_pages,
3e170ce0
A
13363 TRUE /*reusable_pages*/,
13364 map->pmap,
13365 pmap_offset);
b0d623f7
A
13366 } else {
13367 vm_page_stats_reusable.reusable_pages_shared++;
13368 }
13369 vm_object_unlock(object);
13370
3e170ce0
A
13371 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
13372 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
13373 /*
13374 * XXX
13375 * We do not hold the VM map exclusively here.
13376 * The "alias" field is not that critical, so it's
13377 * safe to update it here, as long as it is the only
13378 * one that can be modified while holding the VM map
13379 * "shared".
13380 */
3e170ce0 13381 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
13382 }
13383 }
13384
13385 vm_map_unlock_read(map);
13386 vm_page_stats_reusable.reusable_pages_success++;
13387 return KERN_SUCCESS;
13388}
13389
13390
13391static kern_return_t
13392vm_map_can_reuse(
13393 vm_map_t map,
13394 vm_map_offset_t start,
13395 vm_map_offset_t end)
13396{
13397 vm_map_entry_t entry;
13398
13399 /*
13400 * The MADV_REUSABLE operation doesn't require any changes to the
13401 * vm_map_entry_t's, so the read lock is sufficient.
13402 */
13403
13404 vm_map_lock_read(map);
3e170ce0 13405 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
13406
13407 /*
13408 * The madvise semantics require that the address range be fully
13409 * allocated with no holes. Otherwise, we're required to return
13410 * an error.
13411 */
13412
13413 if (!vm_map_range_check(map, start, end, &entry)) {
13414 vm_map_unlock_read(map);
13415 vm_page_stats_reusable.can_reuse_failure++;
13416 return KERN_INVALID_ADDRESS;
13417 }
13418
13419 /*
13420 * Examine each vm_map_entry_t in the range.
13421 */
13422 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13423 entry = entry->vme_next) {
13424 /*
13425 * Sanity check on the VM map entry.
13426 */
13427 if (! vm_map_entry_is_reusable(entry)) {
13428 vm_map_unlock_read(map);
13429 vm_page_stats_reusable.can_reuse_failure++;
13430 return KERN_INVALID_ADDRESS;
13431 }
13432 }
13433
13434 vm_map_unlock_read(map);
13435 vm_page_stats_reusable.can_reuse_success++;
13436 return KERN_SUCCESS;
13437}
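/*
 * Illustrative sketch (not compiled): vm_map_reusable_pages(),
 * vm_map_reuse_pages() and vm_map_can_reuse() back the Darwin-specific
 * madvise advice values MADV_FREE_REUSABLE, MADV_FREE_REUSE and
 * MADV_CAN_REUSE.  The availability of those constants is an assumption
 * here; check <sys/mman.h>.  A malloc-style allocator caching a freed,
 * page-aligned block might use them like this:
 */
#if 0
#include <sys/mman.h>
#include <stddef.h>

/* Block freed: let the kernel reclaim the pages but keep the VA range. */
static void
cache_block(void *addr, size_t len)
{
	(void) madvise(addr, len, MADV_FREE_REUSABLE);
}

/* Block handed out again: take the pages back into normal accounting. */
static void
reuse_block(void *addr, size_t len)
{
	(void) madvise(addr, len, MADV_FREE_REUSE);
}
#endif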
13438
13439
3e170ce0
A
13440#if MACH_ASSERT
13441static kern_return_t
13442vm_map_pageout(
13443 vm_map_t map,
13444 vm_map_offset_t start,
13445 vm_map_offset_t end)
13446{
13447 vm_map_entry_t entry;
13448
13449 /*
13450 * The MADV_PAGEOUT operation doesn't require any changes to the
13451 * vm_map_entry_t's, so the read lock is sufficient.
13452 */
13453
13454 vm_map_lock_read(map);
13455
13456 /*
13457 * The madvise semantics require that the address range be fully
13458 * allocated with no holes. Otherwise, we're required to return
13459 * an error.
13460 */
13461
13462 if (!vm_map_range_check(map, start, end, &entry)) {
13463 vm_map_unlock_read(map);
13464 return KERN_INVALID_ADDRESS;
13465 }
13466
13467 /*
13468 * Examine each vm_map_entry_t in the range.
13469 */
13470 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13471 entry = entry->vme_next) {
13472 vm_object_t object;
13473
13474 /*
13475 * Sanity check on the VM map entry.
13476 */
13477 if (entry->is_sub_map) {
13478 vm_map_t submap;
13479 vm_map_offset_t submap_start;
13480 vm_map_offset_t submap_end;
13481 vm_map_entry_t submap_entry;
13482
13483 submap = VME_SUBMAP(entry);
13484 submap_start = VME_OFFSET(entry);
13485 submap_end = submap_start + (entry->vme_end -
13486 entry->vme_start);
13487
13488 vm_map_lock_read(submap);
13489
13490 if (! vm_map_range_check(submap,
13491 submap_start,
13492 submap_end,
13493 &submap_entry)) {
13494 vm_map_unlock_read(submap);
13495 vm_map_unlock_read(map);
13496 return KERN_INVALID_ADDRESS;
13497 }
13498
13499 object = VME_OBJECT(submap_entry);
13500 if (submap_entry->is_sub_map ||
13501 object == VM_OBJECT_NULL ||
13502 !object->internal) {
13503 vm_map_unlock_read(submap);
13504 continue;
13505 }
13506
13507 vm_object_pageout(object);
13508
13509 vm_map_unlock_read(submap);
13510 submap = VM_MAP_NULL;
13511 submap_entry = VM_MAP_ENTRY_NULL;
13512 continue;
13513 }
13514
13515 object = VME_OBJECT(entry);
13516 if (entry->is_sub_map ||
13517 object == VM_OBJECT_NULL ||
13518 !object->internal) {
13519 continue;
13520 }
13521
13522 vm_object_pageout(object);
13523 }
13524
13525 vm_map_unlock_read(map);
13526 return KERN_SUCCESS;
13527}
13528#endif /* MACH_ASSERT */
13529
13530
1c79356b 13531/*
91447636
A
13532 * Routine: vm_map_entry_insert
13533 *
13534 * Description: This routine inserts a new vm_map_entry in a locked map.
1c79356b 13535 */
91447636
A
13536vm_map_entry_t
13537vm_map_entry_insert(
13538 vm_map_t map,
13539 vm_map_entry_t insp_entry,
13540 vm_map_offset_t start,
13541 vm_map_offset_t end,
13542 vm_object_t object,
13543 vm_object_offset_t offset,
13544 boolean_t needs_copy,
13545 boolean_t is_shared,
13546 boolean_t in_transition,
13547 vm_prot_t cur_protection,
13548 vm_prot_t max_protection,
13549 vm_behavior_t behavior,
13550 vm_inherit_t inheritance,
2d21ac55 13551 unsigned wired_count,
b0d623f7
A
13552 boolean_t no_cache,
13553 boolean_t permanent,
39236c6e 13554 unsigned int superpage_size,
fe8ab488
A
13555 boolean_t clear_map_aligned,
13556 boolean_t is_submap)
1c79356b 13557{
91447636 13558 vm_map_entry_t new_entry;
1c79356b 13559
91447636 13560 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 13561
7ddcb079 13562 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 13563
39236c6e
A
13564 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13565 new_entry->map_aligned = TRUE;
13566 } else {
13567 new_entry->map_aligned = FALSE;
13568 }
13569 if (clear_map_aligned &&
fe8ab488
A
13570 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13571 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
13572 new_entry->map_aligned = FALSE;
13573 }
13574
91447636
A
13575 new_entry->vme_start = start;
13576 new_entry->vme_end = end;
13577 assert(page_aligned(new_entry->vme_start));
13578 assert(page_aligned(new_entry->vme_end));
39236c6e 13579 if (new_entry->map_aligned) {
fe8ab488
A
13580 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13581 VM_MAP_PAGE_MASK(map)));
39236c6e
A
13582 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13583 VM_MAP_PAGE_MASK(map)));
13584 }
e2d2fc5c 13585 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 13586
3e170ce0
A
13587 VME_OBJECT_SET(new_entry, object);
13588 VME_OFFSET_SET(new_entry, offset);
91447636 13589 new_entry->is_shared = is_shared;
fe8ab488 13590 new_entry->is_sub_map = is_submap;
91447636
A
13591 new_entry->needs_copy = needs_copy;
13592 new_entry->in_transition = in_transition;
13593 new_entry->needs_wakeup = FALSE;
13594 new_entry->inheritance = inheritance;
13595 new_entry->protection = cur_protection;
13596 new_entry->max_protection = max_protection;
13597 new_entry->behavior = behavior;
13598 new_entry->wired_count = wired_count;
13599 new_entry->user_wired_count = 0;
fe8ab488
A
13600 if (is_submap) {
13601 /*
13602 * submap: "use_pmap" means "nested".
13603 * default: false.
13604 */
13605 new_entry->use_pmap = FALSE;
13606 } else {
13607 /*
13608 * object: "use_pmap" means "use pmap accounting" for footprint.
13609 * default: true.
13610 */
13611 new_entry->use_pmap = TRUE;
13612 }
3e170ce0 13613 VME_ALIAS_SET(new_entry, 0);
b0d623f7 13614 new_entry->zero_wired_pages = FALSE;
2d21ac55 13615 new_entry->no_cache = no_cache;
b0d623f7 13616 new_entry->permanent = permanent;
39236c6e
A
13617 if (superpage_size)
13618 new_entry->superpage_size = TRUE;
13619 else
13620 new_entry->superpage_size = FALSE;
6d2010ae 13621 new_entry->used_for_jit = FALSE;
fe8ab488 13622 new_entry->iokit_acct = FALSE;
3e170ce0
A
13623 new_entry->vme_resilient_codesign = FALSE;
13624 new_entry->vme_resilient_media = FALSE;
39037602 13625 new_entry->vme_atomic = FALSE;
1c79356b 13626
91447636
A
13627 /*
13628 * Insert the new entry into the list.
13629 */
1c79356b 13630
6d2010ae 13631 vm_map_store_entry_link(map, insp_entry, new_entry);
91447636
A
13632 map->size += end - start;
13633
13634 /*
13635 * Update the free space hint and the lookup hint.
13636 */
13637
0c530ab8 13638 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 13639 return new_entry;
1c79356b
A
13640}
13641
13642/*
91447636
A
13643 * Routine: vm_map_remap_extract
13644 *
13645 * Description: This routine returns a vm_map_entry list from a map.
1c79356b 13646 */
91447636
A
13647static kern_return_t
13648vm_map_remap_extract(
13649 vm_map_t map,
13650 vm_map_offset_t addr,
13651 vm_map_size_t size,
13652 boolean_t copy,
13653 struct vm_map_header *map_header,
13654 vm_prot_t *cur_protection,
13655 vm_prot_t *max_protection,
13656 /* What, no behavior? */
13657 vm_inherit_t inheritance,
39037602
A
13658 boolean_t pageable,
13659 boolean_t same_map)
1c79356b 13660{
91447636
A
13661 kern_return_t result;
13662 vm_map_size_t mapped_size;
13663 vm_map_size_t tmp_size;
13664 vm_map_entry_t src_entry; /* result of last map lookup */
13665 vm_map_entry_t new_entry;
13666 vm_object_offset_t offset;
13667 vm_map_offset_t map_address;
13668 vm_map_offset_t src_start; /* start of entry to map */
13669 vm_map_offset_t src_end; /* end of region to be mapped */
13670 vm_object_t object;
13671 vm_map_version_t version;
13672 boolean_t src_needs_copy;
13673 boolean_t new_entry_needs_copy;
1c79356b 13674
91447636 13675 assert(map != VM_MAP_NULL);
39236c6e
A
13676 assert(size != 0);
13677 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636
A
13678 assert(inheritance == VM_INHERIT_NONE ||
13679 inheritance == VM_INHERIT_COPY ||
13680 inheritance == VM_INHERIT_SHARE);
1c79356b 13681
91447636
A
13682 /*
13683 * Compute start and end of region.
13684 */
39236c6e
A
13685 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13686 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13687
1c79356b 13688
91447636
A
13689 /*
13690 * Initialize map_header.
13691 */
13692 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13693 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13694 map_header->nentries = 0;
13695 map_header->entries_pageable = pageable;
39236c6e 13696 map_header->page_shift = PAGE_SHIFT;
1c79356b 13697
6d2010ae
A
13698 vm_map_store_init( map_header );
13699
91447636
A
13700 *cur_protection = VM_PROT_ALL;
13701 *max_protection = VM_PROT_ALL;
1c79356b 13702
91447636
A
13703 map_address = 0;
13704 mapped_size = 0;
13705 result = KERN_SUCCESS;
1c79356b 13706
91447636
A
13707 /*
13708 * The specified source virtual space might correspond to
13709 * multiple map entries, need to loop on them.
13710 */
13711 vm_map_lock(map);
13712 while (mapped_size != size) {
13713 vm_map_size_t entry_size;
1c79356b 13714
91447636
A
13715 /*
13716 * Find the beginning of the region.
13717 */
13718 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13719 result = KERN_INVALID_ADDRESS;
13720 break;
13721 }
1c79356b 13722
91447636
A
13723 if (src_start < src_entry->vme_start ||
13724 (mapped_size && src_start != src_entry->vme_start)) {
13725 result = KERN_INVALID_ADDRESS;
13726 break;
13727 }
1c79356b 13728
91447636
A
13729 tmp_size = size - mapped_size;
13730 if (src_end > src_entry->vme_end)
13731 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 13732
91447636 13733 entry_size = (vm_map_size_t)(src_entry->vme_end -
2d21ac55 13734 src_entry->vme_start);
1c79356b 13735
91447636 13736 if(src_entry->is_sub_map) {
3e170ce0 13737 vm_map_reference(VME_SUBMAP(src_entry));
91447636
A
13738 object = VM_OBJECT_NULL;
13739 } else {
3e170ce0 13740 object = VME_OBJECT(src_entry);
fe8ab488
A
13741 if (src_entry->iokit_acct) {
13742 /*
13743 * This entry uses "IOKit accounting".
13744 */
13745 } else if (object != VM_OBJECT_NULL &&
13746 object->purgable != VM_PURGABLE_DENY) {
13747 /*
13748 * Purgeable objects have their own accounting:
13749 * no pmap accounting for them.
13750 */
13751 assert(!src_entry->use_pmap);
13752 } else {
13753 /*
13754 * Not IOKit or purgeable:
13755 * must be accounted by pmap stats.
13756 */
13757 assert(src_entry->use_pmap);
13758 }
55e303ae 13759
91447636
A
13760 if (object == VM_OBJECT_NULL) {
13761 object = vm_object_allocate(entry_size);
3e170ce0
A
13762 VME_OFFSET_SET(src_entry, 0);
13763 VME_OBJECT_SET(src_entry, object);
91447636
A
13764 } else if (object->copy_strategy !=
13765 MEMORY_OBJECT_COPY_SYMMETRIC) {
13766 /*
13767 * We are already using an asymmetric
13768 * copy, and therefore we already have
13769 * the right object.
13770 */
13771 assert(!src_entry->needs_copy);
13772 } else if (src_entry->needs_copy || object->shadowed ||
13773 (object->internal && !object->true_share &&
2d21ac55 13774 !src_entry->is_shared &&
6d2010ae 13775 object->vo_size > entry_size)) {
1c79356b 13776
3e170ce0 13777 VME_OBJECT_SHADOW(src_entry, entry_size);
1c79356b 13778
91447636
A
13779 if (!src_entry->needs_copy &&
13780 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
13781 vm_prot_t prot;
13782
13783 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13784
3e170ce0
A
13785 if (override_nx(map,
13786 VME_ALIAS(src_entry))
13787 && prot)
0c530ab8 13788 prot |= VM_PROT_EXECUTE;
2d21ac55 13789
316670eb 13790 if(map->mapped_in_other_pmaps) {
2d21ac55 13791 vm_object_pmap_protect(
3e170ce0
A
13792 VME_OBJECT(src_entry),
13793 VME_OFFSET(src_entry),
2d21ac55
A
13794 entry_size,
13795 PMAP_NULL,
0c530ab8 13796 src_entry->vme_start,
0c530ab8 13797 prot);
2d21ac55
A
13798 } else {
13799 pmap_protect(vm_map_pmap(map),
13800 src_entry->vme_start,
13801 src_entry->vme_end,
13802 prot);
91447636
A
13803 }
13804 }
1c79356b 13805
3e170ce0 13806 object = VME_OBJECT(src_entry);
91447636
A
13807 src_entry->needs_copy = FALSE;
13808 }
1c79356b 13809
1c79356b 13810
91447636 13811 vm_object_lock(object);
2d21ac55 13812 vm_object_reference_locked(object); /* object ref. for new entry */
91447636 13813 if (object->copy_strategy ==
2d21ac55 13814 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
13815 object->copy_strategy =
13816 MEMORY_OBJECT_COPY_DELAY;
13817 }
13818 vm_object_unlock(object);
13819 }
1c79356b 13820
3e170ce0
A
13821 offset = (VME_OFFSET(src_entry) +
13822 (src_start - src_entry->vme_start));
1c79356b 13823
7ddcb079 13824 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636 13825 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
13826 if (new_entry->is_sub_map) {
13827 /* clr address space specifics */
13828 new_entry->use_pmap = FALSE;
13829 }
1c79356b 13830
39236c6e
A
13831 new_entry->map_aligned = FALSE;
13832
91447636
A
13833 new_entry->vme_start = map_address;
13834 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 13835 assert(new_entry->vme_start < new_entry->vme_end);
91447636 13836 new_entry->inheritance = inheritance;
3e170ce0 13837 VME_OFFSET_SET(new_entry, offset);
1c79356b 13838
91447636
A
13839 /*
13840 * The new region has to be copied now if required.
13841 */
13842 RestartCopy:
13843 if (!copy) {
316670eb
A
13844 /*
13845 * Cannot allow an entry describing a JIT
13846 * region to be shared across address spaces.
13847 */
39037602 13848 if (src_entry->used_for_jit == TRUE && !same_map) {
316670eb
A
13849 result = KERN_INVALID_ARGUMENT;
13850 break;
13851 }
91447636
A
13852 src_entry->is_shared = TRUE;
13853 new_entry->is_shared = TRUE;
13854 if (!(new_entry->is_sub_map))
13855 new_entry->needs_copy = FALSE;
1c79356b 13856
91447636
A
13857 } else if (src_entry->is_sub_map) {
13858 /* make this a COW sub_map if not already */
3e170ce0 13859 assert(new_entry->wired_count == 0);
91447636
A
13860 new_entry->needs_copy = TRUE;
13861 object = VM_OBJECT_NULL;
13862 } else if (src_entry->wired_count == 0 &&
3e170ce0
A
13863 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13864 VME_OFFSET(new_entry),
2d21ac55
A
13865 (new_entry->vme_end -
13866 new_entry->vme_start),
13867 &src_needs_copy,
13868 &new_entry_needs_copy)) {
55e303ae 13869
91447636
A
13870 new_entry->needs_copy = new_entry_needs_copy;
13871 new_entry->is_shared = FALSE;
1c79356b 13872
91447636
A
13873 /*
13874 * Handle copy_on_write semantics.
13875 */
13876 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
13877 vm_prot_t prot;
13878
13879 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13880
3e170ce0
A
13881 if (override_nx(map,
13882 VME_ALIAS(src_entry))
13883 && prot)
0c530ab8 13884 prot |= VM_PROT_EXECUTE;
2d21ac55 13885
91447636
A
13886 vm_object_pmap_protect(object,
13887 offset,
13888 entry_size,
13889 ((src_entry->is_shared
316670eb 13890 || map->mapped_in_other_pmaps) ?
91447636
A
13891 PMAP_NULL : map->pmap),
13892 src_entry->vme_start,
0c530ab8 13893 prot);
1c79356b 13894
3e170ce0 13895 assert(src_entry->wired_count == 0);
91447636
A
13896 src_entry->needs_copy = TRUE;
13897 }
13898 /*
13899 * Throw away the old object reference of the new entry.
13900 */
13901 vm_object_deallocate(object);
1c79356b 13902
91447636
A
13903 } else {
13904 new_entry->is_shared = FALSE;
1c79356b 13905
91447636
A
13906 /*
13907 * The map can be safely unlocked since we
13908 * already hold a reference on the object.
13909 *
13910 * Record the timestamp of the map for later
13911 * verification, and unlock the map.
13912 */
13913 version.main_timestamp = map->timestamp;
13914 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 13915
91447636
A
13916 /*
13917 * Perform the copy.
13918 */
13919 if (src_entry->wired_count > 0) {
13920 vm_object_lock(object);
13921 result = vm_object_copy_slowly(
2d21ac55
A
13922 object,
13923 offset,
13924 entry_size,
13925 THREAD_UNINT,
3e170ce0 13926 &VME_OBJECT(new_entry));
1c79356b 13927
3e170ce0 13928 VME_OFFSET_SET(new_entry, 0);
91447636
A
13929 new_entry->needs_copy = FALSE;
13930 } else {
3e170ce0
A
13931 vm_object_offset_t new_offset;
13932
13933 new_offset = VME_OFFSET(new_entry);
91447636 13934 result = vm_object_copy_strategically(
2d21ac55
A
13935 object,
13936 offset,
13937 entry_size,
3e170ce0
A
13938 &VME_OBJECT(new_entry),
13939 &new_offset,
2d21ac55 13940 &new_entry_needs_copy);
3e170ce0
A
13941 if (new_offset != VME_OFFSET(new_entry)) {
13942 VME_OFFSET_SET(new_entry, new_offset);
13943 }
1c79356b 13944
91447636
A
13945 new_entry->needs_copy = new_entry_needs_copy;
13946 }
1c79356b 13947
91447636
A
13948 /*
13949 * Throw away the old object reference of the new entry.
13950 */
13951 vm_object_deallocate(object);
1c79356b 13952
91447636
A
13953 if (result != KERN_SUCCESS &&
13954 result != KERN_MEMORY_RESTART_COPY) {
13955 _vm_map_entry_dispose(map_header, new_entry);
39037602 13956 vm_map_lock(map);
91447636
A
13957 break;
13958 }
1c79356b 13959
91447636
A
13960 /*
13961 * Verify that the map has not substantially
13962 * changed while the copy was being made.
13963 */
1c79356b 13964
91447636
A
13965 vm_map_lock(map);
13966 if (version.main_timestamp + 1 != map->timestamp) {
13967 /*
13968 * Simple version comparison failed.
13969 *
13970 * Retry the lookup and verify that the
13971 * same object/offset are still present.
13972 */
3e170ce0 13973 vm_object_deallocate(VME_OBJECT(new_entry));
91447636
A
13974 _vm_map_entry_dispose(map_header, new_entry);
13975 if (result == KERN_MEMORY_RESTART_COPY)
13976 result = KERN_SUCCESS;
13977 continue;
13978 }
1c79356b 13979
91447636
A
13980 if (result == KERN_MEMORY_RESTART_COPY) {
13981 vm_object_reference(object);
13982 goto RestartCopy;
13983 }
13984 }
1c79356b 13985
6d2010ae 13986 _vm_map_store_entry_link(map_header,
91447636 13987 map_header->links.prev, new_entry);
1c79356b 13988
6d2010ae
A
13989 /* Protections for submap mapping are irrelevant here */
13990 if( !src_entry->is_sub_map ) {
13991 *cur_protection &= src_entry->protection;
13992 *max_protection &= src_entry->max_protection;
13993 }
91447636
A
13994 map_address += tmp_size;
13995 mapped_size += tmp_size;
13996 src_start += tmp_size;
1c79356b 13997
91447636 13998 } /* end while */
1c79356b 13999
91447636
A
14000 vm_map_unlock(map);
14001 if (result != KERN_SUCCESS) {
14002 /*
14003 * Free all allocated elements.
14004 */
14005 for (src_entry = map_header->links.next;
14006 src_entry != (struct vm_map_entry *)&map_header->links;
14007 src_entry = new_entry) {
14008 new_entry = src_entry->vme_next;
6d2010ae 14009 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 14010 if (src_entry->is_sub_map) {
3e170ce0 14011 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 14012 } else {
3e170ce0 14013 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 14014 }
91447636
A
14015 _vm_map_entry_dispose(map_header, src_entry);
14016 }
14017 }
14018 return result;
1c79356b
A
14019}
14020
14021/*
91447636 14022 * Routine: vm_remap
1c79356b 14023 *
91447636
A
14024 * Map portion of a task's address space.
14025 * Mapped region must not overlap more than
14026 * one vm memory object. Protections and
14027 * inheritance attributes remain the same
14028 * as in the original task and are out parameters.
14029 * Source and Target task can be identical.
14030 * Other attributes are the same as for vm_map().
1c79356b
A
14031 */
14032kern_return_t
91447636
A
14033vm_map_remap(
14034 vm_map_t target_map,
14035 vm_map_address_t *address,
14036 vm_map_size_t size,
14037 vm_map_offset_t mask,
060df5ea 14038 int flags,
91447636
A
14039 vm_map_t src_map,
14040 vm_map_offset_t memory_address,
1c79356b 14041 boolean_t copy,
1c79356b
A
14042 vm_prot_t *cur_protection,
14043 vm_prot_t *max_protection,
91447636 14044 vm_inherit_t inheritance)
1c79356b
A
14045{
14046 kern_return_t result;
91447636 14047 vm_map_entry_t entry;
0c530ab8 14048 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 14049 vm_map_entry_t new_entry;
91447636 14050 struct vm_map_header map_header;
39236c6e 14051 vm_map_offset_t offset_in_mapping;
1c79356b 14052
91447636
A
14053 if (target_map == VM_MAP_NULL)
14054 return KERN_INVALID_ARGUMENT;
1c79356b 14055
91447636 14056 switch (inheritance) {
2d21ac55
A
14057 case VM_INHERIT_NONE:
14058 case VM_INHERIT_COPY:
14059 case VM_INHERIT_SHARE:
91447636
A
14060 if (size != 0 && src_map != VM_MAP_NULL)
14061 break;
14062 /*FALL THRU*/
2d21ac55 14063 default:
91447636
A
14064 return KERN_INVALID_ARGUMENT;
14065 }
1c79356b 14066
39236c6e
A
14067 /*
14068 * If the user is requesting that we return the address of the
14069 * first byte of the data (rather than the base of the page),
14070 * then we use different rounding semantics: specifically,
14071 * we assume that (memory_address, size) describes a region
14072 * all of whose pages we must cover, rather than a base to be truncated
14073 * down and a size to be added to that base. So we figure out
14074 * the highest page that the requested region includes and make
14075 * sure that the size will cover it.
14076 *
14077 * The key example we're worried about is of the form:
14078 *
14079 * memory_address = 0x1ff0, size = 0x20
14080 *
14081 * With the old semantics, we round down the memory_address to 0x1000
14082 * and round up the size to 0x1000, resulting in our covering *only*
14083 * page 0x1000. With the new semantics, we'd realize that the region covers
14084 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
14085 * 0x1000 and page 0x2000 in the region we remap.
14086 */
14087 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14088 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
14089 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
14090 } else {
14091 size = vm_map_round_page(size, PAGE_MASK);
14092 }
1c79356b 14093
91447636 14094 result = vm_map_remap_extract(src_map, memory_address,
2d21ac55
A
14095 size, copy, &map_header,
14096 cur_protection,
14097 max_protection,
14098 inheritance,
39037602
A
14099 target_map->hdr.entries_pageable,
14100 src_map == target_map);
1c79356b 14101
91447636
A
14102 if (result != KERN_SUCCESS) {
14103 return result;
14104 }
1c79356b 14105
91447636
A
14106 /*
14107 * Allocate/check a range of free virtual address
14108 * space for the target
1c79356b 14109 */
39236c6e
A
14110 *address = vm_map_trunc_page(*address,
14111 VM_MAP_PAGE_MASK(target_map));
91447636
A
14112 vm_map_lock(target_map);
14113 result = vm_map_remap_range_allocate(target_map, address, size,
060df5ea 14114 mask, flags, &insp_entry);
1c79356b 14115
91447636
A
14116 for (entry = map_header.links.next;
14117 entry != (struct vm_map_entry *)&map_header.links;
14118 entry = new_entry) {
14119 new_entry = entry->vme_next;
6d2010ae 14120 _vm_map_store_entry_unlink(&map_header, entry);
91447636 14121 if (result == KERN_SUCCESS) {
3e170ce0
A
14122 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14123 /* no codesigning -> read-only access */
14124 assert(!entry->used_for_jit);
14125 entry->max_protection = VM_PROT_READ;
14126 entry->protection = VM_PROT_READ;
14127 entry->vme_resilient_codesign = TRUE;
14128 }
91447636
A
14129 entry->vme_start += *address;
14130 entry->vme_end += *address;
39236c6e 14131 assert(!entry->map_aligned);
6d2010ae 14132 vm_map_store_entry_link(target_map, insp_entry, entry);
91447636
A
14133 insp_entry = entry;
14134 } else {
14135 if (!entry->is_sub_map) {
3e170ce0 14136 vm_object_deallocate(VME_OBJECT(entry));
91447636 14137 } else {
3e170ce0 14138 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 14139 }
91447636 14140 _vm_map_entry_dispose(&map_header, entry);
1c79356b 14141 }
91447636 14142 }
1c79356b 14143
3e170ce0
A
14144 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14145 *cur_protection = VM_PROT_READ;
14146 *max_protection = VM_PROT_READ;
14147 }
14148
6d2010ae 14149 if( target_map->disable_vmentry_reuse == TRUE) {
39037602 14150 assert(!target_map->is_nested_map);
6d2010ae
A
14151 if( target_map->highest_entry_end < insp_entry->vme_end ){
14152 target_map->highest_entry_end = insp_entry->vme_end;
14153 }
14154 }
14155
91447636
A
14156 if (result == KERN_SUCCESS) {
14157 target_map->size += size;
0c530ab8 14158 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
91447636
A
14159 }
14160 vm_map_unlock(target_map);
1c79356b 14161
91447636
A
14162 if (result == KERN_SUCCESS && target_map->wiring_required)
14163 result = vm_map_wire(target_map, *address,
3e170ce0
A
14164 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
14165 TRUE);
39236c6e
A
14166
14167 /*
14168 * If requested, return the address of the data pointed to by the
14169 * request, rather than the base of the resulting page.
14170 */
14171 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14172 *address += offset_in_mapping;
14173 }
14174
91447636
A
14175 return result;
14176}
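/*
 * Illustrative sketch (not compiled): vm_map_remap() is the kernel side
 * of the mach_vm_remap() user call.  A minimal use that aliases a range
 * of the caller's own address space at a kernel-chosen address, sharing
 * rather than copying the pages (exact prototype assumed to be the one
 * in <mach/mach_vm.h>):
 */
#if 0
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
alias_range(mach_vm_address_t src, mach_vm_size_t size,
	    mach_vm_address_t *dst)
{
	vm_prot_t cur_prot, max_prot;

	*dst = 0;
	return mach_vm_remap(mach_task_self(), dst, size,
			     0,			/* mask */
			     VM_FLAGS_ANYWHERE,
			     mach_task_self(), src,
			     FALSE,		/* share, don't copy */
			     &cur_prot, &max_prot,
			     VM_INHERIT_SHARE);
}
#endif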
1c79356b 14177
91447636
A
14178/*
14179 * Routine: vm_map_remap_range_allocate
14180 *
14181 * Description:
14182 * Allocate a range in the specified virtual address map.
14184 * Returns the address and the map entry just before the allocated
14185 * range.
14185 *
14186 * Map must be locked.
14187 */
1c79356b 14188
91447636
A
14189static kern_return_t
14190vm_map_remap_range_allocate(
14191 vm_map_t map,
14192 vm_map_address_t *address, /* IN/OUT */
14193 vm_map_size_t size,
14194 vm_map_offset_t mask,
060df5ea 14195 int flags,
91447636
A
14196 vm_map_entry_t *map_entry) /* OUT */
14197{
060df5ea
A
14198 vm_map_entry_t entry;
14199 vm_map_offset_t start;
14200 vm_map_offset_t end;
14201 kern_return_t kr;
3e170ce0 14202 vm_map_entry_t hole_entry;
1c79356b 14203
2d21ac55 14204StartAgain: ;
1c79356b 14205
2d21ac55 14206 start = *address;
1c79356b 14207
060df5ea 14208 if (flags & VM_FLAGS_ANYWHERE)
2d21ac55 14209 {
39037602
A
14210 if (flags & VM_FLAGS_RANDOM_ADDR)
14211 {
14212 /*
14213 * Get a random start address.
14214 */
14215 kr = vm_map_random_address_for_size(map, address, size);
14216 if (kr != KERN_SUCCESS) {
14217 return(kr);
14218 }
14219 start = *address;
14220 }
14221
2d21ac55
A
14222 /*
14223 * Calculate the first possible address.
14224 */
1c79356b 14225
2d21ac55
A
14226 if (start < map->min_offset)
14227 start = map->min_offset;
14228 if (start > map->max_offset)
14229 return(KERN_NO_SPACE);
91447636 14230
2d21ac55
A
14231 /*
14232 * Look for the first possible address;
14233 * if there's already something at this
14234 * address, we have to start after it.
14235 */
1c79356b 14236
6d2010ae
A
14237 if( map->disable_vmentry_reuse == TRUE) {
14238 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 14239 } else {
3e170ce0
A
14240
14241 if (map->holelistenabled) {
14242 hole_entry = (vm_map_entry_t)map->holes_list;
14243
14244 if (hole_entry == NULL) {
14245 /*
14246 * No more space in the map?
14247 */
14248 return(KERN_NO_SPACE);
14249 } else {
14250
14251 boolean_t found_hole = FALSE;
14252
14253 do {
14254 if (hole_entry->vme_start >= start) {
14255 start = hole_entry->vme_start;
14256 found_hole = TRUE;
14257 break;
14258 }
14259
14260 if (hole_entry->vme_end > start) {
14261 found_hole = TRUE;
14262 break;
14263 }
14264 hole_entry = hole_entry->vme_next;
14265
14266 } while (hole_entry != (vm_map_entry_t) map->holes_list);
14267
14268 if (found_hole == FALSE) {
14269 return (KERN_NO_SPACE);
14270 }
14271
14272 entry = hole_entry;
14273 }
6d2010ae 14274 } else {
3e170ce0
A
14275 assert(first_free_is_valid(map));
14276 if (start == map->min_offset) {
14277 if ((entry = map->first_free) != vm_map_to_entry(map))
14278 start = entry->vme_end;
14279 } else {
14280 vm_map_entry_t tmp_entry;
14281 if (vm_map_lookup_entry(map, start, &tmp_entry))
14282 start = tmp_entry->vme_end;
14283 entry = tmp_entry;
14284 }
6d2010ae 14285 }
39236c6e
A
14286 start = vm_map_round_page(start,
14287 VM_MAP_PAGE_MASK(map));
2d21ac55 14288 }
91447636 14289
2d21ac55
A
14290 /*
14291 * In any case, the "entry" always precedes
14292 * the proposed new region throughout the
14293 * loop:
14294 */
1c79356b 14295
2d21ac55 14296 while (TRUE) {
39037602 14297 vm_map_entry_t next;
2d21ac55
A
14298
14299 /*
14300 * Find the end of the proposed new region.
14301 * Be sure we didn't go beyond the end, or
14302 * wrap around the address.
14303 */
14304
14305 end = ((start + mask) & ~mask);
39236c6e
A
14306 end = vm_map_round_page(end,
14307 VM_MAP_PAGE_MASK(map));
2d21ac55
A
14308 if (end < start)
14309 return(KERN_NO_SPACE);
14310 start = end;
14311 end += size;
14312
14313 if ((end > map->max_offset) || (end < start)) {
14314 if (map->wait_for_space) {
14315 if (size <= (map->max_offset -
14316 map->min_offset)) {
14317 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
14318 vm_map_unlock(map);
14319 thread_block(THREAD_CONTINUE_NULL);
14320 vm_map_lock(map);
14321 goto StartAgain;
14322 }
14323 }
91447636 14324
2d21ac55
A
14325 return(KERN_NO_SPACE);
14326 }
1c79356b 14327
2d21ac55 14328 next = entry->vme_next;
1c79356b 14329
3e170ce0
A
14330 if (map->holelistenabled) {
14331 if (entry->vme_end >= end)
14332 break;
14333 } else {
14334 /*
14335 * If there are no more entries, we must win.
14336 *
14337 * OR
14338 *
14339 * If there is another entry, it must be
14340 * after the end of the potential new region.
14341 */
1c79356b 14342
3e170ce0
A
14343 if (next == vm_map_to_entry(map))
14344 break;
14345
14346 if (next->vme_start >= end)
14347 break;
14348 }
1c79356b 14349
2d21ac55
A
14350 /*
14351 * Didn't fit -- move to the next entry.
14352 */
1c79356b 14353
2d21ac55 14354 entry = next;
3e170ce0
A
14355
14356 if (map->holelistenabled) {
14357 if (entry == (vm_map_entry_t) map->holes_list) {
14358 /*
14359 * Wrapped around
14360 */
14361 return(KERN_NO_SPACE);
14362 }
14363 start = entry->vme_start;
14364 } else {
14365 start = entry->vme_end;
14366 }
14367 }
14368
14369 if (map->holelistenabled) {
14370
14371 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
14372 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
14373 }
2d21ac55 14374 }
3e170ce0 14375
2d21ac55 14376 *address = start;
3e170ce0 14377
2d21ac55
A
14378 } else {
14379 vm_map_entry_t temp_entry;
91447636 14380
2d21ac55
A
14381 /*
14382 * Verify that:
14383 * the address doesn't itself violate
14384 * the mask requirement.
14385 */
1c79356b 14386
2d21ac55
A
14387 if ((start & mask) != 0)
14388 return(KERN_NO_SPACE);
1c79356b 14389
1c79356b 14390
2d21ac55
A
14391 /*
14392 * ... the address is within bounds
14393 */
1c79356b 14394
2d21ac55 14395 end = start + size;
1c79356b 14396
2d21ac55
A
14397 if ((start < map->min_offset) ||
14398 (end > map->max_offset) ||
14399 (start >= end)) {
14400 return(KERN_INVALID_ADDRESS);
14401 }
1c79356b 14402
060df5ea
A
14403 /*
14404 * If we're asked to overwrite whatever was mapped in that
14405 * range, first deallocate that range.
14406 */
14407 if (flags & VM_FLAGS_OVERWRITE) {
14408 vm_map_t zap_map;
14409
14410 /*
14411 * We use a "zap_map" to avoid having to unlock
14412 * the "map" in vm_map_delete(), which would compromise
14413 * the atomicity of the "deallocate" and then "remap"
14414 * combination.
14415 */
14416 zap_map = vm_map_create(PMAP_NULL,
14417 start,
316670eb 14418 end,
060df5ea
A
14419 map->hdr.entries_pageable);
14420 if (zap_map == VM_MAP_NULL) {
14421 return KERN_RESOURCE_SHORTAGE;
14422 }
39236c6e 14423 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 14424 vm_map_disable_hole_optimization(zap_map);
060df5ea
A
14425
14426 kr = vm_map_delete(map, start, end,
fe8ab488
A
14427 (VM_MAP_REMOVE_SAVE_ENTRIES |
14428 VM_MAP_REMOVE_NO_MAP_ALIGN),
060df5ea
A
14429 zap_map);
14430 if (kr == KERN_SUCCESS) {
14431 vm_map_destroy(zap_map,
14432 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14433 zap_map = VM_MAP_NULL;
14434 }
14435 }
14436
2d21ac55
A
14437 /*
14438 * ... the starting address isn't allocated
14439 */
91447636 14440
2d21ac55
A
14441 if (vm_map_lookup_entry(map, start, &temp_entry))
14442 return(KERN_NO_SPACE);
91447636 14443
2d21ac55 14444 entry = temp_entry;
91447636 14445
2d21ac55
A
14446 /*
14447 * ... the next region doesn't overlap the
14448 * end point.
14449 */
1c79356b 14450
2d21ac55
A
14451 if ((entry->vme_next != vm_map_to_entry(map)) &&
14452 (entry->vme_next->vme_start < end))
14453 return(KERN_NO_SPACE);
14454 }
14455 *map_entry = entry;
14456 return(KERN_SUCCESS);
91447636 14457}
1c79356b 14458
91447636
A
14459/*
14460 * vm_map_switch:
14461 *
14462 * Set the address map for the current thread to the specified map
14463 */
1c79356b 14464
91447636
A
14465vm_map_t
14466vm_map_switch(
14467 vm_map_t map)
14468{
14469 int mycpu;
14470 thread_t thread = current_thread();
14471 vm_map_t oldmap = thread->map;
1c79356b 14472
91447636
A
14473 mp_disable_preemption();
14474 mycpu = cpu_number();
1c79356b 14475
91447636
A
14476 /*
14477 * Deactivate the current map and activate the requested map
14478 */
14479 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 14480
91447636
A
14481 mp_enable_preemption();
14482 return(oldmap);
14483}
1c79356b 14484
1c79356b 14485
91447636
A
14486/*
14487 * Routine: vm_map_write_user
14488 *
14489 * Description:
14490 * Copy out data from a kernel space into space in the
14491 * destination map. The space must already exist in the
14492 * destination map.
14493 * NOTE: This routine should only be called by threads
14494 * which can block on a page fault. i.e. kernel mode user
14495 * threads.
14496 *
14497 */
14498kern_return_t
14499vm_map_write_user(
14500 vm_map_t map,
14501 void *src_p,
14502 vm_map_address_t dst_addr,
14503 vm_size_t size)
14504{
14505 kern_return_t kr = KERN_SUCCESS;
1c79356b 14506
91447636
A
14507 if(current_map() == map) {
14508 if (copyout(src_p, dst_addr, size)) {
14509 kr = KERN_INVALID_ADDRESS;
14510 }
14511 } else {
14512 vm_map_t oldmap;
1c79356b 14513
91447636
A
14514 /* take on the identity of the target map while doing */
14515 /* the transfer */
1c79356b 14516
91447636
A
14517 vm_map_reference(map);
14518 oldmap = vm_map_switch(map);
14519 if (copyout(src_p, dst_addr, size)) {
14520 kr = KERN_INVALID_ADDRESS;
1c79356b 14521 }
91447636
A
14522 vm_map_switch(oldmap);
14523 vm_map_deallocate(map);
1c79356b 14524 }
91447636 14525 return kr;
1c79356b
A
14526}
14527
14528/*
91447636
A
14529 * Routine: vm_map_read_user
14530 *
14531 * Description:
14532 * Copy in data from a user space source map into the
14533 * kernel map. The space must already exist in the
14534 * kernel map.
14535 * NOTE: This routine should only be called by threads
 14536 * which can block on a page fault, i.e. kernel-mode user
14537 * threads.
1c79356b 14538 *
1c79356b
A
14539 */
14540kern_return_t
91447636
A
14541vm_map_read_user(
14542 vm_map_t map,
14543 vm_map_address_t src_addr,
14544 void *dst_p,
14545 vm_size_t size)
1c79356b 14546{
91447636 14547 kern_return_t kr = KERN_SUCCESS;
1c79356b 14548
91447636
A
14549 if(current_map() == map) {
14550 if (copyin(src_addr, dst_p, size)) {
14551 kr = KERN_INVALID_ADDRESS;
14552 }
14553 } else {
14554 vm_map_t oldmap;
1c79356b 14555
91447636
A
14556 /* take on the identity of the target map while doing */
14557 /* the transfer */
14558
14559 vm_map_reference(map);
14560 oldmap = vm_map_switch(map);
14561 if (copyin(src_addr, dst_p, size)) {
14562 kr = KERN_INVALID_ADDRESS;
14563 }
14564 vm_map_switch(oldmap);
14565 vm_map_deallocate(map);
1c79356b 14566 }
91447636
A
14567 return kr;
14568}
14569
1c79356b 14570
91447636
A
14571/*
14572 * vm_map_check_protection:
14573 *
14574 * Assert that the target map allows the specified
14575 * privilege on the entire address region given.
14576 * The entire region must be allocated.
14577 */
2d21ac55
A
14578boolean_t
14579vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14580 vm_map_offset_t end, vm_prot_t protection)
91447636 14581{
2d21ac55
A
14582 vm_map_entry_t entry;
14583 vm_map_entry_t tmp_entry;
1c79356b 14584
91447636 14585 vm_map_lock(map);
1c79356b 14586
2d21ac55 14587 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
91447636 14588 {
2d21ac55
A
14589 vm_map_unlock(map);
14590 return (FALSE);
1c79356b
A
14591 }
14592
91447636
A
14593 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14594 vm_map_unlock(map);
14595 return(FALSE);
14596 }
1c79356b 14597
91447636
A
14598 entry = tmp_entry;
14599
14600 while (start < end) {
14601 if (entry == vm_map_to_entry(map)) {
14602 vm_map_unlock(map);
14603 return(FALSE);
1c79356b 14604 }
1c79356b 14605
91447636
A
14606 /*
14607 * No holes allowed!
14608 */
1c79356b 14609
91447636
A
14610 if (start < entry->vme_start) {
14611 vm_map_unlock(map);
14612 return(FALSE);
14613 }
14614
14615 /*
14616 * Check protection associated with entry.
14617 */
14618
14619 if ((entry->protection & protection) != protection) {
14620 vm_map_unlock(map);
14621 return(FALSE);
14622 }
14623
14624 /* go to next entry */
14625
14626 start = entry->vme_end;
14627 entry = entry->vme_next;
14628 }
14629 vm_map_unlock(map);
14630 return(TRUE);
1c79356b
A
14631}
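/*
 * Usage sketch (illustrative only): verify that an entire range is
 * mapped read/write before operating on it, e.g.
 *
 *	if (!vm_map_check_protection(map, start, end,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return KERN_PROTECTION_FAILURE;
 *
 * The check is advisory: the map is unlocked on return, so protections
 * may change before the caller acts on the result.
 */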
14632
1c79356b 14633kern_return_t
91447636
A
14634vm_map_purgable_control(
14635 vm_map_t map,
14636 vm_map_offset_t address,
14637 vm_purgable_t control,
14638 int *state)
1c79356b 14639{
91447636
A
14640 vm_map_entry_t entry;
14641 vm_object_t object;
14642 kern_return_t kr;
fe8ab488 14643 boolean_t was_nonvolatile;
1c79356b 14644
1c79356b 14645 /*
91447636
A
14646 * Vet all the input parameters and current type and state of the
 14647 * underlying object. Return with an error if anything is amiss.
1c79356b 14648 */
91447636
A
14649 if (map == VM_MAP_NULL)
14650 return(KERN_INVALID_ARGUMENT);
1c79356b 14651
91447636 14652 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7
A
14653 control != VM_PURGABLE_GET_STATE &&
14654 control != VM_PURGABLE_PURGE_ALL)
91447636 14655 return(KERN_INVALID_ARGUMENT);
1c79356b 14656
b0d623f7
A
14657 if (control == VM_PURGABLE_PURGE_ALL) {
14658 vm_purgeable_object_purge_all();
14659 return KERN_SUCCESS;
14660 }
14661
91447636 14662 if (control == VM_PURGABLE_SET_STATE &&
b0d623f7 14663 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2d21ac55 14664 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
91447636
A
14665 return(KERN_INVALID_ARGUMENT);
14666
b0d623f7 14667 vm_map_lock_read(map);
91447636
A
14668
14669 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14670
14671 /*
14672 * Must pass a valid non-submap address.
14673 */
b0d623f7 14674 vm_map_unlock_read(map);
91447636
A
14675 return(KERN_INVALID_ADDRESS);
14676 }
14677
14678 if ((entry->protection & VM_PROT_WRITE) == 0) {
14679 /*
14680 * Can't apply purgable controls to something you can't write.
14681 */
b0d623f7 14682 vm_map_unlock_read(map);
91447636
A
14683 return(KERN_PROTECTION_FAILURE);
14684 }
14685
3e170ce0 14686 object = VME_OBJECT(entry);
fe8ab488
A
14687 if (object == VM_OBJECT_NULL ||
14688 object->purgable == VM_PURGABLE_DENY) {
91447636 14689 /*
fe8ab488 14690 * Object must already be present and be purgeable.
91447636 14691 */
b0d623f7 14692 vm_map_unlock_read(map);
91447636
A
14693 return KERN_INVALID_ARGUMENT;
14694 }
14695
14696 vm_object_lock(object);
14697
39236c6e 14698#if 00
3e170ce0 14699 if (VME_OFFSET(entry) != 0 ||
6d2010ae 14700 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
14701 /*
14702 * Can only apply purgable controls to the whole (existing)
14703 * object at once.
14704 */
b0d623f7 14705 vm_map_unlock_read(map);
91447636
A
14706 vm_object_unlock(object);
14707 return KERN_INVALID_ARGUMENT;
1c79356b 14708 }
39236c6e 14709#endif
fe8ab488
A
14710
14711 assert(!entry->is_sub_map);
14712 assert(!entry->use_pmap); /* purgeable has its own accounting */
14713
b0d623f7 14714 vm_map_unlock_read(map);
1c79356b 14715
fe8ab488
A
14716 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14717
91447636 14718 kr = vm_object_purgable_control(object, control, state);
1c79356b 14719
fe8ab488
A
14720 if (was_nonvolatile &&
14721 object->purgable != VM_PURGABLE_NONVOLATILE &&
14722 map->pmap == kernel_pmap) {
14723#if DEBUG
14724 object->vo_purgeable_volatilizer = kernel_task;
14725#endif /* DEBUG */
14726 }
14727
91447636 14728 vm_object_unlock(object);
1c79356b 14729
91447636
A
14730 return kr;
14731}
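/*
 * Usage sketch (illustrative, caller-side): mark the object backing
 * "address" volatile so its pages may be reclaimed under memory pressure:
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kr = vm_map_purgable_control(map, address,
 *				     VM_PURGABLE_SET_STATE, &state);
 *
 * With VM_PURGABLE_GET_STATE, the current state is returned in *state.
 */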
1c79356b 14732
91447636 14733kern_return_t
b0d623f7 14734vm_map_page_query_internal(
2d21ac55 14735 vm_map_t target_map,
91447636 14736 vm_map_offset_t offset,
2d21ac55
A
14737 int *disposition,
14738 int *ref_count)
91447636 14739{
b0d623f7
A
14740 kern_return_t kr;
14741 vm_page_info_basic_data_t info;
14742 mach_msg_type_number_t count;
14743
14744 count = VM_PAGE_INFO_BASIC_COUNT;
14745 kr = vm_map_page_info(target_map,
14746 offset,
14747 VM_PAGE_INFO_BASIC,
14748 (vm_page_info_t) &info,
14749 &count);
14750 if (kr == KERN_SUCCESS) {
14751 *disposition = info.disposition;
14752 *ref_count = info.ref_count;
14753 } else {
14754 *disposition = 0;
14755 *ref_count = 0;
14756 }
2d21ac55 14757
b0d623f7
A
14758 return kr;
14759}
14760
14761kern_return_t
14762vm_map_page_info(
14763 vm_map_t map,
14764 vm_map_offset_t offset,
14765 vm_page_info_flavor_t flavor,
14766 vm_page_info_t info,
14767 mach_msg_type_number_t *count)
14768{
14769 vm_map_entry_t map_entry;
14770 vm_object_t object;
14771 vm_page_t m;
b0d623f7
A
14772 kern_return_t retval = KERN_SUCCESS;
14773 boolean_t top_object;
14774 int disposition;
14775 int ref_count;
b0d623f7
A
14776 vm_page_info_basic_t basic_info;
14777 int depth;
6d2010ae 14778 vm_map_offset_t offset_in_page;
2d21ac55 14779
b0d623f7
A
14780 switch (flavor) {
14781 case VM_PAGE_INFO_BASIC:
14782 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
14783 /*
14784 * The "vm_page_info_basic_data" structure was not
14785 * properly padded, so allow the size to be off by
14786 * one to maintain backwards binary compatibility...
14787 */
14788 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14789 return KERN_INVALID_ARGUMENT;
b0d623f7
A
14790 }
14791 break;
14792 default:
14793 return KERN_INVALID_ARGUMENT;
91447636 14794 }
2d21ac55 14795
b0d623f7
A
14796 disposition = 0;
14797 ref_count = 0;
b0d623f7
A
14798 top_object = TRUE;
14799 depth = 0;
14800
14801 retval = KERN_SUCCESS;
6d2010ae 14802 offset_in_page = offset & PAGE_MASK;
39236c6e 14803 offset = vm_map_trunc_page(offset, PAGE_MASK);
b0d623f7
A
14804
14805 vm_map_lock_read(map);
14806
14807 /*
14808 * First, find the map entry covering "offset", going down
14809 * submaps if necessary.
14810 */
14811 for (;;) {
14812 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14813 vm_map_unlock_read(map);
14814 return KERN_INVALID_ADDRESS;
14815 }
14816 /* compute offset from this map entry's start */
14817 offset -= map_entry->vme_start;
14818 /* compute offset into this map entry's object (or submap) */
3e170ce0 14819 offset += VME_OFFSET(map_entry);
b0d623f7
A
14820
14821 if (map_entry->is_sub_map) {
14822 vm_map_t sub_map;
2d21ac55 14823
3e170ce0 14824 sub_map = VME_SUBMAP(map_entry);
2d21ac55 14825 vm_map_lock_read(sub_map);
b0d623f7 14826 vm_map_unlock_read(map);
2d21ac55 14827
b0d623f7
A
14828 map = sub_map;
14829
14830 ref_count = MAX(ref_count, map->ref_count);
14831 continue;
1c79356b 14832 }
b0d623f7 14833 break;
91447636 14834 }
b0d623f7 14835
3e170ce0 14836 object = VME_OBJECT(map_entry);
b0d623f7
A
14837 if (object == VM_OBJECT_NULL) {
14838 /* no object -> no page */
14839 vm_map_unlock_read(map);
14840 goto done;
14841 }
14842
91447636 14843 vm_object_lock(object);
b0d623f7
A
14844 vm_map_unlock_read(map);
14845
14846 /*
14847 * Go down the VM object shadow chain until we find the page
14848 * we're looking for.
14849 */
14850 for (;;) {
14851 ref_count = MAX(ref_count, object->ref_count);
2d21ac55 14852
91447636 14853 m = vm_page_lookup(object, offset);
2d21ac55 14854
91447636 14855 if (m != VM_PAGE_NULL) {
b0d623f7 14856 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
91447636
A
14857 break;
14858 } else {
39236c6e
A
14859 if (object->internal &&
14860 object->alive &&
14861 !object->terminating &&
14862 object->pager_ready) {
14863
39037602
A
14864 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14865 == VM_EXTERNAL_STATE_EXISTS) {
14866 /* the pager has that page */
14867 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14868 break;
2d21ac55
A
14869 }
14870 }
b0d623f7 14871
2d21ac55
A
14872 if (object->shadow != VM_OBJECT_NULL) {
14873 vm_object_t shadow;
14874
6d2010ae 14875 offset += object->vo_shadow_offset;
2d21ac55
A
14876 shadow = object->shadow;
14877
14878 vm_object_lock(shadow);
14879 vm_object_unlock(object);
14880
14881 object = shadow;
14882 top_object = FALSE;
b0d623f7 14883 depth++;
2d21ac55 14884 } else {
b0d623f7
A
14885// if (!object->internal)
14886// break;
14887// retval = KERN_FAILURE;
14888// goto done_with_object;
14889 break;
91447636 14890 }
91447636
A
14891 }
14892 }
91447636
A
 14893 /* The ref_count is not strictly accurate: it measures the number */
 14894 /* of entities holding a ref on the object; they may not be mapping */
 14895 /* the object, or may not be mapping the section holding the */
 14896 /* target page, but it's still a ballpark number and, though an */
 14897 /* overcount, it picks up the copy-on-write cases. */
1c79356b 14898
91447636
A
 14899 /* We could also get a picture of page sharing from pmap_attributes, */
 14900 /* but this would undercount, as only faulted-in mappings would */
 14901 /* show up. */
1c79356b 14902
2d21ac55 14903 if (top_object == TRUE && object->shadow)
b0d623f7
A
14904 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14905
14906 if (! object->internal)
14907 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
2d21ac55
A
14908
14909 if (m == VM_PAGE_NULL)
b0d623f7 14910 goto done_with_object;
2d21ac55 14911
91447636 14912 if (m->fictitious) {
b0d623f7
A
14913 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14914 goto done_with_object;
91447636 14915 }
39037602 14916 if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 14917 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
1c79356b 14918
39037602 14919 if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 14920 disposition |= VM_PAGE_QUERY_PAGE_REF;
1c79356b 14921
39037602 14922 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
b0d623f7 14923 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
1c79356b 14924
593a1d5f 14925 if (m->cs_validated)
b0d623f7 14926 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
593a1d5f 14927 if (m->cs_tainted)
b0d623f7 14928 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
c18c124e
A
14929 if (m->cs_nx)
14930 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
593a1d5f 14931
b0d623f7 14932done_with_object:
2d21ac55 14933 vm_object_unlock(object);
b0d623f7
A
14934done:
14935
14936 switch (flavor) {
14937 case VM_PAGE_INFO_BASIC:
14938 basic_info = (vm_page_info_basic_t) info;
14939 basic_info->disposition = disposition;
14940 basic_info->ref_count = ref_count;
39236c6e
A
14941 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14942 VM_KERNEL_ADDRPERM(object);
6d2010ae
A
14943 basic_info->offset =
14944 (memory_object_offset_t) offset + offset_in_page;
b0d623f7
A
14945 basic_info->depth = depth;
14946 break;
14947 }
0c530ab8 14948
2d21ac55 14949 return retval;
91447636
A
14950}
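/*
 * Usage sketch (illustrative): the VM_PAGE_INFO_BASIC flavor is queried
 * the same way vm_map_page_query_internal() above does it:
 *
 *	vm_page_info_basic_data_t info;
 *	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
 *	kr = vm_map_page_info(map, offset, VM_PAGE_INFO_BASIC,
 *			      (vm_page_info_t) &info, &count);
 *
 * "info.disposition" then carries the VM_PAGE_QUERY_PAGE_* bits
 * gathered above.
 */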
14951
14952/*
14953 * vm_map_msync
14954 *
 14955 * Synchronises the specified memory range with its backing store
 14956 * image by either flushing or cleaning the contents to the appropriate
 14957 * memory manager, engaging in a memory object synchronize dialog with
 14958 * that manager. The client doesn't return until the manager issues
 14959 * the m_o_s_completed message. MIG magically converts the user task
 14960 * parameter to the task's address map.
14961 *
14962 * interpretation of sync_flags
14963 * VM_SYNC_INVALIDATE - discard pages, only return precious
14964 * pages to manager.
14965 *
14966 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14967 * - discard pages, write dirty or precious
14968 * pages back to memory manager.
14969 *
14970 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14971 * - write dirty or precious pages back to
14972 * the memory manager.
14973 *
14974 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
14975 * is a hole in the region, and we would
14976 * have returned KERN_SUCCESS, return
14977 * KERN_INVALID_ADDRESS instead.
14978 *
14979 * NOTE
 14980 * The memory object attributes have not yet been implemented, so this
 14981 * function will eventually have to deal with the invalidate attribute.
14982 *
14983 * RETURNS
14984 * KERN_INVALID_TASK Bad task parameter
14985 * KERN_INVALID_ARGUMENT both sync and async were specified.
14986 * KERN_SUCCESS The usual.
14987 * KERN_INVALID_ADDRESS There was a hole in the region.
14988 */
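/*
 * Flag usage sketch (illustrative): a synchronous flush that also fails
 * on holes, roughly what a strict msync(MS_SYNC) caller would want:
 *
 *	kr = vm_map_msync(map, addr, size,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *
 * Adding VM_SYNC_INVALIDATE additionally discards clean pages and pushes
 * dirty/precious pages back to the memory manager, per the
 * interpretation above.
 */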
14989
14990kern_return_t
14991vm_map_msync(
14992 vm_map_t map,
14993 vm_map_address_t address,
14994 vm_map_size_t size,
14995 vm_sync_t sync_flags)
14996{
14997 msync_req_t msr;
14998 msync_req_t new_msr;
14999 queue_chain_t req_q; /* queue of requests for this msync */
15000 vm_map_entry_t entry;
15001 vm_map_size_t amount_left;
15002 vm_object_offset_t offset;
15003 boolean_t do_sync_req;
91447636 15004 boolean_t had_hole = FALSE;
2d21ac55 15005 memory_object_t pager;
3e170ce0 15006 vm_map_offset_t pmap_offset;
91447636
A
15007
15008 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
15009 (sync_flags & VM_SYNC_SYNCHRONOUS))
15010 return(KERN_INVALID_ARGUMENT);
1c79356b
A
15011
15012 /*
91447636 15013 * align address and size on page boundaries
1c79356b 15014 */
39236c6e
A
15015 size = (vm_map_round_page(address + size,
15016 VM_MAP_PAGE_MASK(map)) -
15017 vm_map_trunc_page(address,
15018 VM_MAP_PAGE_MASK(map)));
15019 address = vm_map_trunc_page(address,
15020 VM_MAP_PAGE_MASK(map));
1c79356b 15021
91447636
A
15022 if (map == VM_MAP_NULL)
15023 return(KERN_INVALID_TASK);
1c79356b 15024
91447636
A
15025 if (size == 0)
15026 return(KERN_SUCCESS);
1c79356b 15027
91447636
A
15028 queue_init(&req_q);
15029 amount_left = size;
1c79356b 15030
91447636
A
15031 while (amount_left > 0) {
15032 vm_object_size_t flush_size;
15033 vm_object_t object;
1c79356b 15034
91447636
A
15035 vm_map_lock(map);
15036 if (!vm_map_lookup_entry(map,
3e170ce0 15037 address,
39236c6e 15038 &entry)) {
91447636 15039
2d21ac55 15040 vm_map_size_t skip;
91447636
A
15041
15042 /*
15043 * hole in the address map.
15044 */
15045 had_hole = TRUE;
15046
39037602
A
15047 if (sync_flags & VM_SYNC_KILLPAGES) {
15048 /*
15049 * For VM_SYNC_KILLPAGES, there should be
15050 * no holes in the range, since we couldn't
15051 * prevent someone else from allocating in
15052 * that hole and we wouldn't want to "kill"
15053 * their pages.
15054 */
15055 vm_map_unlock(map);
15056 break;
15057 }
15058
91447636
A
15059 /*
15060 * Check for empty map.
15061 */
15062 if (entry == vm_map_to_entry(map) &&
15063 entry->vme_next == entry) {
15064 vm_map_unlock(map);
15065 break;
15066 }
15067 /*
15068 * Check that we don't wrap and that
15069 * we have at least one real map entry.
15070 */
15071 if ((map->hdr.nentries == 0) ||
15072 (entry->vme_next->vme_start < address)) {
15073 vm_map_unlock(map);
15074 break;
15075 }
15076 /*
15077 * Move up to the next entry if needed
15078 */
15079 skip = (entry->vme_next->vme_start - address);
15080 if (skip >= amount_left)
15081 amount_left = 0;
15082 else
15083 amount_left -= skip;
15084 address = entry->vme_next->vme_start;
15085 vm_map_unlock(map);
15086 continue;
15087 }
1c79356b 15088
91447636 15089 offset = address - entry->vme_start;
3e170ce0 15090 pmap_offset = address;
1c79356b 15091
91447636
A
15092 /*
15093 * do we have more to flush than is contained in this
15094 * entry ?
15095 */
15096 if (amount_left + entry->vme_start + offset > entry->vme_end) {
15097 flush_size = entry->vme_end -
2d21ac55 15098 (entry->vme_start + offset);
91447636
A
15099 } else {
15100 flush_size = amount_left;
15101 }
15102 amount_left -= flush_size;
15103 address += flush_size;
1c79356b 15104
91447636
A
15105 if (entry->is_sub_map == TRUE) {
15106 vm_map_t local_map;
15107 vm_map_offset_t local_offset;
1c79356b 15108
3e170ce0
A
15109 local_map = VME_SUBMAP(entry);
15110 local_offset = VME_OFFSET(entry);
91447636
A
15111 vm_map_unlock(map);
15112 if (vm_map_msync(
2d21ac55
A
15113 local_map,
15114 local_offset,
15115 flush_size,
15116 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
15117 had_hole = TRUE;
15118 }
15119 continue;
15120 }
3e170ce0 15121 object = VME_OBJECT(entry);
1c79356b 15122
91447636
A
15123 /*
15124 * We can't sync this object if the object has not been
15125 * created yet
15126 */
15127 if (object == VM_OBJECT_NULL) {
15128 vm_map_unlock(map);
15129 continue;
15130 }
3e170ce0 15131 offset += VME_OFFSET(entry);
1c79356b 15132
91447636 15133 vm_object_lock(object);
1c79356b 15134
91447636 15135 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
b0d623f7
A
15136 int kill_pages = 0;
15137 boolean_t reusable_pages = FALSE;
91447636
A
15138
15139 if (sync_flags & VM_SYNC_KILLPAGES) {
39037602
A
15140 if (((object->ref_count == 1) ||
15141 ((object->copy_strategy !=
15142 MEMORY_OBJECT_COPY_SYMMETRIC) &&
15143 (object->copy == VM_OBJECT_NULL))) &&
15144 (object->shadow == VM_OBJECT_NULL)) {
15145 if (object->ref_count != 1) {
15146 vm_page_stats_reusable.free_shared++;
15147 }
91447636 15148 kill_pages = 1;
39037602 15149 } else {
91447636 15150 kill_pages = -1;
39037602 15151 }
91447636
A
15152 }
15153 if (kill_pages != -1)
3e170ce0
A
15154 vm_object_deactivate_pages(
15155 object,
15156 offset,
15157 (vm_object_size_t) flush_size,
15158 kill_pages,
15159 reusable_pages,
15160 map->pmap,
15161 pmap_offset);
91447636
A
15162 vm_object_unlock(object);
15163 vm_map_unlock(map);
15164 continue;
1c79356b 15165 }
91447636
A
15166 /*
15167 * We can't sync this object if there isn't a pager.
15168 * Don't bother to sync internal objects, since there can't
15169 * be any "permanent" storage for these objects anyway.
15170 */
15171 if ((object->pager == MEMORY_OBJECT_NULL) ||
15172 (object->internal) || (object->private)) {
15173 vm_object_unlock(object);
15174 vm_map_unlock(map);
15175 continue;
15176 }
15177 /*
15178 * keep reference on the object until syncing is done
15179 */
2d21ac55 15180 vm_object_reference_locked(object);
91447636 15181 vm_object_unlock(object);
1c79356b 15182
91447636 15183 vm_map_unlock(map);
1c79356b 15184
91447636 15185 do_sync_req = vm_object_sync(object,
2d21ac55
A
15186 offset,
15187 flush_size,
15188 sync_flags & VM_SYNC_INVALIDATE,
b0d623f7
A
15189 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
15190 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
2d21ac55 15191 sync_flags & VM_SYNC_SYNCHRONOUS);
91447636
A
15192 /*
 15193 * only send an m_o_s if we returned pages or if the entry
 15194 * is writable (i.e. dirty pages may have already been sent back)
15195 */
b0d623f7 15196 if (!do_sync_req) {
2d21ac55
A
15197 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
15198 /*
15199 * clear out the clustering and read-ahead hints
15200 */
15201 vm_object_lock(object);
15202
15203 object->pages_created = 0;
15204 object->pages_used = 0;
15205 object->sequential = 0;
15206 object->last_alloc = 0;
15207
15208 vm_object_unlock(object);
15209 }
91447636
A
15210 vm_object_deallocate(object);
15211 continue;
1c79356b 15212 }
91447636 15213 msync_req_alloc(new_msr);
1c79356b 15214
91447636
A
15215 vm_object_lock(object);
15216 offset += object->paging_offset;
1c79356b 15217
91447636
A
15218 new_msr->offset = offset;
15219 new_msr->length = flush_size;
15220 new_msr->object = object;
15221 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
2d21ac55
A
15222 re_iterate:
15223
15224 /*
15225 * We can't sync this object if there isn't a pager. The
15226 * pager can disappear anytime we're not holding the object
15227 * lock. So this has to be checked anytime we goto re_iterate.
15228 */
15229
15230 pager = object->pager;
15231
15232 if (pager == MEMORY_OBJECT_NULL) {
15233 vm_object_unlock(object);
15234 vm_object_deallocate(object);
39236c6e
A
15235 msync_req_free(new_msr);
15236 new_msr = NULL;
2d21ac55
A
15237 continue;
15238 }
15239
91447636
A
15240 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
15241 /*
15242 * need to check for overlapping entry, if found, wait
15243 * on overlapping msr to be done, then reiterate
15244 */
15245 msr_lock(msr);
15246 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
15247 ((offset >= msr->offset &&
15248 offset < (msr->offset + msr->length)) ||
15249 (msr->offset >= offset &&
15250 msr->offset < (offset + flush_size))))
15251 {
15252 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
15253 msr_unlock(msr);
15254 vm_object_unlock(object);
15255 thread_block(THREAD_CONTINUE_NULL);
15256 vm_object_lock(object);
15257 goto re_iterate;
15258 }
15259 msr_unlock(msr);
15260 }/* queue_iterate */
1c79356b 15261
91447636 15262 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
2d21ac55
A
15263
15264 vm_object_paging_begin(object);
91447636 15265 vm_object_unlock(object);
1c79356b 15266
91447636
A
15267 queue_enter(&req_q, new_msr, msync_req_t, req_q);
15268
15269 (void) memory_object_synchronize(
2d21ac55
A
15270 pager,
15271 offset,
15272 flush_size,
15273 sync_flags & ~VM_SYNC_CONTIGUOUS);
15274
15275 vm_object_lock(object);
15276 vm_object_paging_end(object);
15277 vm_object_unlock(object);
91447636
A
15278 }/* while */
15279
15280 /*
 15281 * wait for memory_object_synchronize_completed messages from pager(s)
15282 */
15283
15284 while (!queue_empty(&req_q)) {
15285 msr = (msync_req_t)queue_first(&req_q);
15286 msr_lock(msr);
15287 while(msr->flag != VM_MSYNC_DONE) {
15288 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
15289 msr_unlock(msr);
15290 thread_block(THREAD_CONTINUE_NULL);
15291 msr_lock(msr);
15292 }/* while */
15293 queue_remove(&req_q, msr, msync_req_t, req_q);
15294 msr_unlock(msr);
15295 vm_object_deallocate(msr->object);
15296 msync_req_free(msr);
15297 }/* queue_iterate */
15298
15299 /* for proper msync() behaviour */
15300 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
15301 return(KERN_INVALID_ADDRESS);
15302
15303 return(KERN_SUCCESS);
15304}/* vm_msync */
1c79356b 15305
1c79356b 15306/*
91447636
A
15307 * Routine: convert_port_entry_to_map
15308 * Purpose:
15309 * Convert from a port specifying an entry or a task
15310 * to a map. Doesn't consume the port ref; produces a map ref,
15311 * which may be null. Unlike convert_port_to_map, the
 15312 * port may be backed by either a task or a named entry.
15313 * Conditions:
15314 * Nothing locked.
1c79356b 15315 */
1c79356b 15316
1c79356b 15317
91447636
A
15318vm_map_t
15319convert_port_entry_to_map(
15320 ipc_port_t port)
15321{
15322 vm_map_t map;
15323 vm_named_entry_t named_entry;
2d21ac55 15324 uint32_t try_failed_count = 0;
1c79356b 15325
91447636
A
15326 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15327 while(TRUE) {
15328 ip_lock(port);
15329 if(ip_active(port) && (ip_kotype(port)
2d21ac55 15330 == IKOT_NAMED_ENTRY)) {
91447636 15331 named_entry =
2d21ac55 15332 (vm_named_entry_t)port->ip_kobject;
b0d623f7 15333 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 15334 ip_unlock(port);
2d21ac55
A
15335
15336 try_failed_count++;
15337 mutex_pause(try_failed_count);
91447636
A
15338 continue;
15339 }
15340 named_entry->ref_count++;
b0d623f7 15341 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
15342 ip_unlock(port);
15343 if ((named_entry->is_sub_map) &&
2d21ac55
A
15344 (named_entry->protection
15345 & VM_PROT_WRITE)) {
91447636
A
15346 map = named_entry->backing.map;
15347 } else {
15348 mach_destroy_memory_entry(port);
15349 return VM_MAP_NULL;
15350 }
15351 vm_map_reference_swap(map);
15352 mach_destroy_memory_entry(port);
15353 break;
15354 }
15355 else
15356 return VM_MAP_NULL;
15357 }
1c79356b 15358 }
91447636
A
15359 else
15360 map = convert_port_to_map(port);
1c79356b 15361
91447636
A
15362 return map;
15363}
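/*
 * Note for callers (sketch): the map reference produced here must be
 * dropped with vm_map_deallocate() when the caller is done with it,
 * just like a reference obtained from convert_port_to_map().
 */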
1c79356b 15364
91447636
A
15365/*
15366 * Routine: convert_port_entry_to_object
15367 * Purpose:
15368 * Convert from a port specifying a named entry to an
 15370 * object. Doesn't consume the port ref; produces an object ref,
15370 * which may be null.
15371 * Conditions:
15372 * Nothing locked.
15373 */
1c79356b 15374
1c79356b 15375
91447636
A
15376vm_object_t
15377convert_port_entry_to_object(
15378 ipc_port_t port)
15379{
39236c6e 15380 vm_object_t object = VM_OBJECT_NULL;
91447636 15381 vm_named_entry_t named_entry;
39236c6e
A
15382 uint32_t try_failed_count = 0;
15383
15384 if (IP_VALID(port) &&
15385 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15386 try_again:
15387 ip_lock(port);
15388 if (ip_active(port) &&
15389 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15390 named_entry = (vm_named_entry_t)port->ip_kobject;
15391 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 15392 ip_unlock(port);
39236c6e
A
15393 try_failed_count++;
15394 mutex_pause(try_failed_count);
15395 goto try_again;
15396 }
15397 named_entry->ref_count++;
15398 lck_mtx_unlock(&(named_entry)->Lock);
15399 ip_unlock(port);
15400 if (!(named_entry->is_sub_map) &&
15401 !(named_entry->is_pager) &&
15402 !(named_entry->is_copy) &&
15403 (named_entry->protection & VM_PROT_WRITE)) {
15404 object = named_entry->backing.object;
15405 vm_object_reference(object);
91447636 15406 }
39236c6e 15407 mach_destroy_memory_entry(port);
1c79356b 15408 }
1c79356b 15409 }
91447636
A
15410
15411 return object;
1c79356b 15412}
9bccf70c
A
15413
15414/*
91447636
A
15415 * Export routines to other components for the things we access locally through
15416 * macros.
9bccf70c 15417 */
91447636
A
15418#undef current_map
15419vm_map_t
15420current_map(void)
9bccf70c 15421{
91447636 15422 return (current_map_fast());
9bccf70c
A
15423}
15424
15425/*
15426 * vm_map_reference:
15427 *
15428 * Most code internal to the osfmk will go through a
15429 * macro defining this. This is always here for the
15430 * use of other kernel components.
15431 */
15432#undef vm_map_reference
15433void
15434vm_map_reference(
39037602 15435 vm_map_t map)
9bccf70c
A
15436{
15437 if (map == VM_MAP_NULL)
15438 return;
15439
b0d623f7 15440 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15441#if TASK_SWAPPER
15442 assert(map->res_count > 0);
15443 assert(map->ref_count >= map->res_count);
15444 map->res_count++;
15445#endif
15446 map->ref_count++;
b0d623f7 15447 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15448}
15449
15450/*
15451 * vm_map_deallocate:
15452 *
15453 * Removes a reference from the specified map,
15454 * destroying it if no references remain.
15455 * The map should not be locked.
15456 */
15457void
15458vm_map_deallocate(
39037602 15459 vm_map_t map)
9bccf70c
A
15460{
15461 unsigned int ref;
15462
15463 if (map == VM_MAP_NULL)
15464 return;
15465
b0d623f7 15466 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15467 ref = --map->ref_count;
15468 if (ref > 0) {
15469 vm_map_res_deallocate(map);
b0d623f7 15470 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15471 return;
15472 }
15473 assert(map->ref_count == 0);
b0d623f7 15474 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15475
15476#if TASK_SWAPPER
15477 /*
15478 * The map residence count isn't decremented here because
15479 * the vm_map_delete below will traverse the entire map,
15480 * deleting entries, and the residence counts on objects
15481 * and sharing maps will go away then.
15482 */
15483#endif
15484
2d21ac55 15485 vm_map_destroy(map, VM_MAP_NO_FLAGS);
0c530ab8 15486}
91447636 15487
91447636 15488
0c530ab8
A
15489void
15490vm_map_disable_NX(vm_map_t map)
15491{
15492 if (map == NULL)
15493 return;
15494 if (map->pmap == NULL)
15495 return;
15496
15497 pmap_disable_NX(map->pmap);
15498}
15499
6d2010ae
A
15500void
15501vm_map_disallow_data_exec(vm_map_t map)
15502{
15503 if (map == NULL)
15504 return;
15505
15506 map->map_disallow_data_exec = TRUE;
15507}
15508
0c530ab8
A
15509/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15510 * more descriptive.
15511 */
15512void
15513vm_map_set_32bit(vm_map_t map)
15514{
15515 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15516}
15517
15518
15519void
15520vm_map_set_64bit(vm_map_t map)
15521{
15522 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15523}
15524
15525vm_map_offset_t
3e170ce0 15526vm_compute_max_offset(boolean_t is64)
0c530ab8
A
15527{
15528 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15529}
15530
39236c6e
A
15531uint64_t
15532vm_map_get_max_aslr_slide_pages(vm_map_t map)
15533{
15534 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15535}
15536
0c530ab8 15537boolean_t
2d21ac55
A
15538vm_map_is_64bit(
15539 vm_map_t map)
15540{
15541 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15542}
15543
15544boolean_t
316670eb
A
15545vm_map_has_hard_pagezero(
15546 vm_map_t map,
15547 vm_map_offset_t pagezero_size)
0c530ab8
A
15548{
15549 /*
15550 * XXX FBDP
15551 * We should lock the VM map (for read) here but we can get away
15552 * with it for now because there can't really be any race condition:
15553 * the VM map's min_offset is changed only when the VM map is created
15554 * and when the zero page is established (when the binary gets loaded),
15555 * and this routine gets called only when the task terminates and the
15556 * VM map is being torn down, and when a new map is created via
15557 * load_machfile()/execve().
15558 */
316670eb 15559 return (map->min_offset >= pagezero_size);
0c530ab8
A
15560}
15561
316670eb
A
15562/*
 15563 * Raise a VM map's maximum offset.
15564 */
15565kern_return_t
15566vm_map_raise_max_offset(
15567 vm_map_t map,
15568 vm_map_offset_t new_max_offset)
15569{
15570 kern_return_t ret;
15571
15572 vm_map_lock(map);
15573 ret = KERN_INVALID_ADDRESS;
15574
15575 if (new_max_offset >= map->max_offset) {
15576 if (!vm_map_is_64bit(map)) {
15577 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15578 map->max_offset = new_max_offset;
15579 ret = KERN_SUCCESS;
15580 }
15581 } else {
15582 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15583 map->max_offset = new_max_offset;
15584 ret = KERN_SUCCESS;
15585 }
15586 }
15587 }
15588
15589 vm_map_unlock(map);
15590 return ret;
15591}
15592
15593
0c530ab8
A
15594/*
15595 * Raise a VM map's minimum offset.
15596 * To strictly enforce "page zero" reservation.
15597 */
15598kern_return_t
15599vm_map_raise_min_offset(
15600 vm_map_t map,
15601 vm_map_offset_t new_min_offset)
15602{
15603 vm_map_entry_t first_entry;
15604
39236c6e
A
15605 new_min_offset = vm_map_round_page(new_min_offset,
15606 VM_MAP_PAGE_MASK(map));
0c530ab8
A
15607
15608 vm_map_lock(map);
15609
15610 if (new_min_offset < map->min_offset) {
15611 /*
15612 * Can't move min_offset backwards, as that would expose
15613 * a part of the address space that was previously, and for
15614 * possibly good reasons, inaccessible.
15615 */
15616 vm_map_unlock(map);
15617 return KERN_INVALID_ADDRESS;
15618 }
3e170ce0
A
15619 if (new_min_offset >= map->max_offset) {
15620 /* can't go beyond the end of the address space */
15621 vm_map_unlock(map);
15622 return KERN_INVALID_ADDRESS;
15623 }
0c530ab8
A
15624
15625 first_entry = vm_map_first_entry(map);
15626 if (first_entry != vm_map_to_entry(map) &&
15627 first_entry->vme_start < new_min_offset) {
15628 /*
15629 * Some memory was already allocated below the new
 15630 * minimum offset. It's too late to change it now...
15631 */
15632 vm_map_unlock(map);
15633 return KERN_NO_SPACE;
15634 }
15635
15636 map->min_offset = new_min_offset;
15637
3e170ce0
A
15638 assert(map->holes_list);
15639 map->holes_list->start = new_min_offset;
15640 assert(new_min_offset < map->holes_list->end);
15641
0c530ab8
A
15642 vm_map_unlock(map);
15643
15644 return KERN_SUCCESS;
15645}
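/*
 * Usage sketch (illustrative assumption about the caller): the Mach-O
 * loader would enforce a __PAGEZERO reservation by raising the minimum
 * offset to the end of that segment before mapping anything else, e.g.
 *
 *	kr = vm_map_raise_min_offset(map, pagezero_end);
 *
 * which fails with KERN_NO_SPACE if something is already mapped below
 * that address ("pagezero_end" is a hypothetical name here).
 */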
2d21ac55
A
15646
15647/*
15648 * Set the limit on the maximum amount of user wired memory allowed for this map.
15649 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15650 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
15651 * don't have to reach over to the BSD data structures.
15652 */
15653
15654void
15655vm_map_set_user_wire_limit(vm_map_t map,
15656 vm_size_t limit)
15657{
15658 map->user_wire_limit = limit;
15659}
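/*
 * Sketch (assumption about the BSD caller): this would typically be
 * invoked when the MEMLOCK rlimit changes for a process, e.g.
 *
 *	vm_map_set_user_wire_limit(current_map(), (vm_size_t) rlim_cur);
 *
 * so that user wiring requests can be checked entirely on the Mach side
 * ("rlim_cur" is an illustrative name, not a reference to actual code).
 */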
593a1d5f 15660
b0d623f7
A
15661
15662void vm_map_switch_protect(vm_map_t map,
15663 boolean_t val)
593a1d5f
A
15664{
15665 vm_map_lock(map);
b0d623f7 15666 map->switch_protect=val;
593a1d5f 15667 vm_map_unlock(map);
b0d623f7 15668}
b7266188 15669
39236c6e
A
15670/*
15671 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15672 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15673 * bump both counters.
15674 */
15675void
15676vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15677{
15678 pmap_t pmap = vm_map_pmap(map);
15679
fe8ab488 15680 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15681 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15682}
15683
15684void
15685vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15686{
15687 pmap_t pmap = vm_map_pmap(map);
15688
fe8ab488 15689 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15690 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15691}
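/*
 * Sketch: these two routines are expected to be called in matched pairs
 * by the IOKit mapping code, e.g.
 *
 *	vm_map_iokit_mapped_region(map, bytes);    ... region mapped ...
 *	...
 *	vm_map_iokit_unmapped_region(map, bytes);  ... after unmapping ...
 *
 * so that the iokit_mapped and phys_footprint ledgers stay balanced.
 */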
15692
b7266188
A
15693/* Add (generate) code signature for memory range */
15694#if CONFIG_DYNAMIC_CODE_SIGNING
15695kern_return_t vm_map_sign(vm_map_t map,
15696 vm_map_offset_t start,
15697 vm_map_offset_t end)
15698{
15699 vm_map_entry_t entry;
15700 vm_page_t m;
15701 vm_object_t object;
15702
15703 /*
15704 * Vet all the input parameters and current type and state of the
 15705 * underlying object. Return with an error if anything is amiss.
15706 */
15707 if (map == VM_MAP_NULL)
15708 return(KERN_INVALID_ARGUMENT);
15709
15710 vm_map_lock_read(map);
15711
15712 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
15713 /*
15714 * Must pass a valid non-submap address.
15715 */
15716 vm_map_unlock_read(map);
15717 return(KERN_INVALID_ADDRESS);
15718 }
15719
15720 if((entry->vme_start > start) || (entry->vme_end < end)) {
15721 /*
15722 * Map entry doesn't cover the requested range. Not handling
15723 * this situation currently.
15724 */
15725 vm_map_unlock_read(map);
15726 return(KERN_INVALID_ARGUMENT);
15727 }
15728
3e170ce0 15729 object = VME_OBJECT(entry);
b7266188
A
15730 if (object == VM_OBJECT_NULL) {
15731 /*
15732 * Object must already be present or we can't sign.
15733 */
15734 vm_map_unlock_read(map);
15735 return KERN_INVALID_ARGUMENT;
15736 }
15737
15738 vm_object_lock(object);
15739 vm_map_unlock_read(map);
15740
15741 while(start < end) {
15742 uint32_t refmod;
15743
3e170ce0
A
15744 m = vm_page_lookup(object,
15745 start - entry->vme_start + VME_OFFSET(entry));
b7266188
A
15746 if (m==VM_PAGE_NULL) {
 15747 /* should we try to fault a page here? we can probably
15748 * demand it exists and is locked for this request */
15749 vm_object_unlock(object);
15750 return KERN_FAILURE;
15751 }
15752 /* deal with special page status */
15753 if (m->busy ||
15754 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
15755 vm_object_unlock(object);
15756 return KERN_FAILURE;
15757 }
15758
15759 /* Page is OK... now "validate" it */
15760 /* This is the place where we'll call out to create a code
15761 * directory, later */
15762 m->cs_validated = TRUE;
15763
15764 /* The page is now "clean" for codesigning purposes. That means
15765 * we don't consider it as modified (wpmapped) anymore. But
15766 * we'll disconnect the page so we note any future modification
15767 * attempts. */
15768 m->wpmapped = FALSE;
39037602 15769 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
b7266188
A
15770
15771 /* Pull the dirty status from the pmap, since we cleared the
15772 * wpmapped bit */
15773 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
316670eb 15774 SET_PAGE_DIRTY(m, FALSE);
b7266188
A
15775 }
15776
15777 /* On to the next page */
15778 start += PAGE_SIZE;
15779 }
15780 vm_object_unlock(object);
15781
15782 return KERN_SUCCESS;
15783}
15784#endif
6d2010ae 15785
fe8ab488
A
15786kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15787{
15788 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15789 vm_map_entry_t next_entry;
15790 kern_return_t kr = KERN_SUCCESS;
15791 vm_map_t zap_map;
15792
15793 vm_map_lock(map);
15794
15795 /*
15796 * We use a "zap_map" to avoid having to unlock
15797 * the "map" in vm_map_delete().
15798 */
15799 zap_map = vm_map_create(PMAP_NULL,
15800 map->min_offset,
15801 map->max_offset,
15802 map->hdr.entries_pageable);
15803
15804 if (zap_map == VM_MAP_NULL) {
15805 return KERN_RESOURCE_SHORTAGE;
15806 }
15807
15808 vm_map_set_page_shift(zap_map,
15809 VM_MAP_PAGE_SHIFT(map));
3e170ce0 15810 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
15811
15812 for (entry = vm_map_first_entry(map);
15813 entry != vm_map_to_entry(map);
15814 entry = next_entry) {
15815 next_entry = entry->vme_next;
15816
3e170ce0
A
15817 if (VME_OBJECT(entry) &&
15818 !entry->is_sub_map &&
15819 (VME_OBJECT(entry)->internal == TRUE) &&
15820 (VME_OBJECT(entry)->ref_count == 1)) {
fe8ab488 15821
3e170ce0
A
15822 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15823 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488
A
15824
15825 (void)vm_map_delete(map,
15826 entry->vme_start,
15827 entry->vme_end,
15828 VM_MAP_REMOVE_SAVE_ENTRIES,
15829 zap_map);
15830 }
15831 }
15832
15833 vm_map_unlock(map);
15834
15835 /*
 15836 * Get rid of the "zap_map" and all the map entries that
 15837 * it may still contain.
15838 */
15839 if (zap_map != VM_MAP_NULL) {
15840 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15841 zap_map = VM_MAP_NULL;
15842 }
15843
15844 return kr;
15845}
15846
6d2010ae 15847
39037602
A
15848#if DEVELOPMENT || DEBUG
15849
15850int
15851vm_map_disconnect_page_mappings(
15852 vm_map_t map,
15853 boolean_t do_unnest)
6d2010ae
A
15854{
15855 vm_map_entry_t entry;
39037602
A
15856 int page_count = 0;
15857
15858 if (do_unnest == TRUE) {
15859#ifndef NO_NESTED_PMAP
15860 vm_map_lock(map);
15861
15862 for (entry = vm_map_first_entry(map);
15863 entry != vm_map_to_entry(map);
15864 entry = entry->vme_next) {
15865
15866 if (entry->is_sub_map && entry->use_pmap) {
15867 /*
15868 * Make sure the range between the start of this entry and
15869 * the end of this entry is no longer nested, so that
 15870 * we will only remove mappings from the pmap in use by
 15871 * this task.
15872 */
15873 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
15874 }
15875 }
15876 vm_map_unlock(map);
15877#endif
15878 }
6d2010ae 15879 vm_map_lock_read(map);
39037602
A
15880
15881 page_count = map->pmap->stats.resident_count;
15882
6d2010ae
A
15883 for (entry = vm_map_first_entry(map);
15884 entry != vm_map_to_entry(map);
15885 entry = entry->vme_next) {
6d2010ae 15886
39037602
A
15887 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
15888 (VME_OBJECT(entry)->phys_contiguous))) {
6d2010ae
A
15889 continue;
15890 }
39037602
A
15891 if (entry->is_sub_map)
15892 assert(!entry->use_pmap);
6d2010ae 15893
39037602 15894 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
6d2010ae 15895 }
6d2010ae
A
15896 vm_map_unlock_read(map);
15897
39037602 15898 return page_count;
6d2010ae
A
15899}
15900
39037602
A
15901#endif
15902
15903
15904#if CONFIG_FREEZE
15905
15906
3e170ce0
A
15907int c_freezer_swapout_count;
15908int c_freezer_compression_count = 0;
15909AbsoluteTime c_freezer_last_yield_ts = 0;
15910
6d2010ae
A
15911kern_return_t vm_map_freeze(
15912 vm_map_t map,
15913 unsigned int *purgeable_count,
15914 unsigned int *wired_count,
15915 unsigned int *clean_count,
15916 unsigned int *dirty_count,
39037602 15917 __unused unsigned int dirty_budget,
6d2010ae
A
15918 boolean_t *has_shared)
15919{
39236c6e
A
15920 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15921 kern_return_t kr = KERN_SUCCESS;
6d2010ae
A
15922
15923 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15924 *has_shared = FALSE;
15925
6d2010ae
A
15926 /*
15927 * We need the exclusive lock here so that we can
15928 * block any page faults or lookups while we are
15929 * in the middle of freezing this vm map.
15930 */
15931 vm_map_lock(map);
15932
39037602
A
15933 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
15934
15935 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15936 kr = KERN_NO_SPACE;
15937 goto done;
6d2010ae 15938 }
39037602 15939
3e170ce0
A
15940 c_freezer_compression_count = 0;
15941 clock_get_uptime(&c_freezer_last_yield_ts);
15942
6d2010ae
A
15943 for (entry2 = vm_map_first_entry(map);
15944 entry2 != vm_map_to_entry(map);
15945 entry2 = entry2->vme_next) {
15946
3e170ce0 15947 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 15948
39037602 15949 if (src_object &&
3e170ce0 15950 !entry2->is_sub_map &&
39037602 15951 !src_object->phys_contiguous) {
39236c6e 15952 /* If eligible, scan the entry, moving eligible pages over to our parent object */
6d2010ae 15953
39037602 15954 if (src_object->internal == TRUE) {
3e170ce0 15955
39037602
A
15956 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
15957 /*
15958 * Pages belonging to this object could be swapped to disk.
15959 * Make sure it's not a shared object because we could end
15960 * up just bringing it back in again.
15961 */
15962 if (src_object->ref_count > 1) {
15963 continue;
3e170ce0 15964 }
3e170ce0 15965 }
39037602 15966 vm_object_compressed_freezer_pageout(src_object);
3e170ce0
A
15967
15968 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15969 kr = KERN_NO_SPACE;
15970 break;
39236c6e 15971 }
6d2010ae
A
15972 }
15973 }
15974 }
6d2010ae
A
15975done:
15976 vm_map_unlock(map);
6d2010ae 15977
39037602
A
15978 vm_object_compressed_freezer_done();
15979
15980 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3e170ce0
A
15981 /*
15982 * reset the counter tracking the # of swapped c_segs
15983 * because we are now done with this freeze session and task.
15984 */
15985 c_freezer_swapout_count = 0;
15986 }
6d2010ae
A
15987 return kr;
15988}
15989
6d2010ae 15990#endif
e2d2fc5c 15991
e2d2fc5c
A
15992/*
15993 * vm_map_entry_should_cow_for_true_share:
15994 *
15995 * Determines if the map entry should be clipped and setup for copy-on-write
15996 * to avoid applying "true_share" to a large VM object when only a subset is
15997 * targeted.
15998 *
15999 * For now, we target only the map entries created for the Objective C
16000 * Garbage Collector, which initially have the following properties:
16001 * - alias == VM_MEMORY_MALLOC
16002 * - wired_count == 0
16003 * - !needs_copy
16004 * and a VM object with:
16005 * - internal
16006 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
16007 * - !true_share
16008 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
16009 *
16010 * Only non-kernel map entries.
e2d2fc5c
A
16011 */
16012boolean_t
16013vm_map_entry_should_cow_for_true_share(
16014 vm_map_entry_t entry)
16015{
16016 vm_object_t object;
16017
16018 if (entry->is_sub_map) {
16019 /* entry does not point at a VM object */
16020 return FALSE;
16021 }
16022
16023 if (entry->needs_copy) {
16024 /* already set for copy_on_write: done! */
16025 return FALSE;
16026 }
16027
3e170ce0
A
16028 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
16029 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 16030 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
16031 return FALSE;
16032 }
16033
16034 if (entry->wired_count) {
16035 /* wired: can't change the map entry... */
fe8ab488 16036 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
16037 return FALSE;
16038 }
16039
3e170ce0 16040 object = VME_OBJECT(entry);
e2d2fc5c
A
16041
16042 if (object == VM_OBJECT_NULL) {
16043 /* no object yet... */
16044 return FALSE;
16045 }
16046
16047 if (!object->internal) {
16048 /* not an internal object */
16049 return FALSE;
16050 }
16051
16052 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16053 /* not the default copy strategy */
16054 return FALSE;
16055 }
16056
16057 if (object->true_share) {
16058 /* already true_share: too late to avoid it */
16059 return FALSE;
16060 }
16061
3e170ce0 16062 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
16063 object->vo_size != ANON_CHUNK_SIZE) {
16064 /* ... not an object created for the ObjC Garbage Collector */
16065 return FALSE;
16066 }
16067
3e170ce0 16068 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
16069 object->vo_size != 2048 * 4096) {
16070 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
16071 return FALSE;
16072 }
16073
16074 /*
16075 * All the criteria match: we have a large object being targeted for "true_share".
16076 * To limit the adverse side-effects linked with "true_share", tell the caller to
16077 * try and avoid setting up the entire object for "true_share" by clipping the
16078 * targeted range and setting it up for copy-on-write.
16079 */
16080 return TRUE;
16081}
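/*
 * Caller-side sketch: when this returns TRUE, the caller is expected to
 * clip the map entry to just the targeted range and set that clipped
 * entry up for copy-on-write (for instance by giving it its own shadow
 * object), so that "true_share" ends up applying to a small object
 * instead of the large one described above.
 */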
39236c6e
A
16082
16083vm_map_offset_t
16084vm_map_round_page_mask(
16085 vm_map_offset_t offset,
16086 vm_map_offset_t mask)
16087{
16088 return VM_MAP_ROUND_PAGE(offset, mask);
16089}
16090
16091vm_map_offset_t
16092vm_map_trunc_page_mask(
16093 vm_map_offset_t offset,
16094 vm_map_offset_t mask)
16095{
16096 return VM_MAP_TRUNC_PAGE(offset, mask);
16097}
16098
3e170ce0
A
16099boolean_t
16100vm_map_page_aligned(
16101 vm_map_offset_t offset,
16102 vm_map_offset_t mask)
16103{
16104 return ((offset) & mask) == 0;
16105}
16106
39236c6e
A
16107int
16108vm_map_page_shift(
16109 vm_map_t map)
16110{
16111 return VM_MAP_PAGE_SHIFT(map);
16112}
16113
16114int
16115vm_map_page_size(
16116 vm_map_t map)
16117{
16118 return VM_MAP_PAGE_SIZE(map);
16119}
16120
3e170ce0 16121vm_map_offset_t
39236c6e
A
16122vm_map_page_mask(
16123 vm_map_t map)
16124{
16125 return VM_MAP_PAGE_MASK(map);
16126}
16127
16128kern_return_t
16129vm_map_set_page_shift(
16130 vm_map_t map,
16131 int pageshift)
16132{
16133 if (map->hdr.nentries != 0) {
16134 /* too late to change page size */
16135 return KERN_FAILURE;
16136 }
16137
16138 map->hdr.page_shift = pageshift;
16139
16140 return KERN_SUCCESS;
16141}
16142
16143kern_return_t
16144vm_map_query_volatile(
16145 vm_map_t map,
16146 mach_vm_size_t *volatile_virtual_size_p,
16147 mach_vm_size_t *volatile_resident_size_p,
3e170ce0
A
16148 mach_vm_size_t *volatile_compressed_size_p,
16149 mach_vm_size_t *volatile_pmap_size_p,
16150 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e
A
16151{
16152 mach_vm_size_t volatile_virtual_size;
16153 mach_vm_size_t volatile_resident_count;
3e170ce0 16154 mach_vm_size_t volatile_compressed_count;
39236c6e 16155 mach_vm_size_t volatile_pmap_count;
3e170ce0 16156 mach_vm_size_t volatile_compressed_pmap_count;
39236c6e
A
16157 mach_vm_size_t resident_count;
16158 vm_map_entry_t entry;
16159 vm_object_t object;
16160
16161 /* map should be locked by caller */
16162
16163 volatile_virtual_size = 0;
16164 volatile_resident_count = 0;
3e170ce0 16165 volatile_compressed_count = 0;
39236c6e 16166 volatile_pmap_count = 0;
3e170ce0 16167 volatile_compressed_pmap_count = 0;
39236c6e
A
16168
16169 for (entry = vm_map_first_entry(map);
16170 entry != vm_map_to_entry(map);
16171 entry = entry->vme_next) {
4bd07ac2
A
16172 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
16173
39236c6e
A
16174 if (entry->is_sub_map) {
16175 continue;
16176 }
16177 if (! (entry->protection & VM_PROT_WRITE)) {
16178 continue;
16179 }
3e170ce0 16180 object = VME_OBJECT(entry);
39236c6e
A
16181 if (object == VM_OBJECT_NULL) {
16182 continue;
16183 }
3e170ce0
A
16184 if (object->purgable != VM_PURGABLE_VOLATILE &&
16185 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
16186 continue;
16187 }
3e170ce0 16188 if (VME_OFFSET(entry)) {
39236c6e
A
16189 /*
16190 * If the map entry has been split and the object now
16191 * appears several times in the VM map, we don't want
16192 * to count the object's resident_page_count more than
16193 * once. We count it only for the first one, starting
16194 * at offset 0 and ignore the other VM map entries.
16195 */
16196 continue;
16197 }
16198 resident_count = object->resident_page_count;
3e170ce0 16199 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
16200 resident_count = 0;
16201 } else {
3e170ce0 16202 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
16203 }
16204
16205 volatile_virtual_size += entry->vme_end - entry->vme_start;
16206 volatile_resident_count += resident_count;
3e170ce0
A
16207 if (object->pager) {
16208 volatile_compressed_count +=
16209 vm_compressor_pager_get_count(object->pager);
16210 }
4bd07ac2
A
16211 pmap_compressed_bytes = 0;
16212 pmap_resident_bytes =
16213 pmap_query_resident(map->pmap,
16214 entry->vme_start,
16215 entry->vme_end,
16216 &pmap_compressed_bytes);
16217 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
16218 volatile_compressed_pmap_count += (pmap_compressed_bytes
16219 / PAGE_SIZE);
39236c6e
A
16220 }
16221
16222 /* map is still locked on return */
16223
16224 *volatile_virtual_size_p = volatile_virtual_size;
16225 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 16226 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 16227 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 16228 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
16229
16230 return KERN_SUCCESS;
16231}
fe8ab488 16232
3e170ce0
A
16233void
16234vm_map_sizes(vm_map_t map,
16235 vm_map_size_t * psize,
16236 vm_map_size_t * pfree,
16237 vm_map_size_t * plargest_free)
16238{
16239 vm_map_entry_t entry;
16240 vm_map_offset_t prev;
16241 vm_map_size_t free, total_free, largest_free;
16242 boolean_t end;
16243
39037602
A
16244 if (!map)
16245 {
16246 *psize = *pfree = *plargest_free = 0;
16247 return;
16248 }
3e170ce0
A
16249 total_free = largest_free = 0;
16250
16251 vm_map_lock_read(map);
16252 if (psize) *psize = map->max_offset - map->min_offset;
16253
16254 prev = map->min_offset;
16255 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16256 {
16257 end = (entry == vm_map_to_entry(map));
16258
16259 if (end) free = entry->vme_end - prev;
16260 else free = entry->vme_start - prev;
16261
16262 total_free += free;
16263 if (free > largest_free) largest_free = free;
16264
16265 if (end) break;
16266 prev = entry->vme_end;
16267 }
16268 vm_map_unlock_read(map);
16269 if (pfree) *pfree = total_free;
16270 if (plargest_free) *plargest_free = largest_free;
16271}
16272
fe8ab488
A
16273#if VM_SCAN_FOR_SHADOW_CHAIN
16274int vm_map_shadow_max(vm_map_t map);
16275int vm_map_shadow_max(
16276 vm_map_t map)
16277{
16278 int shadows, shadows_max;
16279 vm_map_entry_t entry;
16280 vm_object_t object, next_object;
16281
16282 if (map == NULL)
16283 return 0;
16284
16285 shadows_max = 0;
16286
16287 vm_map_lock_read(map);
16288
16289 for (entry = vm_map_first_entry(map);
16290 entry != vm_map_to_entry(map);
16291 entry = entry->vme_next) {
16292 if (entry->is_sub_map) {
16293 continue;
16294 }
3e170ce0 16295 object = VME_OBJECT(entry);
fe8ab488
A
16296 if (object == NULL) {
16297 continue;
16298 }
16299 vm_object_lock_shared(object);
16300 for (shadows = 0;
16301 object->shadow != NULL;
16302 shadows++, object = next_object) {
16303 next_object = object->shadow;
16304 vm_object_lock_shared(next_object);
16305 vm_object_unlock(object);
16306 }
16307 vm_object_unlock(object);
16308 if (shadows > shadows_max) {
16309 shadows_max = shadows;
16310 }
16311 }
16312
16313 vm_map_unlock_read(map);
16314
16315 return shadows_max;
16316}
16317#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
39037602
A
16318
16319void vm_commit_pagezero_status(vm_map_t lmap) {
16320 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
16321}