]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_map.c
xnu-3248.30.4.tar.gz
[apple/xnu.git] / osfmk / vm / vm_map.c
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
fe8ab488
A
68
69#include <vm/vm_options.h>
70
91447636 71#include <libkern/OSAtomic.h>
1c79356b
A
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
55e303ae 78#include <mach/vm_statistics.h>
91447636 79#include <mach/memory_object.h>
0c530ab8 80#include <mach/mach_vm.h>
91447636 81#include <machine/cpu_capabilities.h>
2d21ac55 82#include <mach/sdt.h>
91447636 83
1c79356b
A
84#include <kern/assert.h>
85#include <kern/counters.h>
91447636 86#include <kern/kalloc.h>
1c79356b 87#include <kern/zalloc.h>
91447636
A
88
89#include <vm/cpm.h>
39236c6e 90#include <vm/vm_compressor_pager.h>
1c79356b
A
91#include <vm/vm_init.h>
92#include <vm/vm_fault.h>
93#include <vm/vm_map.h>
94#include <vm/vm_object.h>
95#include <vm/vm_page.h>
b0d623f7 96#include <vm/vm_pageout.h>
1c79356b
A
97#include <vm/vm_kern.h>
98#include <ipc/ipc_port.h>
99#include <kern/sched_prim.h>
100#include <kern/misc_protos.h>
1c79356b
A
101#include <kern/xpr.h>
102
91447636
A
103#include <mach/vm_map_server.h>
104#include <mach/mach_host_server.h>
2d21ac55 105#include <vm/vm_protos.h>
b0d623f7 106#include <vm/vm_purgeable_internal.h>
91447636 107
91447636 108#include <vm/vm_protos.h>
2d21ac55 109#include <vm/vm_shared_region.h>
6d2010ae 110#include <vm/vm_map_store.h>
91447636 111
3e170ce0 112
316670eb 113extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
114/* Internal prototypes
115 */
2d21ac55 116
91447636
A
117static void vm_map_simplify_range(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end); /* forward */
121
122static boolean_t vm_map_range_check(
2d21ac55
A
123 vm_map_t map,
124 vm_map_offset_t start,
125 vm_map_offset_t end,
126 vm_map_entry_t *entry);
1c79356b 127
91447636 128static vm_map_entry_t _vm_map_entry_create(
7ddcb079 129 struct vm_map_header *map_header, boolean_t map_locked);
1c79356b 130
91447636 131static void _vm_map_entry_dispose(
2d21ac55
A
132 struct vm_map_header *map_header,
133 vm_map_entry_t entry);
1c79356b 134
91447636 135static void vm_map_pmap_enter(
2d21ac55
A
136 vm_map_t map,
137 vm_map_offset_t addr,
138 vm_map_offset_t end_addr,
139 vm_object_t object,
140 vm_object_offset_t offset,
141 vm_prot_t protection);
1c79356b 142
91447636 143static void _vm_map_clip_end(
2d21ac55
A
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t end);
91447636
A
147
148static void _vm_map_clip_start(
2d21ac55
A
149 struct vm_map_header *map_header,
150 vm_map_entry_t entry,
151 vm_map_offset_t start);
1c79356b 152
91447636 153static void vm_map_entry_delete(
2d21ac55
A
154 vm_map_t map,
155 vm_map_entry_t entry);
1c79356b 156
91447636 157static kern_return_t vm_map_delete(
2d21ac55
A
158 vm_map_t map,
159 vm_map_offset_t start,
160 vm_map_offset_t end,
161 int flags,
162 vm_map_t zap_map);
1c79356b 163
91447636 164static kern_return_t vm_map_copy_overwrite_unaligned(
2d21ac55
A
165 vm_map_t dst_map,
166 vm_map_entry_t entry,
167 vm_map_copy_t copy,
39236c6e
A
168 vm_map_address_t start,
169 boolean_t discard_on_success);
1c79356b 170
91447636 171static kern_return_t vm_map_copy_overwrite_aligned(
2d21ac55
A
172 vm_map_t dst_map,
173 vm_map_entry_t tmp_entry,
174 vm_map_copy_t copy,
175 vm_map_offset_t start,
176 pmap_t pmap);
1c79356b 177
91447636 178static kern_return_t vm_map_copyin_kernel_buffer(
2d21ac55
A
179 vm_map_t src_map,
180 vm_map_address_t src_addr,
181 vm_map_size_t len,
182 boolean_t src_destroy,
183 vm_map_copy_t *copy_result); /* OUT */
1c79356b 184
91447636 185static kern_return_t vm_map_copyout_kernel_buffer(
2d21ac55
A
186 vm_map_t map,
187 vm_map_address_t *addr, /* IN/OUT */
188 vm_map_copy_t copy,
39236c6e
A
189 boolean_t overwrite,
190 boolean_t consume_on_success);
1c79356b 191
91447636 192static void vm_map_fork_share(
2d21ac55
A
193 vm_map_t old_map,
194 vm_map_entry_t old_entry,
195 vm_map_t new_map);
1c79356b 196
91447636 197static boolean_t vm_map_fork_copy(
2d21ac55
A
198 vm_map_t old_map,
199 vm_map_entry_t *old_entry_p,
200 vm_map_t new_map);
1c79356b 201
0c530ab8 202void vm_map_region_top_walk(
2d21ac55
A
203 vm_map_entry_t entry,
204 vm_region_top_info_t top);
1c79356b 205
0c530ab8 206void vm_map_region_walk(
2d21ac55
A
207 vm_map_t map,
208 vm_map_offset_t va,
209 vm_map_entry_t entry,
210 vm_object_offset_t offset,
211 vm_object_size_t range,
212 vm_region_extended_info_t extended,
39236c6e
A
213 boolean_t look_for_pages,
214 mach_msg_type_number_t count);
91447636
A
215
216static kern_return_t vm_map_wire_nested(
2d21ac55
A
217 vm_map_t map,
218 vm_map_offset_t start,
219 vm_map_offset_t end,
3e170ce0 220 vm_prot_t caller_prot,
2d21ac55
A
221 boolean_t user_wire,
222 pmap_t map_pmap,
fe8ab488
A
223 vm_map_offset_t pmap_addr,
224 ppnum_t *physpage_p);
91447636
A
225
226static kern_return_t vm_map_unwire_nested(
2d21ac55
A
227 vm_map_t map,
228 vm_map_offset_t start,
229 vm_map_offset_t end,
230 boolean_t user_wire,
231 pmap_t map_pmap,
232 vm_map_offset_t pmap_addr);
91447636
A
233
234static kern_return_t vm_map_overwrite_submap_recurse(
2d21ac55
A
235 vm_map_t dst_map,
236 vm_map_offset_t dst_addr,
237 vm_map_size_t dst_size);
91447636
A
238
239static kern_return_t vm_map_copy_overwrite_nested(
2d21ac55
A
240 vm_map_t dst_map,
241 vm_map_offset_t dst_addr,
242 vm_map_copy_t copy,
243 boolean_t interruptible,
6d2010ae
A
244 pmap_t pmap,
245 boolean_t discard_on_success);
91447636
A
246
247static kern_return_t vm_map_remap_extract(
2d21ac55
A
248 vm_map_t map,
249 vm_map_offset_t addr,
250 vm_map_size_t size,
251 boolean_t copy,
252 struct vm_map_header *map_header,
253 vm_prot_t *cur_protection,
254 vm_prot_t *max_protection,
255 vm_inherit_t inheritance,
256 boolean_t pageable);
91447636
A
257
258static kern_return_t vm_map_remap_range_allocate(
2d21ac55
A
259 vm_map_t map,
260 vm_map_address_t *address,
261 vm_map_size_t size,
262 vm_map_offset_t mask,
060df5ea 263 int flags,
2d21ac55 264 vm_map_entry_t *map_entry);
91447636
A
265
266static void vm_map_region_look_for_page(
2d21ac55
A
267 vm_map_t map,
268 vm_map_offset_t va,
269 vm_object_t object,
270 vm_object_offset_t offset,
271 int max_refcnt,
272 int depth,
39236c6e
A
273 vm_region_extended_info_t extended,
274 mach_msg_type_number_t count);
91447636
A
275
276static int vm_map_region_count_obj_refs(
2d21ac55
A
277 vm_map_entry_t entry,
278 vm_object_t object);
1c79356b 279
b0d623f7
A
280
281static kern_return_t vm_map_willneed(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286static kern_return_t vm_map_reuse_pages(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291static kern_return_t vm_map_reusable_pages(
292 vm_map_t map,
293 vm_map_offset_t start,
294 vm_map_offset_t end);
295
296static kern_return_t vm_map_can_reuse(
297 vm_map_t map,
298 vm_map_offset_t start,
299 vm_map_offset_t end);
300
3e170ce0
A
301#if MACH_ASSERT
302static kern_return_t vm_map_pageout(
303 vm_map_t map,
304 vm_map_offset_t start,
305 vm_map_offset_t end);
306#endif /* MACH_ASSERT */
6d2010ae 307
1c79356b
A
308/*
309 * Macros to copy a vm_map_entry. We must be careful to correctly
310 * manage the wired page count. vm_map_entry_copy() creates a new
311 * map entry to the same memory - the wired count in the new entry
312 * must be set to zero. vm_map_entry_copy_full() creates a new
313 * entry that is identical to the old entry. This preserves the
314 * wire count; it's used for map splitting and zone changing in
315 * vm_map_copyout.
316 */
316670eb 317
7ddcb079
A
/*
 * vm_map_entry_copy(NEW, OLD):
 *	Structure-copy *OLD into *NEW, then reset the per-mapping state
 *	(sharing, wakeup/transition flags, wire counts, permanent/JIT/iokit
 *	accounting and resilience flags).  NEW's own from_reserved_zone
 *	value is saved before the copy and restored afterwards.
 */
#define vm_map_entry_copy(NEW,OLD)					\
MACRO_BEGIN								\
	boolean_t _vmec_keep_reserved = (NEW)->from_reserved_zone;	\
									\
	*(NEW) = *(OLD);						\
	(NEW)->from_reserved_zone = _vmec_keep_reserved;		\
	(NEW)->wired_count = 0;						\
	(NEW)->user_wired_count = 0;					\
	(NEW)->is_shared = FALSE;					\
	(NEW)->needs_wakeup = FALSE;					\
	(NEW)->in_transition = FALSE;					\
	(NEW)->permanent = FALSE;					\
	(NEW)->used_for_jit = FALSE;					\
	(NEW)->iokit_acct = FALSE;					\
	(NEW)->vme_resilient_codesign = FALSE;				\
	(NEW)->vme_resilient_media = FALSE;				\
MACRO_END

/*
 * vm_map_entry_copy_full(NEW, OLD):
 *	Structure-copy *OLD into *NEW unchanged, except that NEW keeps the
 *	from_reserved_zone value it had before the copy.
 */
#define vm_map_entry_copy_full(NEW,OLD)					\
MACRO_BEGIN								\
	boolean_t _vmecf_keep_reserved = (NEW)->from_reserved_zone;	\
									\
	*(NEW) = *(OLD);						\
	(NEW)->from_reserved_zone = _vmecf_keep_reserved;		\
MACRO_END
1c79356b 341
2d21ac55
A
342/*
343 * Decide if we want to allow processes to execute from their data or stack areas.
344 * override_nx() returns true if we do. Data/stack execution can be enabled independently
345 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
346 * or allow_stack_exec to enable data execution for that type of data area for that particular
347 * ABI (or both by or'ing the flags together). These are initialized in the architecture
348 * specific pmap files since the default behavior varies according to architecture. The
349 * main reason it varies is because of the need to provide binary compatibility with old
350 * applications that were written before these restrictions came into being. In the old
351 * days, an app could execute anything it could read, but this has slowly been tightened
352 * up over time. The default behavior is:
353 *
354 * 32-bit PPC apps may execute from both stack and data areas
355 * 32-bit Intel apps may execute from data areas but not stack
356 * 64-bit PPC/Intel apps may not execute from either data or stack
357 *
358 * An application on any architecture may override these defaults by explicitly
359 * adding PROT_EXEC permission to the page in question with the mprotect(2)
360 * system call. This code here just determines what happens when an app tries to
361 * execute from a page that lacks execute permission.
362 *
363 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae
A
364 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
365 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
366 * execution from data areas for a particular binary even if the arch normally permits it. As
367 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
368 * to support some complicated use cases, notably browsers with out-of-process plugins that
369 * are not all NX-safe.
2d21ac55
A
370 */
371
372extern int allow_data_exec, allow_stack_exec;
373
374int
375override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
376{
377 int current_abi;
378
3e170ce0
A
379 if (map->pmap == kernel_pmap) return FALSE;
380
2d21ac55
A
381 /*
382 * Determine if the app is running in 32 or 64 bit mode.
383 */
384
385 if (vm_map_is_64bit(map))
386 current_abi = VM_ABI_64;
387 else
388 current_abi = VM_ABI_32;
389
390 /*
391 * Determine if we should allow the execution based on whether it's a
392 * stack or data area and the current architecture.
393 */
394
395 if (user_tag == VM_MEMORY_STACK)
396 return allow_stack_exec & current_abi;
397
6d2010ae 398 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
399}
400
401
1c79356b
A
402/*
403 * Virtual memory maps provide for the mapping, protection,
404 * and sharing of virtual memory objects. In addition,
405 * this module provides for an efficient virtual copy of
406 * memory from one map to another.
407 *
408 * Synchronization is required prior to most operations.
409 *
410 * Maps consist of an ordered doubly-linked list of simple
411 * entries; a single hint is used to speed up lookups.
412 *
413 * Sharing maps have been deleted from this version of Mach.
414 * All shared objects are now mapped directly into the respective
415 * maps. This requires a change in the copy on write strategy;
416 * the asymmetric (delayed) strategy is used for shared temporary
417 * objects instead of the symmetric (shadow) strategy. All maps
418 * are now "top level" maps (either task map, kernel map or submap
419 * of the kernel map).
420 *
421 * Since portions of maps are specified by start/end addresses,
422 * which may not align with existing map entries, all
423 * routines merely "clip" entries to these start/end values.
424 * [That is, an entry is split into two, bordering at a
425 * start or end value.] Note that these clippings may not
426 * always be necessary (as the two resulting entries are then
427 * not changed); however, the clipping is done for convenience.
428 * No attempt is currently made to "glue back together" two
429 * abutting entries.
430 *
431 * The symmetric (shadow) copy strategy implements virtual copy
432 * by copying VM object references from one map to
433 * another, and then marking both regions as copy-on-write.
434 * It is important to note that only one writeable reference
435 * to a VM object region exists in any map when this strategy
436 * is used -- this means that shadow object creation can be
437 * delayed until a write operation occurs. The asymmetric (delayed)
438 * strategy allows multiple maps to have writeable references to
439 * the same region of a vm object, and hence cannot delay creating
440 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
441 * Copying of permanent objects is completely different; see
442 * vm_object_copy_strategically() in vm_object.c.
443 */
444
91447636
A
445static zone_t vm_map_zone; /* zone for vm_map structures */
446static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
7ddcb079
A
447static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
448 * allocations */
91447636 449static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
3e170ce0 450zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
1c79356b
A
451
452
453/*
454 * Placeholder object for submap operations. This object is dropped
455 * into the range by a call to vm_map_find, and removed when
456 * vm_map_submap creates the submap.
457 */
458
459vm_object_t vm_submap_object;
460
91447636 461static void *map_data;
b0d623f7 462static vm_size_t map_data_size;
91447636 463static void *kentry_data;
b0d623f7 464static vm_size_t kentry_data_size;
3e170ce0
A
465static void *map_holes_data;
466static vm_size_t map_holes_data_size;
1c79356b 467
b0d623f7 468#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
1c79356b 469
55e303ae 470/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 471unsigned int not_in_kdp = 1;
55e303ae 472
6d2010ae
A
473unsigned int vm_map_set_cache_attr_count = 0;
474
475kern_return_t
476vm_map_set_cache_attr(
477 vm_map_t map,
478 vm_map_offset_t va)
479{
480 vm_map_entry_t map_entry;
481 vm_object_t object;
482 kern_return_t kr = KERN_SUCCESS;
483
484 vm_map_lock_read(map);
485
486 if (!vm_map_lookup_entry(map, va, &map_entry) ||
487 map_entry->is_sub_map) {
488 /*
489 * that memory is not properly mapped
490 */
491 kr = KERN_INVALID_ARGUMENT;
492 goto done;
493 }
3e170ce0 494 object = VME_OBJECT(map_entry);
6d2010ae
A
495
496 if (object == VM_OBJECT_NULL) {
497 /*
498 * there should be a VM object here at this point
499 */
500 kr = KERN_INVALID_ARGUMENT;
501 goto done;
502 }
503 vm_object_lock(object);
504 object->set_cache_attr = TRUE;
505 vm_object_unlock(object);
506
507 vm_map_set_cache_attr_count++;
508done:
509 vm_map_unlock_read(map);
510
511 return kr;
512}
513
514
593a1d5f
A
515#if CONFIG_CODE_DECRYPTION
516/*
517 * vm_map_apple_protected:
518 * This remaps the requested part of the object with an object backed by
519 * the decrypting pager.
520 * crypt_info contains entry points and session data for the crypt module.
521 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
522 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
523 */
0c530ab8
A
524kern_return_t
525vm_map_apple_protected(
3e170ce0
A
526 vm_map_t map,
527 vm_map_offset_t start,
528 vm_map_offset_t end,
529 vm_object_offset_t crypto_backing_offset,
593a1d5f 530 struct pager_crypt_info *crypt_info)
0c530ab8
A
531{
532 boolean_t map_locked;
533 kern_return_t kr;
534 vm_map_entry_t map_entry;
3e170ce0
A
535 struct vm_map_entry tmp_entry;
536 memory_object_t unprotected_mem_obj;
0c530ab8
A
537 vm_object_t protected_object;
538 vm_map_offset_t map_addr;
3e170ce0
A
539 vm_map_offset_t start_aligned, end_aligned;
540 vm_object_offset_t crypto_start, crypto_end;
541 int vm_flags;
0c530ab8 542
3e170ce0
A
543 map_locked = FALSE;
544 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 545
3e170ce0
A
546 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
547 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
548 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
549 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 550
3e170ce0
A
551 assert(start_aligned == start);
552 assert(end_aligned == end);
b0d623f7 553
3e170ce0
A
554 map_addr = start_aligned;
555 for (map_addr = start_aligned;
556 map_addr < end;
557 map_addr = tmp_entry.vme_end) {
558 vm_map_lock(map);
559 map_locked = TRUE;
b0d623f7 560
3e170ce0
A
561 /* lookup the protected VM object */
562 if (!vm_map_lookup_entry(map,
563 map_addr,
564 &map_entry) ||
565 map_entry->is_sub_map ||
566 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
567 !(map_entry->protection & VM_PROT_EXECUTE)) {
568 /* that memory is not properly mapped */
569 kr = KERN_INVALID_ARGUMENT;
570 goto done;
571 }
b0d623f7 572
3e170ce0
A
573 /* get the protected object to be decrypted */
574 protected_object = VME_OBJECT(map_entry);
575 if (protected_object == VM_OBJECT_NULL) {
576 /* there should be a VM object here at this point */
577 kr = KERN_INVALID_ARGUMENT;
578 goto done;
579 }
580 /* ensure protected object stays alive while map is unlocked */
581 vm_object_reference(protected_object);
582
583 /* limit the map entry to the area we want to cover */
584 vm_map_clip_start(map, map_entry, start_aligned);
585 vm_map_clip_end(map, map_entry, end_aligned);
586
587 tmp_entry = *map_entry;
588 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
589 vm_map_unlock(map);
590 map_locked = FALSE;
591
592 /*
593 * This map entry might be only partially encrypted
594 * (if not fully "page-aligned").
595 */
596 crypto_start = 0;
597 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
598 if (tmp_entry.vme_start < start) {
599 if (tmp_entry.vme_start != start_aligned) {
600 kr = KERN_INVALID_ADDRESS;
601 }
602 crypto_start += (start - tmp_entry.vme_start);
603 }
604 if (tmp_entry.vme_end > end) {
605 if (tmp_entry.vme_end != end_aligned) {
606 kr = KERN_INVALID_ADDRESS;
607 }
608 crypto_end -= (tmp_entry.vme_end - end);
609 }
610
611 /*
612 * This "extra backing offset" is needed to get the decryption
613 * routine to use the right key. It adjusts for the possibly
614 * relative offset of an interposed "4K" pager...
615 */
616 if (crypto_backing_offset == (vm_object_offset_t) -1) {
617 crypto_backing_offset = VME_OFFSET(&tmp_entry);
618 }
0c530ab8 619
3e170ce0
A
620 /*
621 * Lookup (and create if necessary) the protected memory object
622 * matching that VM object.
623 * If successful, this also grabs a reference on the memory object,
624 * to guarantee that it doesn't go away before we get a chance to map
625 * it.
626 */
627 unprotected_mem_obj = apple_protect_pager_setup(
628 protected_object,
629 VME_OFFSET(&tmp_entry),
630 crypto_backing_offset,
631 crypt_info,
632 crypto_start,
633 crypto_end);
634
635 /* release extra ref on protected object */
636 vm_object_deallocate(protected_object);
637
638 if (unprotected_mem_obj == NULL) {
639 kr = KERN_FAILURE;
640 goto done;
641 }
642
643 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
644
645 /* map this memory object in place of the current one */
646 map_addr = tmp_entry.vme_start;
647 kr = vm_map_enter_mem_object(map,
648 &map_addr,
649 (tmp_entry.vme_end -
650 tmp_entry.vme_start),
651 (mach_vm_offset_t) 0,
652 vm_flags,
653 (ipc_port_t) unprotected_mem_obj,
654 0,
655 TRUE,
656 tmp_entry.protection,
657 tmp_entry.max_protection,
658 tmp_entry.inheritance);
659 assert(kr == KERN_SUCCESS);
660 assert(map_addr == tmp_entry.vme_start);
661
662#if VM_MAP_DEBUG_APPLE_PROTECT
663 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p: "
664 "backing:[object:%p,offset:0x%llx,"
665 "crypto_backing_offset:0x%llx,"
666 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
667 map,
668 (uint64_t) map_addr,
669 (uint64_t) (map_addr + (tmp_entry.vme_end -
670 tmp_entry.vme_start)),
671 unprotected_mem_obj,
672 protected_object,
673 VME_OFFSET(&tmp_entry),
674 crypto_backing_offset,
675 crypto_start,
676 crypto_end);
677#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
678
679 /*
680 * Release the reference obtained by
681 * apple_protect_pager_setup().
682 * The mapping (if it succeeded) is now holding a reference on
683 * the memory object.
684 */
685 memory_object_deallocate(unprotected_mem_obj);
686 unprotected_mem_obj = MEMORY_OBJECT_NULL;
687
688 /* continue with next map entry */
689 crypto_backing_offset += (tmp_entry.vme_end -
690 tmp_entry.vme_start);
691 crypto_backing_offset -= crypto_start;
692 }
693 kr = KERN_SUCCESS;
0c530ab8
A
694
695done:
696 if (map_locked) {
3e170ce0 697 vm_map_unlock(map);
0c530ab8
A
698 }
699 return kr;
700}
593a1d5f 701#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
702
703
b0d623f7
A
704lck_grp_t vm_map_lck_grp;
705lck_grp_attr_t vm_map_lck_grp_attr;
706lck_attr_t vm_map_lck_attr;
fe8ab488 707lck_attr_t vm_map_lck_rw_attr;
b0d623f7
A
708
709
593a1d5f
A
710/*
711 * vm_map_init:
712 *
713 * Initialize the vm_map module. Must be called before
714 * any other vm_map routines.
715 *
716 * Map and entry structures are allocated from zones -- we must
717 * initialize those zones.
718 *
719 * There are three zones of interest:
720 *
721 * vm_map_zone: used to allocate maps.
722 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 723 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
724 *
725 * The kernel allocates map entries from a special zone that is initially
726 * "crammed" with memory. It would be difficult (perhaps impossible) for
727 * the kernel to allocate more memory to an entry zone when it became
728 * empty since the very act of allocating memory implies the creation
729 * of a new entry.
730 */
1c79356b
A
731void
732vm_map_init(
733 void)
734{
7ddcb079 735 vm_size_t entry_zone_alloc_size;
316670eb
A
736 const char *mez_name = "VM map entries";
737
2d21ac55
A
738 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
739 PAGE_SIZE, "maps");
0b4c1975 740 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
7ddcb079
A
741#if defined(__LP64__)
742 entry_zone_alloc_size = PAGE_SIZE * 5;
743#else
744 entry_zone_alloc_size = PAGE_SIZE * 6;
745#endif
91447636 746 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
7ddcb079 747 1024*1024, entry_zone_alloc_size,
316670eb 748 mez_name);
0b4c1975 749 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 750 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 751 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 752
7ddcb079
A
753 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
754 kentry_data_size * 64, kentry_data_size,
755 "Reserved VM map entries");
756 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
1c79356b 757
91447636 758 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
7ddcb079 759 16*1024, PAGE_SIZE, "VM map copies");
0b4c1975 760 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b 761
3e170ce0
A
762 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
763 16*1024, PAGE_SIZE, "VM map holes");
764 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
765
1c79356b
A
766 /*
767 * Cram the map and kentry zones with initial data.
7ddcb079 768 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
769 */
770 zone_change(vm_map_zone, Z_COLLECT, FALSE);
7ddcb079
A
771
772 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
773 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
774 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
775 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
776 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 777 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 778 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 779
3e170ce0
A
780 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
781 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
782 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
783 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
784 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
785 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
786
787 /*
788 * Add the stolen memory to zones, adjust zone size and stolen counts.
789 */
7ddcb079
A
790 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
791 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
3e170ce0
A
792 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
793 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
794
b0d623f7
A
795 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
796 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
797 lck_attr_setdefault(&vm_map_lck_attr);
316670eb 798
fe8ab488
A
799 lck_attr_setdefault(&vm_map_lck_rw_attr);
800 lck_attr_cleardebug(&vm_map_lck_rw_attr);
801
316670eb
A
802#if CONFIG_FREEZE
803 default_freezer_init();
804#endif /* CONFIG_FREEZE */
1c79356b
A
805}
806
807void
808vm_map_steal_memory(
809 void)
810{
7ddcb079
A
811 uint32_t kentry_initial_pages;
812
b0d623f7 813 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
814 map_data = pmap_steal_memory(map_data_size);
815
1c79356b 816 /*
7ddcb079
A
817 * kentry_initial_pages corresponds to the number of kernel map entries
818 * required during bootstrap until the asynchronous replenishment
819 * scheme is activated and/or entries are available from the general
820 * map entry pool.
1c79356b 821 */
7ddcb079
A
822#if defined(__LP64__)
823 kentry_initial_pages = 10;
824#else
825 kentry_initial_pages = 6;
1c79356b 826#endif
316670eb
A
827
828#if CONFIG_GZALLOC
829 /* If using the guard allocator, reserve more memory for the kernel
830 * reserved map entry pool.
831 */
832 if (gzalloc_enabled())
833 kentry_initial_pages *= 1024;
834#endif
835
7ddcb079 836 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b 837 kentry_data = pmap_steal_memory(kentry_data_size);
3e170ce0
A
838
839 map_holes_data_size = kentry_data_size;
840 map_holes_data = pmap_steal_memory(map_holes_data_size);
1c79356b
A
841}
842
3e170ce0
A
843void
844vm_kernel_reserved_entry_init(void) {
7ddcb079 845 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
3e170ce0
A
846 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
847}
848
849void
850vm_map_disable_hole_optimization(vm_map_t map)
851{
852 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
853
854 if (map->holelistenabled) {
855
856 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
857
858 while (hole_entry != NULL) {
859
860 next_hole_entry = hole_entry->vme_next;
861
862 hole_entry->vme_next = NULL;
863 hole_entry->vme_prev = NULL;
864 zfree(vm_map_holes_zone, hole_entry);
865
866 if (next_hole_entry == head_entry) {
867 hole_entry = NULL;
868 } else {
869 hole_entry = next_hole_entry;
870 }
871 }
872
873 map->holes_list = NULL;
874 map->holelistenabled = FALSE;
875
876 map->first_free = vm_map_first_entry(map);
877 SAVE_HINT_HOLE_WRITE(map, NULL);
878 }
879}
880
881boolean_t
882vm_kernel_map_is_kernel(vm_map_t map) {
883 return (map->pmap == kernel_pmap);
7ddcb079
A
884}
885
1c79356b
A
886/*
887 * vm_map_create:
888 *
889 * Creates and returns a new empty VM map with
890 * the given physical map structure, and having
891 * the given lower and upper address bounds.
892 */
3e170ce0
A
893
894boolean_t vm_map_supports_hole_optimization = TRUE;
895
1c79356b
A
896vm_map_t
897vm_map_create(
91447636
A
898 pmap_t pmap,
899 vm_map_offset_t min,
900 vm_map_offset_t max,
901 boolean_t pageable)
1c79356b 902{
2d21ac55 903 static int color_seed = 0;
1c79356b 904 register vm_map_t result;
3e170ce0 905 struct vm_map_links *hole_entry = NULL;
1c79356b
A
906
907 result = (vm_map_t) zalloc(vm_map_zone);
908 if (result == VM_MAP_NULL)
909 panic("vm_map_create");
910
911 vm_map_first_entry(result) = vm_map_to_entry(result);
912 vm_map_last_entry(result) = vm_map_to_entry(result);
913 result->hdr.nentries = 0;
914 result->hdr.entries_pageable = pageable;
915
6d2010ae
A
916 vm_map_store_init( &(result->hdr) );
917
39236c6e
A
918 result->hdr.page_shift = PAGE_SHIFT;
919
1c79356b 920 result->size = 0;
2d21ac55
A
921 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
922 result->user_wire_size = 0;
1c79356b
A
923 result->ref_count = 1;
924#if TASK_SWAPPER
925 result->res_count = 1;
926 result->sw_state = MAP_SW_IN;
927#endif /* TASK_SWAPPER */
928 result->pmap = pmap;
929 result->min_offset = min;
930 result->max_offset = max;
931 result->wiring_required = FALSE;
932 result->no_zero_fill = FALSE;
316670eb 933 result->mapped_in_other_pmaps = FALSE;
1c79356b 934 result->wait_for_space = FALSE;
b0d623f7 935 result->switch_protect = FALSE;
6d2010ae
A
936 result->disable_vmentry_reuse = FALSE;
937 result->map_disallow_data_exec = FALSE;
938 result->highest_entry_end = 0;
1c79356b
A
939 result->first_free = vm_map_to_entry(result);
940 result->hint = vm_map_to_entry(result);
2d21ac55 941 result->color_rr = (color_seed++) & vm_color_mask;
6d2010ae 942 result->jit_entry_exists = FALSE;
3e170ce0
A
943
944 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
945 hole_entry = zalloc(vm_map_holes_zone);
946
947 hole_entry->start = min;
948 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
949 result->holes_list = result->hole_hint = hole_entry;
950 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
951 result->holelistenabled = TRUE;
952
953 } else {
954
955 result->holelistenabled = FALSE;
956 }
957
6d2010ae 958#if CONFIG_FREEZE
316670eb 959 result->default_freezer_handle = NULL;
6d2010ae 960#endif
1c79356b 961 vm_map_lock_init(result);
b0d623f7
A
962 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
963
1c79356b
A
964 return(result);
965}
966
967/*
968 * vm_map_entry_create: [ internal use only ]
969 *
970 * Allocates a VM map entry for insertion in the
971 * given map (or map copy). No fields are filled.
972 */
7ddcb079 973#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 974
7ddcb079
A
975#define vm_map_copy_entry_create(copy, map_locked) \
976 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
977unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 978
91447636 979static vm_map_entry_t
1c79356b 980_vm_map_entry_create(
7ddcb079 981 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 982{
7ddcb079
A
983 zone_t zone;
984 vm_map_entry_t entry;
1c79356b 985
7ddcb079
A
986 zone = vm_map_entry_zone;
987
988 assert(map_header->entries_pageable ? !map_locked : TRUE);
989
990 if (map_header->entries_pageable) {
991 entry = (vm_map_entry_t) zalloc(zone);
992 }
993 else {
994 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
995
996 if (entry == VM_MAP_ENTRY_NULL) {
997 zone = vm_map_entry_reserved_zone;
998 entry = (vm_map_entry_t) zalloc(zone);
999 OSAddAtomic(1, &reserved_zalloc_count);
1000 } else
1001 OSAddAtomic(1, &nonreserved_zalloc_count);
1002 }
1c79356b 1003
1c79356b
A
1004 if (entry == VM_MAP_ENTRY_NULL)
1005 panic("vm_map_entry_create");
7ddcb079
A
1006 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1007
6d2010ae 1008 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
316670eb 1009#if MAP_ENTRY_CREATION_DEBUG
39236c6e
A
1010 entry->vme_creation_maphdr = map_header;
1011 fastbacktrace(&entry->vme_creation_bt[0],
1012 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
316670eb 1013#endif
1c79356b
A
1014 return(entry);
1015}
1016
1017/*
1018 * vm_map_entry_dispose: [ internal use only ]
1019 *
1020 * Inverse of vm_map_entry_create.
2d21ac55
A
1021 *
1022 * write map lock held so no need to
1023 * do anything special to ensure correctness
1024 * of the stores
1c79356b
A
1025 */
/* Dispose of an entry that belongs to a map (uses the map's "hdr"). */
#define	vm_map_entry_dispose(map, entry)			\
	_vm_map_entry_dispose(&(map)->hdr, (entry))

/*
 * Dispose of an entry that belongs to a map copy (uses "cpy_hdr").
 * The first parameter is named "copy" so that the expansion refers to
 * the macro argument instead of whatever identifier "copy" happens to
 * be in scope at the call site.
 */
#define	vm_map_copy_entry_dispose(copy, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1031
91447636 1032static void
1c79356b
A
1033_vm_map_entry_dispose(
1034 register struct vm_map_header *map_header,
1035 register vm_map_entry_t entry)
1036{
1037 register zone_t zone;
1038
7ddcb079 1039 if (map_header->entries_pageable || !(entry->from_reserved_zone))
2d21ac55 1040 zone = vm_map_entry_zone;
1c79356b 1041 else
7ddcb079
A
1042 zone = vm_map_entry_reserved_zone;
1043
1044 if (!map_header->entries_pageable) {
1045 if (zone == vm_map_entry_zone)
1046 OSAddAtomic(-1, &nonreserved_zalloc_count);
1047 else
1048 OSAddAtomic(-1, &reserved_zalloc_count);
1049 }
1c79356b 1050
91447636 1051 zfree(zone, entry);
1c79356b
A
1052}
1053
#if	MACH_ASSERT
/* When FALSE (the default), first_free_is_valid() accepts every map. */
static boolean_t first_free_check = FALSE;

/*
 * Validates the map's "first_free" hint by deferring to the map store
 * layer, but only when first_free_check is set; otherwise reports TRUE.
 */
boolean_t
first_free_is_valid(
	vm_map_t	map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif	/* MACH_ASSERT */
1c79356b 1066
1c79356b
A
1067
/*
 * Link/unlink an entry in a map copy: both operate on the copy's
 * embedded header ("cpy_hdr") through the map store layer.
 */
#define	vm_map_copy_entry_link(copy, after_where, entry)		\
	_vm_map_store_entry_link(&(copy)->cpy_hdr, (after_where), (entry))

#define	vm_map_copy_entry_unlink(copy, entry)				\
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1073
1c79356b 1074#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
1075/*
1076 * vm_map_res_reference:
1077 *
1078 * Adds another valid residence count to the given map.
1079 *
1080 * Map is locked so this function can be called from
1081 * vm_map_swapin.
1082 *
1083 */
1084void vm_map_res_reference(register vm_map_t map)
1085{
1086 /* assert map is locked */
1087 assert(map->res_count >= 0);
1088 assert(map->ref_count >= map->res_count);
1089 if (map->res_count == 0) {
b0d623f7 1090 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1091 vm_map_lock(map);
1092 vm_map_swapin(map);
b0d623f7 1093 lck_mtx_lock(&map->s_lock);
1c79356b
A
1094 ++map->res_count;
1095 vm_map_unlock(map);
1096 } else
1097 ++map->res_count;
1098}
1099
1100/*
1101 * vm_map_reference_swap:
1102 *
1103 * Adds valid reference and residence counts to the given map.
1104 *
1105 * The map may not be in memory (i.e. zero residence count).
1106 *
1107 */
1108void vm_map_reference_swap(register vm_map_t map)
1109{
1110 assert(map != VM_MAP_NULL);
b0d623f7 1111 lck_mtx_lock(&map->s_lock);
1c79356b
A
1112 assert(map->res_count >= 0);
1113 assert(map->ref_count >= map->res_count);
1114 map->ref_count++;
1115 vm_map_res_reference(map);
b0d623f7 1116 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1117}
1118
1119/*
1120 * vm_map_res_deallocate:
1121 *
1122 * Decrement residence count on a map; possibly causing swapout.
1123 *
1124 * The map must be in memory (i.e. non-zero residence count).
1125 *
1126 * The map is locked, so this function is callable from vm_map_deallocate.
1127 *
1128 */
1129void vm_map_res_deallocate(register vm_map_t map)
1130{
1131 assert(map->res_count > 0);
1132 if (--map->res_count == 0) {
b0d623f7 1133 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1134 vm_map_lock(map);
1135 vm_map_swapout(map);
1136 vm_map_unlock(map);
b0d623f7 1137 lck_mtx_lock(&map->s_lock);
1c79356b
A
1138 }
1139 assert(map->ref_count >= map->res_count);
1140}
1141#endif /* MACH_ASSERT && TASK_SWAPPER */
1142
1c79356b
A
1143/*
1144 * vm_map_destroy:
1145 *
1146 * Actually destroy a map.
1147 */
1148void
1149vm_map_destroy(
2d21ac55
A
1150 vm_map_t map,
1151 int flags)
91447636 1152{
1c79356b 1153 vm_map_lock(map);
2d21ac55 1154
3e170ce0
A
1155 /* final cleanup: no need to unnest shared region */
1156 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1157
2d21ac55
A
1158 /* clean up regular map entries */
1159 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1160 flags, VM_MAP_NULL);
1161 /* clean up leftover special mappings (commpage, etc...) */
2d21ac55
A
1162 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1163 flags, VM_MAP_NULL);
6d2010ae
A
1164
1165#if CONFIG_FREEZE
316670eb
A
1166 if (map->default_freezer_handle) {
1167 default_freezer_handle_deallocate(map->default_freezer_handle);
1168 map->default_freezer_handle = NULL;
6d2010ae
A
1169 }
1170#endif
3e170ce0 1171 vm_map_disable_hole_optimization(map);
1c79356b
A
1172 vm_map_unlock(map);
1173
2d21ac55
A
1174 assert(map->hdr.nentries == 0);
1175
55e303ae
A
1176 if(map->pmap)
1177 pmap_destroy(map->pmap);
1c79356b 1178
91447636 1179 zfree(vm_map_zone, map);
1c79356b
A
1180}
1181
1182#if TASK_SWAPPER
1183/*
1184 * vm_map_swapin/vm_map_swapout
1185 *
1186 * Swap a map in and out, either referencing or releasing its resources.
1187 * These functions are internal use only; however, they must be exported
1188 * because they may be called from macros, which are exported.
1189 *
1190 * In the case of swapout, there could be races on the residence count,
1191 * so if the residence count is up, we return, assuming that a
1192 * vm_map_deallocate() call in the near future will bring us back.
1193 *
1194 * Locking:
1195 * -- We use the map write lock for synchronization among races.
1196 * -- The map write lock, and not the simple s_lock, protects the
1197 * swap state of the map.
1198 * -- If a map entry is a share map, then we hold both locks, in
1199 * hierarchical order.
1200 *
1201 * Synchronization Notes:
1202 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1203 * will block on the map lock and proceed when swapout is through.
1204 * 2) A vm_map_reference() call at this time is illegal, and will
1205 * cause a panic. vm_map_reference() is only allowed on resident
1206 * maps, since it refuses to block.
1207 * 3) A vm_map_swapin() call during a swapin will block, and
1208 * proceed when the first swapin is done, turning into a nop.
1209 * This is the reason the res_count is not incremented until
1210 * after the swapin is complete.
1211 * 4) There is a timing hole after the checks of the res_count, before
1212 * the map lock is taken, during which a swapin may get the lock
1213 * before a swapout about to happen. If this happens, the swapin
1214 * will detect the state and increment the reference count, causing
1215 * the swapout to be a nop, thereby delaying it until a later
1216 * vm_map_deallocate. If the swapout gets the lock first, then
1217 * the swapin will simply block until the swapout is done, and
1218 * then proceed.
1219 *
1220 * Because vm_map_swapin() is potentially an expensive operation, it
1221 * should be used with caution.
1222 *
1223 * Invariants:
1224 * 1) A map with a residence count of zero is either swapped, or
1225 * being swapped.
1226 * 2) A map with a non-zero residence count is either resident,
1227 * or being swapped in.
1228 */
1229
1230int vm_map_swap_enable = 1;
1231
1232void vm_map_swapin (vm_map_t map)
1233{
1234 register vm_map_entry_t entry;
2d21ac55 1235
1c79356b
A
1236 if (!vm_map_swap_enable) /* debug */
1237 return;
1238
1239 /*
1240 * Map is locked
1241 * First deal with various races.
1242 */
1243 if (map->sw_state == MAP_SW_IN)
1244 /*
1245 * we raced with swapout and won. Returning will incr.
1246 * the res_count, turning the swapout into a nop.
1247 */
1248 return;
1249
1250 /*
1251 * The residence count must be zero. If we raced with another
1252 * swapin, the state would have been IN; if we raced with a
1253 * swapout (after another competing swapin), we must have lost
1254 * the race to get here (see above comment), in which case
1255 * res_count is still 0.
1256 */
1257 assert(map->res_count == 0);
1258
1259 /*
1260 * There are no intermediate states of a map going out or
1261 * coming in, since the map is locked during the transition.
1262 */
1263 assert(map->sw_state == MAP_SW_OUT);
1264
1265 /*
1266 * We now operate upon each map entry. If the entry is a sub-
1267 * or share-map, we call vm_map_res_reference upon it.
1268 * If the entry is an object, we call vm_object_res_reference
1269 * (this may iterate through the shadow chain).
1270 * Note that we hold the map locked the entire time,
1271 * even if we get back here via a recursive call in
1272 * vm_map_res_reference.
1273 */
1274 entry = vm_map_first_entry(map);
1275
1276 while (entry != vm_map_to_entry(map)) {
3e170ce0 1277 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1278 if (entry->is_sub_map) {
3e170ce0 1279 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1280 lck_mtx_lock(&lmap->s_lock);
1c79356b 1281 vm_map_res_reference(lmap);
b0d623f7 1282 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1283 } else {
3e170ce0 1284 vm_object_t object = VME_OBEJCT(entry);
1c79356b
A
1285 vm_object_lock(object);
1286 /*
1287 * This call may iterate through the
1288 * shadow chain.
1289 */
1290 vm_object_res_reference(object);
1291 vm_object_unlock(object);
1292 }
1293 }
1294 entry = entry->vme_next;
1295 }
1296 assert(map->sw_state == MAP_SW_OUT);
1297 map->sw_state = MAP_SW_IN;
1298}
1299
1300void vm_map_swapout(vm_map_t map)
1301{
1302 register vm_map_entry_t entry;
1303
1304 /*
1305 * Map is locked
1306 * First deal with various races.
1307 * If we raced with a swapin and lost, the residence count
1308 * will have been incremented to 1, and we simply return.
1309 */
b0d623f7 1310 lck_mtx_lock(&map->s_lock);
1c79356b 1311 if (map->res_count != 0) {
b0d623f7 1312 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1313 return;
1314 }
b0d623f7 1315 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1316
1317 /*
1318 * There are no intermediate states of a map going out or
1319 * coming in, since the map is locked during the transition.
1320 */
1321 assert(map->sw_state == MAP_SW_IN);
1322
1323 if (!vm_map_swap_enable)
1324 return;
1325
1326 /*
1327 * We now operate upon each map entry. If the entry is a sub-
1328 * or share-map, we call vm_map_res_deallocate upon it.
1329 * If the entry is an object, we call vm_object_res_deallocate
1330 * (this may iterate through the shadow chain).
1331 * Note that we hold the map locked the entire time,
1332 * even if we get back here via a recursive call in
1333 * vm_map_res_deallocate.
1334 */
1335 entry = vm_map_first_entry(map);
1336
1337 while (entry != vm_map_to_entry(map)) {
3e170ce0 1338 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1339 if (entry->is_sub_map) {
3e170ce0 1340 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1341 lck_mtx_lock(&lmap->s_lock);
1c79356b 1342 vm_map_res_deallocate(lmap);
b0d623f7 1343 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1344 } else {
3e170ce0 1345 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1346 vm_object_lock(object);
1347 /*
1348 * This call may take a long time,
1349 * since it could actively push
1350 * out pages (if we implement it
1351 * that way).
1352 */
1353 vm_object_res_deallocate(object);
1354 vm_object_unlock(object);
1355 }
1356 }
1357 entry = entry->vme_next;
1358 }
1359 assert(map->sw_state == MAP_SW_IN);
1360 map->sw_state = MAP_SW_OUT;
1361}
1362
1363#endif /* TASK_SWAPPER */
1364
1c79356b
A
1365/*
1366 * vm_map_lookup_entry: [ internal use only ]
1367 *
6d2010ae
A
1368 * Calls into the vm map store layer to find the map
1369 * entry containing (or immediately preceding) the
1370 * specified address in the given map; the entry is returned
1c79356b
A
1371 * in the "entry" parameter. The boolean
1372 * result indicates whether the address is
1373 * actually contained in the map.
1374 */
1375boolean_t
1376vm_map_lookup_entry(
91447636
A
1377 register vm_map_t map,
1378 register vm_map_offset_t address,
1c79356b
A
1379 vm_map_entry_t *entry) /* OUT */
1380{
6d2010ae 1381 return ( vm_map_store_lookup_entry( map, address, entry ));
1c79356b
A
1382}
1383
1384/*
1385 * Routine: vm_map_find_space
1386 * Purpose:
1387 * Allocate a range in the specified virtual address map,
1388 * returning the entry allocated for that range.
1389 * Used by kmem_alloc, etc.
1390 *
1391 * The map must NOT be locked. It will be returned locked
1392 * on KERN_SUCCESS, unlocked on failure.
1393 *
1394 * If an entry is allocated, the object/offset fields
1395 * are initialized to zero.
1396 */
1397kern_return_t
1398vm_map_find_space(
1399 register vm_map_t map,
91447636
A
1400 vm_map_offset_t *address, /* OUT */
1401 vm_map_size_t size,
1402 vm_map_offset_t mask,
0c530ab8 1403 int flags,
1c79356b
A
1404 vm_map_entry_t *o_entry) /* OUT */
1405{
3e170ce0 1406 vm_map_entry_t entry, new_entry;
91447636
A
1407 register vm_map_offset_t start;
1408 register vm_map_offset_t end;
3e170ce0 1409 vm_map_entry_t hole_entry;
91447636
A
1410
1411 if (size == 0) {
1412 *address = 0;
1413 return KERN_INVALID_ARGUMENT;
1414 }
1c79356b 1415
2d21ac55
A
1416 if (flags & VM_FLAGS_GUARD_AFTER) {
1417 /* account for the back guard page in the size */
39236c6e 1418 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1419 }
1420
7ddcb079 1421 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1422
1423 /*
1424 * Look for the first possible address; if there's already
1425 * something at this address, we have to start after it.
1426 */
1427
1428 vm_map_lock(map);
1429
6d2010ae
A
1430 if( map->disable_vmentry_reuse == TRUE) {
1431 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1432 } else {
3e170ce0
A
1433 if (map->holelistenabled) {
1434 hole_entry = (vm_map_entry_t)map->holes_list;
1435
1436 if (hole_entry == NULL) {
1437 /*
1438 * No more space in the map?
1439 */
1440 vm_map_entry_dispose(map, new_entry);
1441 vm_map_unlock(map);
1442 return(KERN_NO_SPACE);
1443 }
1444
1445 entry = hole_entry;
1446 start = entry->vme_start;
1447 } else {
1448 assert(first_free_is_valid(map));
1449 if ((entry = map->first_free) == vm_map_to_entry(map))
1450 start = map->min_offset;
1451 else
1452 start = entry->vme_end;
1453 }
6d2010ae 1454 }
1c79356b
A
1455
1456 /*
1457 * In any case, the "entry" always precedes
1458 * the proposed new region throughout the loop:
1459 */
1460
1461 while (TRUE) {
1462 register vm_map_entry_t next;
1463
1464 /*
1465 * Find the end of the proposed new region.
1466 * Be sure we didn't go beyond the end, or
1467 * wrap around the address.
1468 */
1469
2d21ac55
A
1470 if (flags & VM_FLAGS_GUARD_BEFORE) {
1471 /* reserve space for the front guard page */
39236c6e 1472 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1473 }
1c79356b 1474 end = ((start + mask) & ~mask);
2d21ac55 1475
1c79356b
A
1476 if (end < start) {
1477 vm_map_entry_dispose(map, new_entry);
1478 vm_map_unlock(map);
1479 return(KERN_NO_SPACE);
1480 }
1481 start = end;
1482 end += size;
1483
1484 if ((end > map->max_offset) || (end < start)) {
1485 vm_map_entry_dispose(map, new_entry);
1486 vm_map_unlock(map);
1487 return(KERN_NO_SPACE);
1488 }
1489
1c79356b 1490 next = entry->vme_next;
1c79356b 1491
3e170ce0
A
1492 if (map->holelistenabled) {
1493 if (entry->vme_end >= end)
1494 break;
1495 } else {
1496 /*
1497 * If there are no more entries, we must win.
1498 *
1499 * OR
1500 *
1501 * If there is another entry, it must be
1502 * after the end of the potential new region.
1503 */
1c79356b 1504
3e170ce0
A
1505 if (next == vm_map_to_entry(map))
1506 break;
1507
1508 if (next->vme_start >= end)
1509 break;
1510 }
1c79356b
A
1511
1512 /*
1513 * Didn't fit -- move to the next entry.
1514 */
1515
1516 entry = next;
3e170ce0
A
1517
1518 if (map->holelistenabled) {
1519 if (entry == (vm_map_entry_t) map->holes_list) {
1520 /*
1521 * Wrapped around
1522 */
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527 start = entry->vme_start;
1528 } else {
1529 start = entry->vme_end;
1530 }
1531 }
1532
1533 if (map->holelistenabled) {
1534 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1535 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1536 }
1c79356b
A
1537 }
1538
1539 /*
1540 * At this point,
1541 * "start" and "end" should define the endpoints of the
1542 * available new range, and
1543 * "entry" should refer to the region before the new
1544 * range, and
1545 *
1546 * the map should be locked.
1547 */
1548
2d21ac55
A
1549 if (flags & VM_FLAGS_GUARD_BEFORE) {
1550 /* go back for the front guard page */
39236c6e 1551 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1552 }
1c79356b
A
1553 *address = start;
1554
e2d2fc5c 1555 assert(start < end);
1c79356b
A
1556 new_entry->vme_start = start;
1557 new_entry->vme_end = end;
1558 assert(page_aligned(new_entry->vme_start));
1559 assert(page_aligned(new_entry->vme_end));
39236c6e
A
1560 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1561 VM_MAP_PAGE_MASK(map)));
1562 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1563 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1564
1565 new_entry->is_shared = FALSE;
1566 new_entry->is_sub_map = FALSE;
fe8ab488 1567 new_entry->use_pmap = TRUE;
3e170ce0
A
1568 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1569 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1570
1571 new_entry->needs_copy = FALSE;
1572
1573 new_entry->inheritance = VM_INHERIT_DEFAULT;
1574 new_entry->protection = VM_PROT_DEFAULT;
1575 new_entry->max_protection = VM_PROT_ALL;
1576 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1577 new_entry->wired_count = 0;
1578 new_entry->user_wired_count = 0;
1579
1580 new_entry->in_transition = FALSE;
1581 new_entry->needs_wakeup = FALSE;
2d21ac55 1582 new_entry->no_cache = FALSE;
b0d623f7 1583 new_entry->permanent = FALSE;
39236c6e
A
1584 new_entry->superpage_size = FALSE;
1585 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1586 new_entry->map_aligned = TRUE;
1587 } else {
1588 new_entry->map_aligned = FALSE;
1589 }
2d21ac55 1590
3e170ce0 1591 new_entry->used_for_jit = FALSE;
b0d623f7 1592 new_entry->zero_wired_pages = FALSE;
fe8ab488 1593 new_entry->iokit_acct = FALSE;
3e170ce0
A
1594 new_entry->vme_resilient_codesign = FALSE;
1595 new_entry->vme_resilient_media = FALSE;
1c79356b 1596
3e170ce0
A
1597 int alias;
1598 VM_GET_FLAGS_ALIAS(flags, alias);
1599 VME_ALIAS_SET(new_entry, alias);
0c530ab8 1600
1c79356b
A
1601 /*
1602 * Insert the new entry into the list
1603 */
1604
6d2010ae 1605 vm_map_store_entry_link(map, entry, new_entry);
1c79356b
A
1606
1607 map->size += size;
1608
1609 /*
1610 * Update the lookup hint
1611 */
0c530ab8 1612 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1613
1614 *o_entry = new_entry;
1615 return(KERN_SUCCESS);
1616}
1617
1618int vm_map_pmap_enter_print = FALSE;
1619int vm_map_pmap_enter_enable = FALSE;
1620
1621/*
91447636 1622 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1623 *
1624 * Description:
1625 * Force pages from the specified object to be entered into
1626 * the pmap at the specified address if they are present.
1627 * As soon as a page not found in the object the scan ends.
1628 *
1629 * Returns:
1630 * Nothing.
1631 *
1632 * In/out conditions:
1633 * The source map should not be locked on entry.
1634 */
fe8ab488 1635__unused static void
1c79356b
A
1636vm_map_pmap_enter(
1637 vm_map_t map,
91447636
A
1638 register vm_map_offset_t addr,
1639 register vm_map_offset_t end_addr,
1c79356b
A
1640 register vm_object_t object,
1641 vm_object_offset_t offset,
1642 vm_prot_t protection)
1643{
2d21ac55
A
1644 int type_of_fault;
1645 kern_return_t kr;
0b4e3aa0 1646
55e303ae
A
1647 if(map->pmap == 0)
1648 return;
1649
1c79356b
A
1650 while (addr < end_addr) {
1651 register vm_page_t m;
1652
fe8ab488
A
1653
1654 /*
1655 * TODO:
1656 * From vm_map_enter(), we come into this function without the map
1657 * lock held or the object lock held.
1658 * We haven't taken a reference on the object either.
1659 * We should do a proper lookup on the map to make sure
1660 * that things are sane before we go locking objects that
1661 * could have been deallocated from under us.
1662 */
1663
1c79356b 1664 vm_object_lock(object);
1c79356b
A
1665
1666 m = vm_page_lookup(object, offset);
91447636
A
1667 /*
1668 * ENCRYPTED SWAP:
1669 * The user should never see encrypted data, so do not
1670 * enter an encrypted page in the page table.
1671 */
1672 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
2d21ac55
A
1673 m->fictitious ||
1674 (m->unusual && ( m->error || m->restart || m->absent))) {
1c79356b
A
1675 vm_object_unlock(object);
1676 return;
1677 }
1678
1c79356b
A
1679 if (vm_map_pmap_enter_print) {
1680 printf("vm_map_pmap_enter:");
2d21ac55
A
1681 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1682 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1683 }
2d21ac55 1684 type_of_fault = DBG_CACHE_HIT_FAULT;
6d2010ae 1685 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
fe8ab488
A
1686 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1687 0, /* XXX need user tag / alias? */
1688 0, /* alternate accounting? */
1689 NULL,
2d21ac55 1690 &type_of_fault);
1c79356b 1691
1c79356b
A
1692 vm_object_unlock(object);
1693
1694 offset += PAGE_SIZE_64;
1695 addr += PAGE_SIZE;
1696 }
1697}
1698
91447636
A
1699boolean_t vm_map_pmap_is_empty(
1700 vm_map_t map,
1701 vm_map_offset_t start,
1702 vm_map_offset_t end);
1703boolean_t vm_map_pmap_is_empty(
1704 vm_map_t map,
1705 vm_map_offset_t start,
1706 vm_map_offset_t end)
1707{
2d21ac55
A
1708#ifdef MACHINE_PMAP_IS_EMPTY
1709 return pmap_is_empty(map->pmap, start, end);
1710#else /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1711 vm_map_offset_t offset;
1712 ppnum_t phys_page;
1713
1714 if (map->pmap == NULL) {
1715 return TRUE;
1716 }
2d21ac55 1717
91447636
A
1718 for (offset = start;
1719 offset < end;
1720 offset += PAGE_SIZE) {
1721 phys_page = pmap_find_phys(map->pmap, offset);
1722 if (phys_page) {
1723 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1724 "page %d at 0x%llx\n",
2d21ac55
A
1725 map, (long long)start, (long long)end,
1726 phys_page, (long long)offset);
91447636
A
1727 return FALSE;
1728 }
1729 }
1730 return TRUE;
2d21ac55 1731#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1732}
1733
316670eb
A
1734#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1735kern_return_t
1736vm_map_random_address_for_size(
1737 vm_map_t map,
1738 vm_map_offset_t *address,
1739 vm_map_size_t size)
1740{
1741 kern_return_t kr = KERN_SUCCESS;
1742 int tries = 0;
1743 vm_map_offset_t random_addr = 0;
1744 vm_map_offset_t hole_end;
1745
1746 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1747 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1748 vm_map_size_t vm_hole_size = 0;
1749 vm_map_size_t addr_space_size;
1750
1751 addr_space_size = vm_map_max(map) - vm_map_min(map);
1752
1753 assert(page_aligned(size));
1754
1755 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1756 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e
A
1757 random_addr = vm_map_trunc_page(
1758 vm_map_min(map) +(random_addr % addr_space_size),
1759 VM_MAP_PAGE_MASK(map));
316670eb
A
1760
1761 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1762 if (prev_entry == vm_map_to_entry(map)) {
1763 next_entry = vm_map_first_entry(map);
1764 } else {
1765 next_entry = prev_entry->vme_next;
1766 }
1767 if (next_entry == vm_map_to_entry(map)) {
1768 hole_end = vm_map_max(map);
1769 } else {
1770 hole_end = next_entry->vme_start;
1771 }
1772 vm_hole_size = hole_end - random_addr;
1773 if (vm_hole_size >= size) {
1774 *address = random_addr;
1775 break;
1776 }
1777 }
1778 tries++;
1779 }
1780
1781 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1782 kr = KERN_NO_SPACE;
1783 }
1784 return kr;
1785}
1786
1c79356b
A
1787/*
1788 * Routine: vm_map_enter
1789 *
1790 * Description:
1791 * Allocate a range in the specified virtual address map.
1792 * The resulting range will refer to memory defined by
1793 * the given memory object and offset into that object.
1794 *
1795 * Arguments are as defined in the vm_map call.
1796 */
91447636
A
1797int _map_enter_debug = 0;
1798static unsigned int vm_map_enter_restore_successes = 0;
1799static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
1800kern_return_t
1801vm_map_enter(
91447636 1802 vm_map_t map,
593a1d5f 1803 vm_map_offset_t *address, /* IN/OUT */
91447636 1804 vm_map_size_t size,
593a1d5f 1805 vm_map_offset_t mask,
1c79356b
A
1806 int flags,
1807 vm_object_t object,
1808 vm_object_offset_t offset,
1809 boolean_t needs_copy,
1810 vm_prot_t cur_protection,
1811 vm_prot_t max_protection,
1812 vm_inherit_t inheritance)
1813{
91447636 1814 vm_map_entry_t entry, new_entry;
2d21ac55 1815 vm_map_offset_t start, tmp_start, tmp_offset;
91447636 1816 vm_map_offset_t end, tmp_end;
b0d623f7
A
1817 vm_map_offset_t tmp2_start, tmp2_end;
1818 vm_map_offset_t step;
1c79356b 1819 kern_return_t result = KERN_SUCCESS;
91447636
A
1820 vm_map_t zap_old_map = VM_MAP_NULL;
1821 vm_map_t zap_new_map = VM_MAP_NULL;
1822 boolean_t map_locked = FALSE;
1823 boolean_t pmap_empty = TRUE;
1824 boolean_t new_mapping_established = FALSE;
fe8ab488 1825 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
91447636
A
1826 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1827 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1828 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2d21ac55
A
1829 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1830 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
b0d623f7 1831 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
316670eb 1832 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
fe8ab488 1833 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
3e170ce0
A
1834 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1835 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
b0d623f7 1836 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3e170ce0 1837 vm_tag_t alias, user_alias;
2d21ac55 1838 vm_map_offset_t effective_min_offset, effective_max_offset;
593a1d5f 1839 kern_return_t kr;
39236c6e 1840 boolean_t clear_map_aligned = FALSE;
3e170ce0 1841 vm_map_entry_t hole_entry;
593a1d5f 1842
b0d623f7
A
1843 if (superpage_size) {
1844 switch (superpage_size) {
1845 /*
1846 * Note that the current implementation only supports
1847 * a single size for superpages, SUPERPAGE_SIZE, per
1848 * architecture. As soon as more sizes are supposed
1849 * to be supported, SUPERPAGE_SIZE has to be replaced
1850 * with a lookup of the size depending on superpage_size.
1851 */
1852#ifdef __x86_64__
6d2010ae
A
1853 case SUPERPAGE_SIZE_ANY:
1854 /* handle it like 2 MB and round up to page size */
1855 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
b0d623f7
A
1856 case SUPERPAGE_SIZE_2MB:
1857 break;
1858#endif
1859 default:
1860 return KERN_INVALID_ARGUMENT;
1861 }
1862 mask = SUPERPAGE_SIZE-1;
1863 if (size & (SUPERPAGE_SIZE-1))
1864 return KERN_INVALID_ARGUMENT;
1865 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1866 }
1867
6d2010ae 1868
1c79356b 1869
3e170ce0
A
1870 if (resilient_codesign || resilient_media) {
1871 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1872 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1873 return KERN_PROTECTION_FAILURE;
1874 }
1875 }
1876
2d21ac55
A
1877 if (is_submap) {
1878 if (purgable) {
1879 /* submaps can not be purgeable */
1880 return KERN_INVALID_ARGUMENT;
1881 }
1882 if (object == VM_OBJECT_NULL) {
1883 /* submaps can not be created lazily */
1884 return KERN_INVALID_ARGUMENT;
1885 }
1886 }
1887 if (flags & VM_FLAGS_ALREADY) {
1888 /*
1889 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1890 * is already present. For it to be meaningful, the requested
1891 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1892 * we shouldn't try and remove what was mapped there first
1893 * (!VM_FLAGS_OVERWRITE).
1894 */
1895 if ((flags & VM_FLAGS_ANYWHERE) ||
1896 (flags & VM_FLAGS_OVERWRITE)) {
1897 return KERN_INVALID_ARGUMENT;
1898 }
1899 }
1900
6d2010ae 1901 effective_min_offset = map->min_offset;
b0d623f7 1902
2d21ac55
A
1903 if (flags & VM_FLAGS_BEYOND_MAX) {
1904 /*
b0d623f7 1905 * Allow an insertion beyond the map's max offset.
2d21ac55
A
1906 */
1907 if (vm_map_is_64bit(map))
1908 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1909 else
1910 effective_max_offset = 0x00000000FFFFF000ULL;
1911 } else {
1912 effective_max_offset = map->max_offset;
1913 }
1914
1915 if (size == 0 ||
1916 (offset & PAGE_MASK_64) != 0) {
91447636
A
1917 *address = 0;
1918 return KERN_INVALID_ARGUMENT;
1919 }
1920
1c79356b 1921 VM_GET_FLAGS_ALIAS(flags, alias);
3e170ce0
A
1922 if (map->pmap == kernel_pmap) {
1923 user_alias = VM_KERN_MEMORY_NONE;
1924 } else {
1925 user_alias = alias;
1926 }
2d21ac55 1927
1c79356b
A
1928#define RETURN(value) { result = value; goto BailOut; }
1929
1930 assert(page_aligned(*address));
1931 assert(page_aligned(size));
91447636 1932
39236c6e
A
1933 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1934 /*
1935 * In most cases, the caller rounds the size up to the
1936 * map's page size.
1937 * If we get a size that is explicitly not map-aligned here,
1938 * we'll have to respect the caller's wish and mark the
1939 * mapping as "not map-aligned" to avoid tripping the
1940 * map alignment checks later.
1941 */
1942 clear_map_aligned = TRUE;
1943 }
fe8ab488
A
1944 if (!anywhere &&
1945 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1946 /*
1947 * We've been asked to map at a fixed address and that
1948 * address is not aligned to the map's specific alignment.
1949 * The caller should know what it's doing (i.e. most likely
1950 * mapping some fragmented copy map, transferring memory from
1951 * a VM map with a different alignment), so clear map_aligned
1952 * for this new VM map entry and proceed.
1953 */
1954 clear_map_aligned = TRUE;
1955 }
39236c6e 1956
91447636
A
1957 /*
1958 * Only zero-fill objects are allowed to be purgable.
1959 * LP64todo - limit purgable objects to 32-bits for now
1960 */
1961 if (purgable &&
1962 (offset != 0 ||
1963 (object != VM_OBJECT_NULL &&
6d2010ae 1964 (object->vo_size != size ||
2d21ac55 1965 object->purgable == VM_PURGABLE_DENY))
b0d623f7 1966 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
91447636
A
1967 return KERN_INVALID_ARGUMENT;
1968
1969 if (!anywhere && overwrite) {
1970 /*
1971 * Create a temporary VM map to hold the old mappings in the
1972 * affected area while we create the new one.
1973 * This avoids releasing the VM map lock in
1974 * vm_map_entry_delete() and allows atomicity
1975 * when we want to replace some mappings with a new one.
1976 * It also allows us to restore the old VM mappings if the
1977 * new mapping fails.
1978 */
1979 zap_old_map = vm_map_create(PMAP_NULL,
1980 *address,
1981 *address + size,
b0d623f7 1982 map->hdr.entries_pageable);
39236c6e 1983 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 1984 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
1985 }
1986
2d21ac55 1987StartAgain: ;
1c79356b
A
1988
1989 start = *address;
1990
1991 if (anywhere) {
1992 vm_map_lock(map);
91447636 1993 map_locked = TRUE;
6d2010ae 1994
316670eb
A
1995 if (entry_for_jit) {
1996 if (map->jit_entry_exists) {
1997 result = KERN_INVALID_ARGUMENT;
1998 goto BailOut;
1999 }
2000 /*
2001 * Get a random start address.
2002 */
2003 result = vm_map_random_address_for_size(map, address, size);
2004 if (result != KERN_SUCCESS) {
2005 goto BailOut;
2006 }
2007 start = *address;
6d2010ae 2008 }
1c79356b 2009
316670eb 2010
1c79356b
A
2011 /*
2012 * Calculate the first possible address.
2013 */
2014
2d21ac55
A
2015 if (start < effective_min_offset)
2016 start = effective_min_offset;
2017 if (start > effective_max_offset)
1c79356b
A
2018 RETURN(KERN_NO_SPACE);
2019
2020 /*
2021 * Look for the first possible address;
2022 * if there's already something at this
2023 * address, we have to start after it.
2024 */
2025
6d2010ae
A
2026 if( map->disable_vmentry_reuse == TRUE) {
2027 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2028 } else {
6d2010ae 2029
3e170ce0
A
2030 if (map->holelistenabled) {
2031 hole_entry = (vm_map_entry_t)map->holes_list;
2032
2033 if (hole_entry == NULL) {
2034 /*
2035 * No more space in the map?
2036 */
2037 result = KERN_NO_SPACE;
2038 goto BailOut;
2039 } else {
2040
2041 boolean_t found_hole = FALSE;
2042
2043 do {
2044 if (hole_entry->vme_start >= start) {
2045 start = hole_entry->vme_start;
2046 found_hole = TRUE;
2047 break;
2048 }
2049
2050 if (hole_entry->vme_end > start) {
2051 found_hole = TRUE;
2052 break;
2053 }
2054 hole_entry = hole_entry->vme_next;
2055
2056 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2057
2058 if (found_hole == FALSE) {
2059 result = KERN_NO_SPACE;
2060 goto BailOut;
2061 }
2062
2063 entry = hole_entry;
6d2010ae 2064
3e170ce0
A
2065 if (start == 0)
2066 start += PAGE_SIZE_64;
2067 }
6d2010ae 2068 } else {
3e170ce0
A
2069 assert(first_free_is_valid(map));
2070
2071 entry = map->first_free;
2072
2073 if (entry == vm_map_to_entry(map)) {
6d2010ae 2074 entry = NULL;
3e170ce0
A
2075 } else {
2076 if (entry->vme_next == vm_map_to_entry(map)){
2077 /*
2078 * Hole at the end of the map.
2079 */
2080 entry = NULL;
2081 } else {
2082 if (start < (entry->vme_next)->vme_start ) {
2083 start = entry->vme_end;
2084 start = vm_map_round_page(start,
2085 VM_MAP_PAGE_MASK(map));
2086 } else {
2087 /*
2088 * Need to do a lookup.
2089 */
2090 entry = NULL;
2091 }
2092 }
2093 }
2094
2095 if (entry == NULL) {
2096 vm_map_entry_t tmp_entry;
2097 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2098 assert(!entry_for_jit);
2099 start = tmp_entry->vme_end;
39236c6e
A
2100 start = vm_map_round_page(start,
2101 VM_MAP_PAGE_MASK(map));
6d2010ae 2102 }
3e170ce0 2103 entry = tmp_entry;
316670eb 2104 }
6d2010ae 2105 }
1c79356b
A
2106 }
2107
2108 /*
2109 * In any case, the "entry" always precedes
2110 * the proposed new region throughout the
2111 * loop:
2112 */
2113
2114 while (TRUE) {
2115 register vm_map_entry_t next;
2116
2d21ac55 2117 /*
1c79356b
A
2118 * Find the end of the proposed new region.
2119 * Be sure we didn't go beyond the end, or
2120 * wrap around the address.
2121 */
2122
2123 end = ((start + mask) & ~mask);
39236c6e
A
2124 end = vm_map_round_page(end,
2125 VM_MAP_PAGE_MASK(map));
1c79356b
A
2126 if (end < start)
2127 RETURN(KERN_NO_SPACE);
2128 start = end;
39236c6e
A
2129 assert(VM_MAP_PAGE_ALIGNED(start,
2130 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2131 end += size;
2132
2d21ac55 2133 if ((end > effective_max_offset) || (end < start)) {
1c79356b 2134 if (map->wait_for_space) {
fe8ab488 2135 assert(!keep_map_locked);
2d21ac55
A
2136 if (size <= (effective_max_offset -
2137 effective_min_offset)) {
1c79356b
A
2138 assert_wait((event_t)map,
2139 THREAD_ABORTSAFE);
2140 vm_map_unlock(map);
91447636
A
2141 map_locked = FALSE;
2142 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2143 goto StartAgain;
2144 }
2145 }
2146 RETURN(KERN_NO_SPACE);
2147 }
2148
1c79356b 2149 next = entry->vme_next;
1c79356b 2150
3e170ce0
A
2151 if (map->holelistenabled) {
2152 if (entry->vme_end >= end)
2153 break;
2154 } else {
2155 /*
2156 * If there are no more entries, we must win.
2157 *
2158 * OR
2159 *
2160 * If there is another entry, it must be
2161 * after the end of the potential new region.
2162 */
1c79356b 2163
3e170ce0
A
2164 if (next == vm_map_to_entry(map))
2165 break;
2166
2167 if (next->vme_start >= end)
2168 break;
2169 }
1c79356b
A
2170
2171 /*
2172 * Didn't fit -- move to the next entry.
2173 */
2174
2175 entry = next;
3e170ce0
A
2176
2177 if (map->holelistenabled) {
2178 if (entry == (vm_map_entry_t) map->holes_list) {
2179 /*
2180 * Wrapped around
2181 */
2182 result = KERN_NO_SPACE;
2183 goto BailOut;
2184 }
2185 start = entry->vme_start;
2186 } else {
2187 start = entry->vme_end;
2188 }
2189
39236c6e
A
2190 start = vm_map_round_page(start,
2191 VM_MAP_PAGE_MASK(map));
1c79356b 2192 }
3e170ce0
A
2193
2194 if (map->holelistenabled) {
2195 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2196 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2197 }
2198 }
2199
1c79356b 2200 *address = start;
39236c6e
A
2201 assert(VM_MAP_PAGE_ALIGNED(*address,
2202 VM_MAP_PAGE_MASK(map)));
1c79356b 2203 } else {
1c79356b
A
2204 /*
2205 * Verify that:
2206 * the address doesn't itself violate
2207 * the mask requirement.
2208 */
2209
2210 vm_map_lock(map);
91447636 2211 map_locked = TRUE;
1c79356b
A
2212 if ((start & mask) != 0)
2213 RETURN(KERN_NO_SPACE);
2214
2215 /*
2216 * ... the address is within bounds
2217 */
2218
2219 end = start + size;
2220
2d21ac55
A
2221 if ((start < effective_min_offset) ||
2222 (end > effective_max_offset) ||
1c79356b
A
2223 (start >= end)) {
2224 RETURN(KERN_INVALID_ADDRESS);
2225 }
2226
91447636
A
2227 if (overwrite && zap_old_map != VM_MAP_NULL) {
2228 /*
2229 * Fixed mapping and "overwrite" flag: attempt to
2230 * remove all existing mappings in the specified
2231 * address range, saving them in our "zap_old_map".
2232 */
2233 (void) vm_map_delete(map, start, end,
fe8ab488
A
2234 (VM_MAP_REMOVE_SAVE_ENTRIES |
2235 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2236 zap_old_map);
2237 }
2238
1c79356b
A
2239 /*
2240 * ... the starting address isn't allocated
2241 */
2242
2d21ac55
A
2243 if (vm_map_lookup_entry(map, start, &entry)) {
2244 if (! (flags & VM_FLAGS_ALREADY)) {
2245 RETURN(KERN_NO_SPACE);
2246 }
2247 /*
2248 * Check if what's already there is what we want.
2249 */
2250 tmp_start = start;
2251 tmp_offset = offset;
2252 if (entry->vme_start < start) {
2253 tmp_start -= start - entry->vme_start;
2254 tmp_offset -= start - entry->vme_start;
2255
2256 }
2257 for (; entry->vme_start < end;
2258 entry = entry->vme_next) {
4a3eedf9
A
2259 /*
2260 * Check if the mapping's attributes
2261 * match the existing map entry.
2262 */
2d21ac55
A
2263 if (entry == vm_map_to_entry(map) ||
2264 entry->vme_start != tmp_start ||
2265 entry->is_sub_map != is_submap ||
3e170ce0 2266 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2267 entry->needs_copy != needs_copy ||
2268 entry->protection != cur_protection ||
2269 entry->max_protection != max_protection ||
2270 entry->inheritance != inheritance ||
fe8ab488 2271 entry->iokit_acct != iokit_acct ||
3e170ce0 2272 VME_ALIAS(entry) != alias) {
2d21ac55
A
2273 /* not the same mapping ! */
2274 RETURN(KERN_NO_SPACE);
2275 }
4a3eedf9
A
2276 /*
2277 * Check if the same object is being mapped.
2278 */
2279 if (is_submap) {
3e170ce0 2280 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2281 (vm_map_t) object) {
2282 /* not the same submap */
2283 RETURN(KERN_NO_SPACE);
2284 }
2285 } else {
3e170ce0 2286 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2287 /* not the same VM object... */
2288 vm_object_t obj2;
2289
3e170ce0 2290 obj2 = VME_OBJECT(entry);
4a3eedf9
A
2291 if ((obj2 == VM_OBJECT_NULL ||
2292 obj2->internal) &&
2293 (object == VM_OBJECT_NULL ||
2294 object->internal)) {
2295 /*
2296 * ... but both are
2297 * anonymous memory,
2298 * so equivalent.
2299 */
2300 } else {
2301 RETURN(KERN_NO_SPACE);
2302 }
2303 }
2304 }
2305
2d21ac55
A
2306 tmp_offset += entry->vme_end - entry->vme_start;
2307 tmp_start += entry->vme_end - entry->vme_start;
2308 if (entry->vme_end >= end) {
2309 /* reached the end of our mapping */
2310 break;
2311 }
2312 }
2313 /* it all matches: let's use what's already there ! */
2314 RETURN(KERN_MEMORY_PRESENT);
2315 }
1c79356b
A
2316
2317 /*
2318 * ... the next region doesn't overlap the
2319 * end point.
2320 */
2321
2322 if ((entry->vme_next != vm_map_to_entry(map)) &&
2323 (entry->vme_next->vme_start < end))
2324 RETURN(KERN_NO_SPACE);
2325 }
2326
2327 /*
2328 * At this point,
2329 * "start" and "end" should define the endpoints of the
2330 * available new range, and
2331 * "entry" should refer to the region before the new
2332 * range, and
2333 *
2334 * the map should be locked.
2335 */
2336
2337 /*
2338 * See whether we can avoid creating a new entry (and object) by
2339 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2340 * extend from below.] Note that we can never extend/join
2341 * purgable objects because they need to remain distinct
2342 * entities in order to implement their "volatile object"
2343 * semantics.
1c79356b
A
2344 */
2345
316670eb 2346 if (purgable || entry_for_jit) {
91447636 2347 if (object == VM_OBJECT_NULL) {
3e170ce0 2348
91447636
A
2349 object = vm_object_allocate(size);
2350 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
fe8ab488 2351 object->true_share = TRUE;
316670eb 2352 if (purgable) {
fe8ab488 2353 task_t owner;
316670eb 2354 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2355 if (map->pmap == kernel_pmap) {
2356 /*
2357 * Purgeable mappings made in a kernel
2358 * map are "owned" by the kernel itself
2359 * rather than the current user task
2360 * because they're likely to be used by
2361 * more than this user task (see
2362 * execargs_purgeable_allocate(), for
2363 * example).
2364 */
2365 owner = kernel_task;
2366 } else {
2367 owner = current_task();
2368 }
2369 assert(object->vo_purgeable_owner == NULL);
2370 assert(object->resident_page_count == 0);
2371 assert(object->wired_page_count == 0);
2372 vm_object_lock(object);
2373 vm_purgeable_nonvolatile_enqueue(object, owner);
2374 vm_object_unlock(object);
316670eb 2375 }
91447636
A
2376 offset = (vm_object_offset_t)0;
2377 }
2d21ac55
A
2378 } else if ((is_submap == FALSE) &&
2379 (object == VM_OBJECT_NULL) &&
2380 (entry != vm_map_to_entry(map)) &&
2381 (entry->vme_end == start) &&
2382 (!entry->is_shared) &&
2383 (!entry->is_sub_map) &&
fe8ab488
A
2384 (!entry->in_transition) &&
2385 (!entry->needs_wakeup) &&
2386 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2d21ac55
A
2387 (entry->protection == cur_protection) &&
2388 (entry->max_protection == max_protection) &&
fe8ab488 2389 (entry->inheritance == inheritance) &&
3e170ce0
A
2390 ((user_alias == VM_MEMORY_REALLOC) ||
2391 (VME_ALIAS(entry) == alias)) &&
2d21ac55 2392 (entry->no_cache == no_cache) &&
fe8ab488
A
2393 (entry->permanent == permanent) &&
2394 (!entry->superpage_size && !superpage_size) &&
39236c6e
A
2395 /*
2396 * No coalescing if not map-aligned, to avoid propagating
2397 * that condition any further than needed:
2398 */
2399 (!entry->map_aligned || !clear_map_aligned) &&
fe8ab488
A
2400 (!entry->zero_wired_pages) &&
2401 (!entry->used_for_jit && !entry_for_jit) &&
2402 (entry->iokit_acct == iokit_acct) &&
3e170ce0
A
2403 (!entry->vme_resilient_codesign) &&
2404 (!entry->vme_resilient_media) &&
fe8ab488 2405
b0d623f7 2406 ((entry->vme_end - entry->vme_start) + size <=
3e170ce0 2407 (user_alias == VM_MEMORY_REALLOC ?
b0d623f7
A
2408 ANON_CHUNK_SIZE :
2409 NO_COALESCE_LIMIT)) &&
fe8ab488 2410
2d21ac55 2411 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2412 if (vm_object_coalesce(VME_OBJECT(entry),
2d21ac55 2413 VM_OBJECT_NULL,
3e170ce0 2414 VME_OFFSET(entry),
2d21ac55
A
2415 (vm_object_offset_t) 0,
2416 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2417 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2418
2419 /*
2420 * Coalesced the two objects - can extend
2421 * the previous map entry to include the
2422 * new range.
2423 */
2424 map->size += (end - entry->vme_end);
e2d2fc5c 2425 assert(entry->vme_start < end);
39236c6e
A
2426 assert(VM_MAP_PAGE_ALIGNED(end,
2427 VM_MAP_PAGE_MASK(map)));
3e170ce0
A
2428 if (__improbable(vm_debug_events))
2429 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
1c79356b 2430 entry->vme_end = end;
3e170ce0
A
2431 if (map->holelistenabled) {
2432 vm_map_store_update_first_free(map, entry, TRUE);
2433 } else {
2434 vm_map_store_update_first_free(map, map->first_free, TRUE);
2435 }
fe8ab488 2436 new_mapping_established = TRUE;
1c79356b
A
2437 RETURN(KERN_SUCCESS);
2438 }
2439 }
2440
b0d623f7
A
2441 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2442 new_entry = NULL;
2443
2444 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2445 tmp2_end = tmp2_start + step;
2446 /*
2447 * Create a new entry
2448 * LP64todo - for now, we can only allocate 4GB internal objects
2449 * because the default pager can't page bigger ones. Remove this
2450 * when it can.
2451 *
2452 * XXX FBDP
2453 * The reserved "page zero" in each process's address space can
2454 * be arbitrarily large. Splitting it into separate 4GB objects and
2455 * therefore different VM map entries serves no purpose and just
2456 * slows down operations on the VM map, so let's not split the
2457 * allocation into 4GB chunks if the max protection is NONE. That
2458 * memory should never be accessible, so it will never get to the
2459 * default pager.
2460 */
2461 tmp_start = tmp2_start;
2462 if (object == VM_OBJECT_NULL &&
2463 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2464 max_protection != VM_PROT_NONE &&
2465 superpage_size == 0)
2466 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2467 else
2468 tmp_end = tmp2_end;
2469 do {
2470 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2471 object, offset, needs_copy,
2472 FALSE, FALSE,
2473 cur_protection, max_protection,
2474 VM_BEHAVIOR_DEFAULT,
316670eb 2475 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
6d2010ae 2476 0, no_cache,
39236c6e
A
2477 permanent,
2478 superpage_size,
fe8ab488
A
2479 clear_map_aligned,
2480 is_submap);
3e170ce0
A
2481
2482 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2483 VME_ALIAS_SET(new_entry, alias);
2484
316670eb 2485 if (entry_for_jit){
6d2010ae
A
2486 if (!(map->jit_entry_exists)){
2487 new_entry->used_for_jit = TRUE;
2488 map->jit_entry_exists = TRUE;
2489 }
2490 }
2491
3e170ce0
A
2492 if (resilient_codesign &&
2493 ! ((cur_protection | max_protection) &
2494 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2495 new_entry->vme_resilient_codesign = TRUE;
2496 }
2497
2498 if (resilient_media &&
2499 ! ((cur_protection | max_protection) &
2500 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2501 new_entry->vme_resilient_media = TRUE;
2502 }
2503
fe8ab488
A
2504 assert(!new_entry->iokit_acct);
2505 if (!is_submap &&
2506 object != VM_OBJECT_NULL &&
2507 object->purgable != VM_PURGABLE_DENY) {
2508 assert(new_entry->use_pmap);
2509 assert(!new_entry->iokit_acct);
2510 /*
2511 * Turn off pmap accounting since
2512 * purgeable objects have their
2513 * own ledgers.
2514 */
2515 new_entry->use_pmap = FALSE;
2516 } else if (!is_submap &&
2517 iokit_acct) {
2518 /* alternate accounting */
2519 assert(!new_entry->iokit_acct);
2520 assert(new_entry->use_pmap);
2521 new_entry->iokit_acct = TRUE;
2522 new_entry->use_pmap = FALSE;
2523 vm_map_iokit_mapped_region(
2524 map,
2525 (new_entry->vme_end -
2526 new_entry->vme_start));
2527 } else if (!is_submap) {
2528 assert(!new_entry->iokit_acct);
2529 assert(new_entry->use_pmap);
2530 }
2531
b0d623f7
A
2532 if (is_submap) {
2533 vm_map_t submap;
2534 boolean_t submap_is_64bit;
2535 boolean_t use_pmap;
2536
fe8ab488
A
2537 assert(new_entry->is_sub_map);
2538 assert(!new_entry->use_pmap);
2539 assert(!new_entry->iokit_acct);
b0d623f7
A
2540 submap = (vm_map_t) object;
2541 submap_is_64bit = vm_map_is_64bit(submap);
3e170ce0 2542 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
fe8ab488 2543#ifndef NO_NESTED_PMAP
b0d623f7 2544 if (use_pmap && submap->pmap == NULL) {
316670eb 2545 ledger_t ledger = map->pmap->ledger;
b0d623f7 2546 /* we need a sub pmap to nest... */
316670eb
A
2547 submap->pmap = pmap_create(ledger, 0,
2548 submap_is_64bit);
b0d623f7
A
2549 if (submap->pmap == NULL) {
2550 /* let's proceed without nesting... */
2551 }
2d21ac55 2552 }
b0d623f7
A
2553 if (use_pmap && submap->pmap != NULL) {
2554 kr = pmap_nest(map->pmap,
2555 submap->pmap,
2556 tmp_start,
2557 tmp_start,
2558 tmp_end - tmp_start);
2559 if (kr != KERN_SUCCESS) {
2560 printf("vm_map_enter: "
2561 "pmap_nest(0x%llx,0x%llx) "
2562 "error 0x%x\n",
2563 (long long)tmp_start,
2564 (long long)tmp_end,
2565 kr);
2566 } else {
2567 /* we're now nested ! */
2568 new_entry->use_pmap = TRUE;
2569 pmap_empty = FALSE;
2570 }
2571 }
fe8ab488 2572#endif /* NO_NESTED_PMAP */
2d21ac55 2573 }
b0d623f7
A
2574 entry = new_entry;
2575
2576 if (superpage_size) {
2577 vm_page_t pages, m;
2578 vm_object_t sp_object;
2579
3e170ce0 2580 VME_OFFSET_SET(entry, 0);
b0d623f7
A
2581
2582 /* allocate one superpage */
2583 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2d21ac55 2584 if (kr != KERN_SUCCESS) {
3e170ce0
A
2585 /* deallocate whole range... */
2586 new_mapping_established = TRUE;
2587 /* ... but only up to "tmp_end" */
2588 size -= end - tmp_end;
b0d623f7
A
2589 RETURN(kr);
2590 }
2591
2592 /* create one vm_object per superpage */
2593 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2594 sp_object->phys_contiguous = TRUE;
6d2010ae 2595 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
3e170ce0 2596 VME_OBJECT_SET(entry, sp_object);
fe8ab488 2597 assert(entry->use_pmap);
b0d623f7
A
2598
2599 /* enter the base pages into the object */
2600 vm_object_lock(sp_object);
2601 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2602 m = pages;
2603 pmap_zero_page(m->phys_page);
2604 pages = NEXT_PAGE(m);
2605 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3e170ce0 2606 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 2607 }
b0d623f7 2608 vm_object_unlock(sp_object);
2d21ac55 2609 }
b0d623f7
A
2610 } while (tmp_end != tmp2_end &&
2611 (tmp_start = tmp_end) &&
2612 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2613 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2614 }
91447636 2615
91447636 2616 new_mapping_established = TRUE;
1c79356b 2617
fe8ab488
A
2618BailOut:
2619 assert(map_locked == TRUE);
2d21ac55 2620
593a1d5f
A
2621 if (result == KERN_SUCCESS) {
2622 vm_prot_t pager_prot;
2623 memory_object_t pager;
91447636 2624
fe8ab488 2625#if DEBUG
593a1d5f
A
2626 if (pmap_empty &&
2627 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2628 assert(vm_map_pmap_is_empty(map,
2629 *address,
2630 *address+size));
2631 }
fe8ab488 2632#endif /* DEBUG */
593a1d5f
A
2633
2634 /*
2635 * For "named" VM objects, let the pager know that the
2636 * memory object is being mapped. Some pagers need to keep
2637 * track of this, to know when they can reclaim the memory
2638 * object, for example.
2639 * VM calls memory_object_map() for each mapping (specifying
2640 * the protection of each mapping) and calls
2641 * memory_object_last_unmap() when all the mappings are gone.
2642 */
2643 pager_prot = max_protection;
2644 if (needs_copy) {
2645 /*
2646 * Copy-On-Write mapping: won't modify
2647 * the memory object.
2648 */
2649 pager_prot &= ~VM_PROT_WRITE;
2650 }
2651 if (!is_submap &&
2652 object != VM_OBJECT_NULL &&
2653 object->named &&
2654 object->pager != MEMORY_OBJECT_NULL) {
2655 vm_object_lock(object);
2656 pager = object->pager;
2657 if (object->named &&
2658 pager != MEMORY_OBJECT_NULL) {
2659 assert(object->pager_ready);
2660 vm_object_mapping_wait(object, THREAD_UNINT);
2661 vm_object_mapping_begin(object);
2662 vm_object_unlock(object);
2663
2664 kr = memory_object_map(pager, pager_prot);
2665 assert(kr == KERN_SUCCESS);
2666
2667 vm_object_lock(object);
2668 vm_object_mapping_end(object);
2669 }
2670 vm_object_unlock(object);
2671 }
fe8ab488
A
2672 }
2673
2674 assert(map_locked == TRUE);
2675
2676 if (!keep_map_locked) {
2677 vm_map_unlock(map);
2678 map_locked = FALSE;
2679 }
2680
2681 /*
2682 * We can't hold the map lock if we enter this block.
2683 */
2684
2685 if (result == KERN_SUCCESS) {
2686
2687 /* Wire down the new entry if the user
2688 * requested all new map entries be wired.
2689 */
2690 if ((map->wiring_required)||(superpage_size)) {
2691 assert(!keep_map_locked);
2692 pmap_empty = FALSE; /* pmap won't be empty */
2693 kr = vm_map_wire(map, start, end,
3e170ce0
A
2694 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2695 TRUE);
fe8ab488
A
2696 result = kr;
2697 }
2698
2699 }
2700
2701 if (result != KERN_SUCCESS) {
91447636
A
2702 if (new_mapping_established) {
2703 /*
2704 * We have to get rid of the new mappings since we
2705 * won't make them available to the user.
2706 * Try and do that atomically, to minimize the risk
2707 * that someone else creates new mappings in that range.
2708 */
2709 zap_new_map = vm_map_create(PMAP_NULL,
2710 *address,
2711 *address + size,
b0d623f7 2712 map->hdr.entries_pageable);
39236c6e
A
2713 vm_map_set_page_shift(zap_new_map,
2714 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
2715 vm_map_disable_hole_optimization(zap_new_map);
2716
91447636
A
2717 if (!map_locked) {
2718 vm_map_lock(map);
2719 map_locked = TRUE;
2720 }
2721 (void) vm_map_delete(map, *address, *address+size,
fe8ab488
A
2722 (VM_MAP_REMOVE_SAVE_ENTRIES |
2723 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2724 zap_new_map);
2725 }
2726 if (zap_old_map != VM_MAP_NULL &&
2727 zap_old_map->hdr.nentries != 0) {
2728 vm_map_entry_t entry1, entry2;
2729
2730 /*
2731 * The new mapping failed. Attempt to restore
2732 * the old mappings, saved in the "zap_old_map".
2733 */
2734 if (!map_locked) {
2735 vm_map_lock(map);
2736 map_locked = TRUE;
2737 }
2738
2739 /* first check if the coast is still clear */
2740 start = vm_map_first_entry(zap_old_map)->vme_start;
2741 end = vm_map_last_entry(zap_old_map)->vme_end;
2742 if (vm_map_lookup_entry(map, start, &entry1) ||
2743 vm_map_lookup_entry(map, end, &entry2) ||
2744 entry1 != entry2) {
2745 /*
2746 * Part of that range has already been
2747 * re-mapped: we can't restore the old
2748 * mappings...
2749 */
2750 vm_map_enter_restore_failures++;
2751 } else {
2752 /*
2753 * Transfer the saved map entries from
2754 * "zap_old_map" to the original "map",
2755 * inserting them all after "entry1".
2756 */
2757 for (entry2 = vm_map_first_entry(zap_old_map);
2758 entry2 != vm_map_to_entry(zap_old_map);
2759 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
2760 vm_map_size_t entry_size;
2761
2762 entry_size = (entry2->vme_end -
2763 entry2->vme_start);
6d2010ae 2764 vm_map_store_entry_unlink(zap_old_map,
91447636 2765 entry2);
2d21ac55 2766 zap_old_map->size -= entry_size;
6d2010ae 2767 vm_map_store_entry_link(map, entry1, entry2);
2d21ac55 2768 map->size += entry_size;
91447636
A
2769 entry1 = entry2;
2770 }
2771 if (map->wiring_required) {
2772 /*
2773 * XXX TODO: we should rewire the
2774 * old pages here...
2775 */
2776 }
2777 vm_map_enter_restore_successes++;
2778 }
2779 }
2780 }
2781
fe8ab488
A
2782 /*
2783 * The caller is responsible for releasing the lock if it requested to
2784 * keep the map locked.
2785 */
2786 if (map_locked && !keep_map_locked) {
91447636
A
2787 vm_map_unlock(map);
2788 }
2789
2790 /*
2791 * Get rid of the "zap_maps" and all the map entries that
2792 * they may still contain.
2793 */
2794 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 2795 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2796 zap_old_map = VM_MAP_NULL;
2797 }
2798 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 2799 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2800 zap_new_map = VM_MAP_NULL;
2801 }
2802
2803 return result;
1c79356b
A
2804
2805#undef RETURN
2806}
2807
3e170ce0 2808
fe8ab488
A
2809/*
2810 * Counters for the prefault optimization.
2811 */
2812int64_t vm_prefault_nb_pages = 0;
2813int64_t vm_prefault_nb_bailout = 0;
2814
2815static kern_return_t
2816vm_map_enter_mem_object_helper(
2d21ac55
A
2817 vm_map_t target_map,
2818 vm_map_offset_t *address,
2819 vm_map_size_t initial_size,
2820 vm_map_offset_t mask,
2821 int flags,
2822 ipc_port_t port,
2823 vm_object_offset_t offset,
2824 boolean_t copy,
2825 vm_prot_t cur_protection,
2826 vm_prot_t max_protection,
fe8ab488
A
2827 vm_inherit_t inheritance,
2828 upl_page_list_ptr_t page_list,
2829 unsigned int page_list_count)
91447636 2830{
2d21ac55
A
2831 vm_map_address_t map_addr;
2832 vm_map_size_t map_size;
2833 vm_object_t object;
2834 vm_object_size_t size;
2835 kern_return_t result;
6d2010ae 2836 boolean_t mask_cur_protection, mask_max_protection;
fe8ab488 2837 boolean_t try_prefault = (page_list_count != 0);
3e170ce0 2838 vm_map_offset_t offset_in_mapping = 0;
6d2010ae
A
2839
2840 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2841 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2842 cur_protection &= ~VM_PROT_IS_MASK;
2843 max_protection &= ~VM_PROT_IS_MASK;
91447636
A
2844
2845 /*
2d21ac55 2846 * Check arguments for validity
91447636 2847 */
2d21ac55
A
2848 if ((target_map == VM_MAP_NULL) ||
2849 (cur_protection & ~VM_PROT_ALL) ||
2850 (max_protection & ~VM_PROT_ALL) ||
2851 (inheritance > VM_INHERIT_LAST_VALID) ||
fe8ab488 2852 (try_prefault && (copy || !page_list)) ||
3e170ce0 2853 initial_size == 0) {
2d21ac55 2854 return KERN_INVALID_ARGUMENT;
3e170ce0 2855 }
6d2010ae 2856
3e170ce0
A
2857 {
2858 map_addr = vm_map_trunc_page(*address,
2859 VM_MAP_PAGE_MASK(target_map));
2860 map_size = vm_map_round_page(initial_size,
2861 VM_MAP_PAGE_MASK(target_map));
2862 }
39236c6e 2863 size = vm_object_round_page(initial_size);
593a1d5f 2864
2d21ac55
A
2865 /*
2866 * Find the vm object (if any) corresponding to this port.
2867 */
2868 if (!IP_VALID(port)) {
2869 object = VM_OBJECT_NULL;
2870 offset = 0;
2871 copy = FALSE;
2872 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2873 vm_named_entry_t named_entry;
2874
2875 named_entry = (vm_named_entry_t) port->ip_kobject;
39236c6e 2876
3e170ce0
A
2877 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2878 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
2879 offset += named_entry->data_offset;
2880 }
2881
2d21ac55
A
2882 /* a few checks to make sure user is obeying rules */
2883 if (size == 0) {
2884 if (offset >= named_entry->size)
2885 return KERN_INVALID_RIGHT;
2886 size = named_entry->size - offset;
2887 }
6d2010ae
A
2888 if (mask_max_protection) {
2889 max_protection &= named_entry->protection;
2890 }
2891 if (mask_cur_protection) {
2892 cur_protection &= named_entry->protection;
2893 }
2d21ac55
A
2894 if ((named_entry->protection & max_protection) !=
2895 max_protection)
2896 return KERN_INVALID_RIGHT;
2897 if ((named_entry->protection & cur_protection) !=
2898 cur_protection)
2899 return KERN_INVALID_RIGHT;
22ba694c
A
2900 if (offset + size < offset) {
2901 /* overflow */
2902 return KERN_INVALID_ARGUMENT;
2903 }
3e170ce0 2904 if (named_entry->size < (offset + initial_size)) {
2d21ac55 2905 return KERN_INVALID_ARGUMENT;
3e170ce0 2906 }
2d21ac55 2907
39236c6e
A
2908 if (named_entry->is_copy) {
2909 /* for a vm_map_copy, we can only map it whole */
2910 if ((size != named_entry->size) &&
2911 (vm_map_round_page(size,
2912 VM_MAP_PAGE_MASK(target_map)) ==
2913 named_entry->size)) {
2914 /* XXX FBDP use the rounded size... */
2915 size = vm_map_round_page(
2916 size,
2917 VM_MAP_PAGE_MASK(target_map));
2918 }
2919
fe8ab488
A
2920 if (!(flags & VM_FLAGS_ANYWHERE) &&
2921 (offset != 0 ||
2922 size != named_entry->size)) {
2923 /*
2924 * XXX for a mapping at a "fixed" address,
2925 * we can't trim after mapping the whole
2926 * memory entry, so reject a request for a
2927 * partial mapping.
2928 */
39236c6e
A
2929 return KERN_INVALID_ARGUMENT;
2930 }
2931 }
2932
2d21ac55
A
2933 /* the caller's parameter offset is defined to be the */
2934 /* offset from beginning of named entry offset in object */
2935 offset = offset + named_entry->offset;
2936
39236c6e
A
2937 if (! VM_MAP_PAGE_ALIGNED(size,
2938 VM_MAP_PAGE_MASK(target_map))) {
2939 /*
2940 * Let's not map more than requested;
2941 * vm_map_enter() will handle this "not map-aligned"
2942 * case.
2943 */
2944 map_size = size;
2945 }
2946
2d21ac55
A
2947 named_entry_lock(named_entry);
2948 if (named_entry->is_sub_map) {
2949 vm_map_t submap;
2950
3e170ce0
A
2951 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2952 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
2953 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2954 }
2955
2d21ac55
A
2956 submap = named_entry->backing.map;
2957 vm_map_lock(submap);
2958 vm_map_reference(submap);
2959 vm_map_unlock(submap);
2960 named_entry_unlock(named_entry);
2961
2962 result = vm_map_enter(target_map,
2963 &map_addr,
2964 map_size,
2965 mask,
2966 flags | VM_FLAGS_SUBMAP,
2967 (vm_object_t) submap,
2968 offset,
2969 copy,
2970 cur_protection,
2971 max_protection,
2972 inheritance);
2973 if (result != KERN_SUCCESS) {
2974 vm_map_deallocate(submap);
2975 } else {
2976 /*
2977 * No need to lock "submap" just to check its
2978 * "mapped" flag: that flag is never reset
2979 * once it's been set and if we race, we'll
2980 * just end up setting it twice, which is OK.
2981 */
316670eb
A
2982 if (submap->mapped_in_other_pmaps == FALSE &&
2983 vm_map_pmap(submap) != PMAP_NULL &&
2984 vm_map_pmap(submap) !=
2985 vm_map_pmap(target_map)) {
2d21ac55 2986 /*
316670eb
A
2987 * This submap is being mapped in a map
2988 * that uses a different pmap.
2989 * Set its "mapped_in_other_pmaps" flag
2990 * to indicate that we now need to
2991 * remove mappings from all pmaps rather
2992 * than just the submap's pmap.
2d21ac55
A
2993 */
2994 vm_map_lock(submap);
316670eb 2995 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
2996 vm_map_unlock(submap);
2997 }
2998 *address = map_addr;
2999 }
3000 return result;
3001
3002 } else if (named_entry->is_pager) {
3003 unsigned int access;
3004 vm_prot_t protections;
3005 unsigned int wimg_mode;
2d21ac55
A
3006
3007 protections = named_entry->protection & VM_PROT_ALL;
3008 access = GET_MAP_MEM(named_entry->protection);
3009
3e170ce0
A
3010 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3011 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3012 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3013 }
3014
2d21ac55
A
3015 object = vm_object_enter(named_entry->backing.pager,
3016 named_entry->size,
3017 named_entry->internal,
3018 FALSE,
3019 FALSE);
3020 if (object == VM_OBJECT_NULL) {
3021 named_entry_unlock(named_entry);
3022 return KERN_INVALID_OBJECT;
3023 }
3024
3025 /* JMM - drop reference on pager here */
3026
3027 /* create an extra ref for the named entry */
3028 vm_object_lock(object);
3029 vm_object_reference_locked(object);
3030 named_entry->backing.object = object;
3031 named_entry->is_pager = FALSE;
3032 named_entry_unlock(named_entry);
3033
3034 wimg_mode = object->wimg_bits;
6d2010ae 3035
2d21ac55
A
3036 if (access == MAP_MEM_IO) {
3037 wimg_mode = VM_WIMG_IO;
3038 } else if (access == MAP_MEM_COPYBACK) {
3039 wimg_mode = VM_WIMG_USE_DEFAULT;
316670eb
A
3040 } else if (access == MAP_MEM_INNERWBACK) {
3041 wimg_mode = VM_WIMG_INNERWBACK;
2d21ac55
A
3042 } else if (access == MAP_MEM_WTHRU) {
3043 wimg_mode = VM_WIMG_WTHRU;
3044 } else if (access == MAP_MEM_WCOMB) {
3045 wimg_mode = VM_WIMG_WCOMB;
3046 }
2d21ac55
A
3047
3048 /* wait for object (if any) to be ready */
3049 if (!named_entry->internal) {
3050 while (!object->pager_ready) {
3051 vm_object_wait(
3052 object,
3053 VM_OBJECT_EVENT_PAGER_READY,
3054 THREAD_UNINT);
3055 vm_object_lock(object);
3056 }
3057 }
3058
6d2010ae
A
3059 if (object->wimg_bits != wimg_mode)
3060 vm_object_change_wimg_mode(object, wimg_mode);
2d21ac55 3061
fe8ab488
A
3062#if VM_OBJECT_TRACKING_OP_TRUESHARE
3063 if (!object->true_share &&
3064 vm_object_tracking_inited) {
3065 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3066 int num = 0;
3067
3068 num = OSBacktrace(bt,
3069 VM_OBJECT_TRACKING_BTDEPTH);
3070 btlog_add_entry(vm_object_tracking_btlog,
3071 object,
3072 VM_OBJECT_TRACKING_OP_TRUESHARE,
3073 bt,
3074 num);
3075 }
3076#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3077
2d21ac55 3078 object->true_share = TRUE;
6d2010ae 3079
2d21ac55
A
3080 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3081 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3082 vm_object_unlock(object);
39236c6e
A
3083
3084 } else if (named_entry->is_copy) {
3085 kern_return_t kr;
3086 vm_map_copy_t copy_map;
3087 vm_map_entry_t copy_entry;
3088 vm_map_offset_t copy_addr;
3089
3090 if (flags & ~(VM_FLAGS_FIXED |
3091 VM_FLAGS_ANYWHERE |
3092 VM_FLAGS_OVERWRITE |
3e170ce0 3093 VM_FLAGS_RETURN_4K_DATA_ADDR |
39236c6e
A
3094 VM_FLAGS_RETURN_DATA_ADDR)) {
3095 named_entry_unlock(named_entry);
3096 return KERN_INVALID_ARGUMENT;
3097 }
3098
3e170ce0
A
3099 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3100 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3101 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3102 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3103 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3104 offset = vm_object_trunc_page(offset);
3105 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3106 }
3107
3108 copy_map = named_entry->backing.copy;
3109 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3110 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3111 /* unsupported type; should not happen */
3112 printf("vm_map_enter_mem_object: "
3113 "memory_entry->backing.copy "
3114 "unsupported type 0x%x\n",
3115 copy_map->type);
3116 named_entry_unlock(named_entry);
3117 return KERN_INVALID_ARGUMENT;
3118 }
3119
3120 /* reserve a contiguous range */
3121 kr = vm_map_enter(target_map,
3122 &map_addr,
fe8ab488
A
3123 /* map whole mem entry, trim later: */
3124 named_entry->size,
39236c6e
A
3125 mask,
3126 flags & (VM_FLAGS_ANYWHERE |
3127 VM_FLAGS_OVERWRITE |
3e170ce0 3128 VM_FLAGS_RETURN_4K_DATA_ADDR |
39236c6e
A
3129 VM_FLAGS_RETURN_DATA_ADDR),
3130 VM_OBJECT_NULL,
3131 0,
3132 FALSE, /* copy */
3133 cur_protection,
3134 max_protection,
3135 inheritance);
3136 if (kr != KERN_SUCCESS) {
3137 named_entry_unlock(named_entry);
3138 return kr;
3139 }
3140
3141 copy_addr = map_addr;
3142
3143 for (copy_entry = vm_map_copy_first_entry(copy_map);
3144 copy_entry != vm_map_copy_to_entry(copy_map);
3145 copy_entry = copy_entry->vme_next) {
3146 int remap_flags = 0;
3147 vm_map_t copy_submap;
3148 vm_object_t copy_object;
3149 vm_map_size_t copy_size;
3150 vm_object_offset_t copy_offset;
3151
3e170ce0 3152 copy_offset = VME_OFFSET(copy_entry);
39236c6e
A
3153 copy_size = (copy_entry->vme_end -
3154 copy_entry->vme_start);
3155
3156 /* sanity check */
fe8ab488
A
3157 if ((copy_addr + copy_size) >
3158 (map_addr +
3159 named_entry->size /* XXX full size */ )) {
39236c6e
A
3160 /* over-mapping too much !? */
3161 kr = KERN_INVALID_ARGUMENT;
3162 /* abort */
3163 break;
3164 }
3165
3166 /* take a reference on the object */
3167 if (copy_entry->is_sub_map) {
3168 remap_flags |= VM_FLAGS_SUBMAP;
3e170ce0 3169 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
3170 vm_map_lock(copy_submap);
3171 vm_map_reference(copy_submap);
3172 vm_map_unlock(copy_submap);
3173 copy_object = (vm_object_t) copy_submap;
3174 } else {
3e170ce0 3175 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
3176 vm_object_reference(copy_object);
3177 }
3178
3179 /* over-map the object into destination */
3180 remap_flags |= flags;
3181 remap_flags |= VM_FLAGS_FIXED;
3182 remap_flags |= VM_FLAGS_OVERWRITE;
3183 remap_flags &= ~VM_FLAGS_ANYWHERE;
3184 kr = vm_map_enter(target_map,
3185 &copy_addr,
3186 copy_size,
3187 (vm_map_offset_t) 0,
3188 remap_flags,
3189 copy_object,
3190 copy_offset,
3191 copy,
3192 cur_protection,
3193 max_protection,
3194 inheritance);
3195 if (kr != KERN_SUCCESS) {
3196 if (copy_entry->is_sub_map) {
3197 vm_map_deallocate(copy_submap);
3198 } else {
3199 vm_object_deallocate(copy_object);
3200 }
3201 /* abort */
3202 break;
3203 }
3204
3205 /* next mapping */
3206 copy_addr += copy_size;
3207 }
3208
3209 if (kr == KERN_SUCCESS) {
3e170ce0
A
3210 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3211 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3212 *address = map_addr + offset_in_mapping;
3213 } else {
3214 *address = map_addr;
3215 }
fe8ab488
A
3216
3217 if (offset) {
3218 /*
3219 * Trim in front, from 0 to "offset".
3220 */
3221 vm_map_remove(target_map,
3222 map_addr,
3223 map_addr + offset,
3224 0);
3225 *address += offset;
3226 }
3227 if (offset + map_size < named_entry->size) {
3228 /*
3229 * Trim in back, from
3230 * "offset + map_size" to
3231 * "named_entry->size".
3232 */
3233 vm_map_remove(target_map,
3234 (map_addr +
3235 offset + map_size),
3236 (map_addr +
3237 named_entry->size),
3238 0);
3239 }
39236c6e
A
3240 }
3241 named_entry_unlock(named_entry);
3242
3243 if (kr != KERN_SUCCESS) {
3244 if (! (flags & VM_FLAGS_OVERWRITE)) {
3245 /* deallocate the contiguous range */
3246 (void) vm_deallocate(target_map,
3247 map_addr,
3248 map_size);
3249 }
3250 }
3251
3252 return kr;
3253
2d21ac55
A
3254 } else {
3255 /* This is the case where we are going to map */
3256 /* an already mapped object. If the object is */
3257 /* not ready it is internal. An external */
3258 /* object cannot be mapped until it is ready */
3259 /* we can therefore avoid the ready check */
3260 /* in this case. */
3e170ce0
A
3261 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3262 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3263 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3264 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3265 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3266 offset = vm_object_trunc_page(offset);
3267 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3268 }
3269
2d21ac55
A
3270 object = named_entry->backing.object;
3271 assert(object != VM_OBJECT_NULL);
3272 named_entry_unlock(named_entry);
3273 vm_object_reference(object);
3274 }
3275 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3276 /*
3277 * JMM - This is temporary until we unify named entries
3278 * and raw memory objects.
3279 *
3280 * Detected fake ip_kotype for a memory object. In
3281 * this case, the port isn't really a port at all, but
3282 * instead is just a raw memory object.
3283 */
3e170ce0
A
3284 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3285 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3286 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3287 }
3288
2d21ac55
A
3289 object = vm_object_enter((memory_object_t)port,
3290 size, FALSE, FALSE, FALSE);
3291 if (object == VM_OBJECT_NULL)
3292 return KERN_INVALID_OBJECT;
3293
3294 /* wait for object (if any) to be ready */
3295 if (object != VM_OBJECT_NULL) {
3296 if (object == kernel_object) {
3297 printf("Warning: Attempt to map kernel object"
3298 " by a non-private kernel entity\n");
3299 return KERN_INVALID_OBJECT;
3300 }
b0d623f7 3301 if (!object->pager_ready) {
2d21ac55 3302 vm_object_lock(object);
b0d623f7
A
3303
3304 while (!object->pager_ready) {
3305 vm_object_wait(object,
3306 VM_OBJECT_EVENT_PAGER_READY,
3307 THREAD_UNINT);
3308 vm_object_lock(object);
3309 }
3310 vm_object_unlock(object);
2d21ac55 3311 }
2d21ac55
A
3312 }
3313 } else {
3314 return KERN_INVALID_OBJECT;
3315 }
3316
593a1d5f
A
3317 if (object != VM_OBJECT_NULL &&
3318 object->named &&
3319 object->pager != MEMORY_OBJECT_NULL &&
3320 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3321 memory_object_t pager;
3322 vm_prot_t pager_prot;
3323 kern_return_t kr;
3324
3325 /*
3326 * For "named" VM objects, let the pager know that the
3327 * memory object is being mapped. Some pagers need to keep
3328 * track of this, to know when they can reclaim the memory
3329 * object, for example.
3330 * VM calls memory_object_map() for each mapping (specifying
3331 * the protection of each mapping) and calls
3332 * memory_object_last_unmap() when all the mappings are gone.
3333 */
3334 pager_prot = max_protection;
3335 if (copy) {
3336 /*
3337 * Copy-On-Write mapping: won't modify the
3338 * memory object.
3339 */
3340 pager_prot &= ~VM_PROT_WRITE;
3341 }
3342 vm_object_lock(object);
3343 pager = object->pager;
3344 if (object->named &&
3345 pager != MEMORY_OBJECT_NULL &&
3346 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3347 assert(object->pager_ready);
3348 vm_object_mapping_wait(object, THREAD_UNINT);
3349 vm_object_mapping_begin(object);
3350 vm_object_unlock(object);
3351
3352 kr = memory_object_map(pager, pager_prot);
3353 assert(kr == KERN_SUCCESS);
3354
3355 vm_object_lock(object);
3356 vm_object_mapping_end(object);
3357 }
3358 vm_object_unlock(object);
3359 }
3360
2d21ac55
A
3361 /*
3362 * Perform the copy if requested
3363 */
3364
3365 if (copy) {
3366 vm_object_t new_object;
3367 vm_object_offset_t new_offset;
3368
3e170ce0
A
3369 result = vm_object_copy_strategically(object, offset,
3370 map_size,
2d21ac55
A
3371 &new_object, &new_offset,
3372 &copy);
3373
3374
3375 if (result == KERN_MEMORY_RESTART_COPY) {
3376 boolean_t success;
3377 boolean_t src_needs_copy;
3378
3379 /*
3380 * XXX
3381 * We currently ignore src_needs_copy.
3382 * This really is the issue of how to make
3383 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3384 * non-kernel users to use. Solution forthcoming.
3385 * In the meantime, since we don't allow non-kernel
3386 * memory managers to specify symmetric copy,
3387 * we won't run into problems here.
3388 */
3389 new_object = object;
3390 new_offset = offset;
3391 success = vm_object_copy_quickly(&new_object,
3e170ce0
A
3392 new_offset,
3393 map_size,
2d21ac55
A
3394 &src_needs_copy,
3395 &copy);
3396 assert(success);
3397 result = KERN_SUCCESS;
3398 }
3399 /*
3400 * Throw away the reference to the
3401 * original object, as it won't be mapped.
3402 */
3403
3404 vm_object_deallocate(object);
3405
3e170ce0 3406 if (result != KERN_SUCCESS) {
2d21ac55 3407 return result;
3e170ce0 3408 }
2d21ac55
A
3409
3410 object = new_object;
3411 offset = new_offset;
3412 }
3413
fe8ab488
A
3414 /*
3415 * If users want to try to prefault pages, the mapping and prefault
3416 * needs to be atomic.
3417 */
3418 if (try_prefault)
3419 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3e170ce0
A
3420
3421 {
3422 result = vm_map_enter(target_map,
3423 &map_addr, map_size,
3424 (vm_map_offset_t)mask,
3425 flags,
3426 object, offset,
3427 copy,
3428 cur_protection, max_protection,
3429 inheritance);
3430 }
2d21ac55
A
3431 if (result != KERN_SUCCESS)
3432 vm_object_deallocate(object);
39236c6e 3433
fe8ab488
A
3434 /*
3435 * Try to prefault, and do not forget to release the vm map lock.
3436 */
3437 if (result == KERN_SUCCESS && try_prefault) {
3438 mach_vm_address_t va = map_addr;
3439 kern_return_t kr = KERN_SUCCESS;
3440 unsigned int i = 0;
3441
3442 for (i = 0; i < page_list_count; ++i) {
3443 if (UPL_VALID_PAGE(page_list, i)) {
3444 /*
3445 * If this function call failed, we should stop
3446 * trying to optimize, other calls are likely
3447 * going to fail too.
3448 *
3449 * We are not gonna report an error for such
3450 * failure though. That's an optimization, not
3451 * something critical.
3452 */
3453 kr = pmap_enter_options(target_map->pmap,
3454 va, UPL_PHYS_PAGE(page_list, i),
3455 cur_protection, VM_PROT_NONE,
3456 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3457 if (kr != KERN_SUCCESS) {
3458 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3e170ce0 3459 break;
fe8ab488
A
3460 }
3461 OSIncrementAtomic64(&vm_prefault_nb_pages);
3462 }
3463
3464 /* Next virtual address */
3465 va += PAGE_SIZE;
3466 }
fe8ab488
A
3467 vm_map_unlock(target_map);
3468 }
3469
3e170ce0
A
3470 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3471 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3472 *address = map_addr + offset_in_mapping;
3473 } else {
3474 *address = map_addr;
3475 }
2d21ac55
A
3476 return result;
3477}
3478
fe8ab488
A
3479kern_return_t
3480vm_map_enter_mem_object(
3481 vm_map_t target_map,
3482 vm_map_offset_t *address,
3483 vm_map_size_t initial_size,
3484 vm_map_offset_t mask,
3485 int flags,
3486 ipc_port_t port,
3487 vm_object_offset_t offset,
3488 boolean_t copy,
3489 vm_prot_t cur_protection,
3490 vm_prot_t max_protection,
3491 vm_inherit_t inheritance)
3492{
3493 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3494 port, offset, copy, cur_protection, max_protection,
3495 inheritance, NULL, 0);
3496}
b0d623f7 3497
fe8ab488
A
3498kern_return_t
3499vm_map_enter_mem_object_prefault(
3500 vm_map_t target_map,
3501 vm_map_offset_t *address,
3502 vm_map_size_t initial_size,
3503 vm_map_offset_t mask,
3504 int flags,
3505 ipc_port_t port,
3506 vm_object_offset_t offset,
3507 vm_prot_t cur_protection,
3508 vm_prot_t max_protection,
3509 upl_page_list_ptr_t page_list,
3510 unsigned int page_list_count)
3511{
3512 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3513 port, offset, FALSE, cur_protection, max_protection,
3514 VM_INHERIT_DEFAULT, page_list, page_list_count);
3515}
b0d623f7
A
3516
3517
3518kern_return_t
3519vm_map_enter_mem_object_control(
3520 vm_map_t target_map,
3521 vm_map_offset_t *address,
3522 vm_map_size_t initial_size,
3523 vm_map_offset_t mask,
3524 int flags,
3525 memory_object_control_t control,
3526 vm_object_offset_t offset,
3527 boolean_t copy,
3528 vm_prot_t cur_protection,
3529 vm_prot_t max_protection,
3530 vm_inherit_t inheritance)
3531{
3532 vm_map_address_t map_addr;
3533 vm_map_size_t map_size;
3534 vm_object_t object;
3535 vm_object_size_t size;
3536 kern_return_t result;
3537 memory_object_t pager;
3538 vm_prot_t pager_prot;
3539 kern_return_t kr;
3540
3541 /*
3542 * Check arguments for validity
3543 */
3544 if ((target_map == VM_MAP_NULL) ||
3545 (cur_protection & ~VM_PROT_ALL) ||
3546 (max_protection & ~VM_PROT_ALL) ||
3547 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 3548 initial_size == 0) {
b0d623f7 3549 return KERN_INVALID_ARGUMENT;
3e170ce0 3550 }
b0d623f7 3551
3e170ce0
A
3552 {
3553 map_addr = vm_map_trunc_page(*address,
3554 VM_MAP_PAGE_MASK(target_map));
3555 map_size = vm_map_round_page(initial_size,
3556 VM_MAP_PAGE_MASK(target_map));
3557 }
3558 size = vm_object_round_page(initial_size);
b0d623f7
A
3559
3560 object = memory_object_control_to_vm_object(control);
3561
3562 if (object == VM_OBJECT_NULL)
3563 return KERN_INVALID_OBJECT;
3564
3565 if (object == kernel_object) {
3566 printf("Warning: Attempt to map kernel object"
3567 " by a non-private kernel entity\n");
3568 return KERN_INVALID_OBJECT;
3569 }
3570
3571 vm_object_lock(object);
3572 object->ref_count++;
3573 vm_object_res_reference(object);
3574
3575 /*
3576 * For "named" VM objects, let the pager know that the
3577 * memory object is being mapped. Some pagers need to keep
3578 * track of this, to know when they can reclaim the memory
3579 * object, for example.
3580 * VM calls memory_object_map() for each mapping (specifying
3581 * the protection of each mapping) and calls
3582 * memory_object_last_unmap() when all the mappings are gone.
3583 */
3584 pager_prot = max_protection;
3585 if (copy) {
3586 pager_prot &= ~VM_PROT_WRITE;
3587 }
3588 pager = object->pager;
3589 if (object->named &&
3590 pager != MEMORY_OBJECT_NULL &&
3591 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3592 assert(object->pager_ready);
3593 vm_object_mapping_wait(object, THREAD_UNINT);
3594 vm_object_mapping_begin(object);
3595 vm_object_unlock(object);
3596
3597 kr = memory_object_map(pager, pager_prot);
3598 assert(kr == KERN_SUCCESS);
3599
3600 vm_object_lock(object);
3601 vm_object_mapping_end(object);
3602 }
3603 vm_object_unlock(object);
3604
3605 /*
3606 * Perform the copy if requested
3607 */
3608
3609 if (copy) {
3610 vm_object_t new_object;
3611 vm_object_offset_t new_offset;
3612
3613 result = vm_object_copy_strategically(object, offset, size,
3614 &new_object, &new_offset,
3615 &copy);
3616
3617
3618 if (result == KERN_MEMORY_RESTART_COPY) {
3619 boolean_t success;
3620 boolean_t src_needs_copy;
3621
3622 /*
3623 * XXX
3624 * We currently ignore src_needs_copy.
3625 * This really is the issue of how to make
3626 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3627 * non-kernel users to use. Solution forthcoming.
3628 * In the meantime, since we don't allow non-kernel
3629 * memory managers to specify symmetric copy,
3630 * we won't run into problems here.
3631 */
3632 new_object = object;
3633 new_offset = offset;
3634 success = vm_object_copy_quickly(&new_object,
3635 new_offset, size,
3636 &src_needs_copy,
3637 &copy);
3638 assert(success);
3639 result = KERN_SUCCESS;
3640 }
3641 /*
3642 * Throw away the reference to the
3643 * original object, as it won't be mapped.
3644 */
3645
3646 vm_object_deallocate(object);
3647
3e170ce0 3648 if (result != KERN_SUCCESS) {
b0d623f7 3649 return result;
3e170ce0 3650 }
b0d623f7
A
3651
3652 object = new_object;
3653 offset = new_offset;
3654 }
3655
3e170ce0
A
3656 {
3657 result = vm_map_enter(target_map,
3658 &map_addr, map_size,
3659 (vm_map_offset_t)mask,
3660 flags,
3661 object, offset,
3662 copy,
3663 cur_protection, max_protection,
3664 inheritance);
3665 }
b0d623f7
A
3666 if (result != KERN_SUCCESS)
3667 vm_object_deallocate(object);
3668 *address = map_addr;
3669
3670 return result;
3671}
3672
3673
2d21ac55
A
3674#if VM_CPM
3675
3676#ifdef MACH_ASSERT
3677extern pmap_paddr_t avail_start, avail_end;
3678#endif
3679
3680/*
3681 * Allocate memory in the specified map, with the caveat that
3682 * the memory is physically contiguous. This call may fail
3683 * if the system can't find sufficient contiguous memory.
3684 * This call may cause or lead to heart-stopping amounts of
3685 * paging activity.
3686 *
3687 * Memory obtained from this call should be freed in the
3688 * normal way, viz., via vm_deallocate.
3689 */
3690kern_return_t
3691vm_map_enter_cpm(
3692 vm_map_t map,
3693 vm_map_offset_t *addr,
3694 vm_map_size_t size,
3695 int flags)
3696{
3697 vm_object_t cpm_obj;
3698 pmap_t pmap;
3699 vm_page_t m, pages;
3700 kern_return_t kr;
3701 vm_map_offset_t va, start, end, offset;
3702#if MACH_ASSERT
316670eb 3703 vm_map_offset_t prev_addr = 0;
2d21ac55
A
3704#endif /* MACH_ASSERT */
3705
3706 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
3707 vm_tag_t tag;
3708
3709 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 3710
2d21ac55
A
3711 if (size == 0) {
3712 *addr = 0;
3713 return KERN_SUCCESS;
3714 }
3715 if (anywhere)
3716 *addr = vm_map_min(map);
3717 else
39236c6e
A
3718 *addr = vm_map_trunc_page(*addr,
3719 VM_MAP_PAGE_MASK(map));
3720 size = vm_map_round_page(size,
3721 VM_MAP_PAGE_MASK(map));
2d21ac55
A
3722
3723 /*
3724 * LP64todo - cpm_allocate should probably allow
3725 * allocations of >4GB, but not with the current
3726 * algorithm, so just cast down the size for now.
3727 */
3728 if (size > VM_MAX_ADDRESS)
3729 return KERN_RESOURCE_SHORTAGE;
3730 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
b0d623f7 3731 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2d21ac55
A
3732 return kr;
3733
3734 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3735 assert(cpm_obj != VM_OBJECT_NULL);
3736 assert(cpm_obj->internal);
316670eb 3737 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
3738 assert(cpm_obj->can_persist == FALSE);
3739 assert(cpm_obj->pager_created == FALSE);
3740 assert(cpm_obj->pageout == FALSE);
3741 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
3742
3743 /*
3744 * Insert pages into object.
3745 */
3746
3747 vm_object_lock(cpm_obj);
3748 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3749 m = pages;
3750 pages = NEXT_PAGE(m);
0c530ab8 3751 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
3752
3753 assert(!m->gobbled);
3754 assert(!m->wanted);
3755 assert(!m->pageout);
3756 assert(!m->tabled);
b0d623f7 3757 assert(VM_PAGE_WIRED(m));
91447636
A
3758 /*
3759 * ENCRYPTED SWAP:
3760 * "m" is not supposed to be pageable, so it
3761 * should not be encrypted. It wouldn't be safe
3762 * to enter it in a new VM object while encrypted.
3763 */
3764 ASSERT_PAGE_DECRYPTED(m);
3765 assert(m->busy);
0c530ab8 3766 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
91447636
A
3767
3768 m->busy = FALSE;
3769 vm_page_insert(m, cpm_obj, offset);
3770 }
3771 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3772 vm_object_unlock(cpm_obj);
3773
3774 /*
3775 * Hang onto a reference on the object in case a
3776 * multi-threaded application for some reason decides
3777 * to deallocate the portion of the address space into
3778 * which we will insert this object.
3779 *
3780 * Unfortunately, we must insert the object now before
3781 * we can talk to the pmap module about which addresses
3782 * must be wired down. Hence, the race with a multi-
3783 * threaded app.
3784 */
3785 vm_object_reference(cpm_obj);
3786
3787 /*
3788 * Insert object into map.
3789 */
3790
3791 kr = vm_map_enter(
2d21ac55
A
3792 map,
3793 addr,
3794 size,
3795 (vm_map_offset_t)0,
3796 flags,
3797 cpm_obj,
3798 (vm_object_offset_t)0,
3799 FALSE,
3800 VM_PROT_ALL,
3801 VM_PROT_ALL,
3802 VM_INHERIT_DEFAULT);
91447636
A
3803
3804 if (kr != KERN_SUCCESS) {
3805 /*
3806 * A CPM object doesn't have can_persist set,
3807 * so all we have to do is deallocate it to
3808 * free up these pages.
3809 */
3810 assert(cpm_obj->pager_created == FALSE);
3811 assert(cpm_obj->can_persist == FALSE);
3812 assert(cpm_obj->pageout == FALSE);
3813 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3814 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3815 vm_object_deallocate(cpm_obj); /* kill creation ref */
3816 }
3817
3818 /*
3819 * Inform the physical mapping system that the
3820 * range of addresses may not fault, so that
3821 * page tables and such can be locked down as well.
3822 */
3823 start = *addr;
3824 end = start + size;
3825 pmap = vm_map_pmap(map);
3826 pmap_pageable(pmap, start, end, FALSE);
3827
3828 /*
3829 * Enter each page into the pmap, to avoid faults.
3830 * Note that this loop could be coded more efficiently,
3831 * if the need arose, rather than looking up each page
3832 * again.
3833 */
3834 for (offset = 0, va = start; offset < size;
3835 va += PAGE_SIZE, offset += PAGE_SIZE) {
2d21ac55
A
3836 int type_of_fault;
3837
91447636
A
3838 vm_object_lock(cpm_obj);
3839 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 3840 assert(m != VM_PAGE_NULL);
2d21ac55
A
3841
3842 vm_page_zero_fill(m);
3843
3844 type_of_fault = DBG_ZERO_FILL_FAULT;
3845
6d2010ae 3846 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
fe8ab488 3847 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
2d21ac55
A
3848 &type_of_fault);
3849
3850 vm_object_unlock(cpm_obj);
91447636
A
3851 }
3852
3853#if MACH_ASSERT
3854 /*
3855 * Verify ordering in address space.
3856 */
3857 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3858 vm_object_lock(cpm_obj);
3859 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3860 vm_object_unlock(cpm_obj);
3861 if (m == VM_PAGE_NULL)
316670eb
A
3862 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3863 cpm_obj, (uint64_t)offset);
91447636
A
3864 assert(m->tabled);
3865 assert(!m->busy);
3866 assert(!m->wanted);
3867 assert(!m->fictitious);
3868 assert(!m->private);
3869 assert(!m->absent);
3870 assert(!m->error);
3871 assert(!m->cleaning);
316670eb 3872 assert(!m->laundry);
91447636
A
3873 assert(!m->precious);
3874 assert(!m->clustered);
3875 if (offset != 0) {
3876 if (m->phys_page != prev_addr + 1) {
316670eb
A
3877 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3878 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3879 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3880 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
3881 panic("vm_allocate_cpm: pages not contig!");
3882 }
3883 }
3884 prev_addr = m->phys_page;
3885 }
3886#endif /* MACH_ASSERT */
3887
3888 vm_object_deallocate(cpm_obj); /* kill extra ref */
3889
3890 return kr;
3891}
3892
3893
3894#else /* VM_CPM */
3895
3896/*
3897 * Interface is defined in all cases, but unless the kernel
3898 * is built explicitly for this option, the interface does
3899 * nothing.
3900 */
3901
3902kern_return_t
3903vm_map_enter_cpm(
3904 __unused vm_map_t map,
3905 __unused vm_map_offset_t *addr,
3906 __unused vm_map_size_t size,
3907 __unused int flags)
3908{
3909 return KERN_FAILURE;
3910}
3911#endif /* VM_CPM */
3912
b0d623f7
A
3913/* Not used without nested pmaps */
3914#ifndef NO_NESTED_PMAP
2d21ac55
A
3915/*
3916 * Clip and unnest a portion of a nested submap mapping.
3917 */
b0d623f7
A
3918
3919
2d21ac55
A
3920static void
3921vm_map_clip_unnest(
3922 vm_map_t map,
3923 vm_map_entry_t entry,
3924 vm_map_offset_t start_unnest,
3925 vm_map_offset_t end_unnest)
3926{
b0d623f7
A
3927 vm_map_offset_t old_start_unnest = start_unnest;
3928 vm_map_offset_t old_end_unnest = end_unnest;
3929
2d21ac55 3930 assert(entry->is_sub_map);
3e170ce0 3931 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 3932 assert(entry->use_pmap);
2d21ac55 3933
b0d623f7
A
3934 /*
3935 * Query the platform for the optimal unnest range.
3936 * DRK: There's some duplication of effort here, since
3937 * callers may have adjusted the range to some extent. This
3938 * routine was introduced to support 1GiB subtree nesting
3939 * for x86 platforms, which can also nest on 2MiB boundaries
3940 * depending on size/alignment.
3941 */
3942 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3943 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3944 }
3945
2d21ac55
A
3946 if (entry->vme_start > start_unnest ||
3947 entry->vme_end < end_unnest) {
3948 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3949 "bad nested entry: start=0x%llx end=0x%llx\n",
3950 (long long)start_unnest, (long long)end_unnest,
3951 (long long)entry->vme_start, (long long)entry->vme_end);
3952 }
b0d623f7 3953
2d21ac55
A
3954 if (start_unnest > entry->vme_start) {
3955 _vm_map_clip_start(&map->hdr,
3956 entry,
3957 start_unnest);
3e170ce0
A
3958 if (map->holelistenabled) {
3959 vm_map_store_update_first_free(map, NULL, FALSE);
3960 } else {
3961 vm_map_store_update_first_free(map, map->first_free, FALSE);
3962 }
2d21ac55
A
3963 }
3964 if (entry->vme_end > end_unnest) {
3965 _vm_map_clip_end(&map->hdr,
3966 entry,
3967 end_unnest);
3e170ce0
A
3968 if (map->holelistenabled) {
3969 vm_map_store_update_first_free(map, NULL, FALSE);
3970 } else {
3971 vm_map_store_update_first_free(map, map->first_free, FALSE);
3972 }
2d21ac55
A
3973 }
3974
3975 pmap_unnest(map->pmap,
3976 entry->vme_start,
3977 entry->vme_end - entry->vme_start);
316670eb 3978 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
2d21ac55
A
3979 /* clean up parent map/maps */
3980 vm_map_submap_pmap_clean(
3981 map, entry->vme_start,
3982 entry->vme_end,
3e170ce0
A
3983 VME_SUBMAP(entry),
3984 VME_OFFSET(entry));
2d21ac55
A
3985 }
3986 entry->use_pmap = FALSE;
3e170ce0
A
3987 if ((map->pmap != kernel_pmap) &&
3988 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
3989 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 3990 }
2d21ac55 3991}
b0d623f7 3992#endif /* NO_NESTED_PMAP */
2d21ac55 3993
1c79356b
A
3994/*
3995 * vm_map_clip_start: [ internal use only ]
3996 *
3997 * Asserts that the given entry begins at or after
3998 * the specified address; if necessary,
3999 * it splits the entry into two.
4000 */
e2d2fc5c 4001void
2d21ac55
A
4002vm_map_clip_start(
4003 vm_map_t map,
4004 vm_map_entry_t entry,
4005 vm_map_offset_t startaddr)
4006{
0c530ab8 4007#ifndef NO_NESTED_PMAP
fe8ab488
A
4008 if (entry->is_sub_map &&
4009 entry->use_pmap &&
2d21ac55
A
4010 startaddr >= entry->vme_start) {
4011 vm_map_offset_t start_unnest, end_unnest;
4012
4013 /*
4014 * Make sure "startaddr" is no longer in a nested range
4015 * before we clip. Unnest only the minimum range the platform
4016 * can handle.
b0d623f7
A
4017 * vm_map_clip_unnest may perform additional adjustments to
4018 * the unnest range.
2d21ac55
A
4019 */
4020 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4021 end_unnest = start_unnest + pmap_nesting_size_min;
4022 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4023 }
4024#endif /* NO_NESTED_PMAP */
4025 if (startaddr > entry->vme_start) {
3e170ce0 4026 if (VME_OBJECT(entry) &&
2d21ac55 4027 !entry->is_sub_map &&
3e170ce0 4028 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4029 pmap_remove(map->pmap,
4030 (addr64_t)(entry->vme_start),
4031 (addr64_t)(entry->vme_end));
4032 }
4033 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
4034 if (map->holelistenabled) {
4035 vm_map_store_update_first_free(map, NULL, FALSE);
4036 } else {
4037 vm_map_store_update_first_free(map, map->first_free, FALSE);
4038 }
2d21ac55
A
4039 }
4040}
4041
1c79356b
A
4042
/*
 *	vm_map_copy_clip_start:
 *
 *	Counterpart of vm_map_clip_start() for an entry that lives in a
 *	vm_map_copy: splits "entry" at "startaddr" using the copy's own
 *	header, and does nothing unless "startaddr" lies beyond the
 *	entry's current start.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)			\
	MACRO_BEGIN							\
	if ((startaddr) > (entry)->vme_start)				\
		_vm_map_clip_start(&(copy)->cpy_hdr,			\
				   (entry), (startaddr));		\
	MACRO_END
4048
4049/*
4050 * This routine is called only when it is known that
4051 * the entry must be split.
4052 */
91447636 4053static void
1c79356b
A
4054_vm_map_clip_start(
4055 register struct vm_map_header *map_header,
4056 register vm_map_entry_t entry,
3e170ce0 4057 register vm_map_offset_t start)
1c79356b
A
4058{
4059 register vm_map_entry_t new_entry;
4060
4061 /*
4062 * Split off the front portion --
4063 * note that we must insert the new
4064 * entry BEFORE this one, so that
4065 * this entry has the specified starting
4066 * address.
4067 */
4068
fe8ab488
A
4069 if (entry->map_aligned) {
4070 assert(VM_MAP_PAGE_ALIGNED(start,
4071 VM_MAP_HDR_PAGE_MASK(map_header)));
4072 }
4073
7ddcb079 4074 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4075 vm_map_entry_copy_full(new_entry, entry);
4076
4077 new_entry->vme_end = start;
e2d2fc5c 4078 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 4079 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 4080 assert(start < entry->vme_end);
1c79356b
A
4081 entry->vme_start = start;
4082
6d2010ae 4083 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b
A
4084
4085 if (entry->is_sub_map)
3e170ce0 4086 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4087 else
3e170ce0 4088 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4089}
4090
4091
4092/*
4093 * vm_map_clip_end: [ internal use only ]
4094 *
4095 * Asserts that the given entry ends at or before
4096 * the specified address; if necessary,
4097 * it splits the entry into two.
4098 */
e2d2fc5c 4099void
2d21ac55
A
4100vm_map_clip_end(
4101 vm_map_t map,
4102 vm_map_entry_t entry,
4103 vm_map_offset_t endaddr)
4104{
4105 if (endaddr > entry->vme_end) {
4106 /*
4107 * Within the scope of this clipping, limit "endaddr" to
4108 * the end of this map entry...
4109 */
4110 endaddr = entry->vme_end;
4111 }
4112#ifndef NO_NESTED_PMAP
fe8ab488 4113 if (entry->is_sub_map && entry->use_pmap) {
2d21ac55
A
4114 vm_map_offset_t start_unnest, end_unnest;
4115
4116 /*
4117 * Make sure the range between the start of this entry and
4118 * the new "endaddr" is no longer nested before we clip.
4119 * Unnest only the minimum range the platform can handle.
b0d623f7
A
4120 * vm_map_clip_unnest may perform additional adjustments to
4121 * the unnest range.
2d21ac55
A
4122 */
4123 start_unnest = entry->vme_start;
4124 end_unnest =
4125 (endaddr + pmap_nesting_size_min - 1) &
4126 ~(pmap_nesting_size_min - 1);
4127 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4128 }
4129#endif /* NO_NESTED_PMAP */
4130 if (endaddr < entry->vme_end) {
3e170ce0 4131 if (VME_OBJECT(entry) &&
2d21ac55 4132 !entry->is_sub_map &&
3e170ce0 4133 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4134 pmap_remove(map->pmap,
4135 (addr64_t)(entry->vme_start),
4136 (addr64_t)(entry->vme_end));
4137 }
4138 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
4139 if (map->holelistenabled) {
4140 vm_map_store_update_first_free(map, NULL, FALSE);
4141 } else {
4142 vm_map_store_update_first_free(map, map->first_free, FALSE);
4143 }
2d21ac55
A
4144 }
4145}
0c530ab8 4146
1c79356b
A
4147
/*
 * Tail-clip "entry" inside a vm_map_copy: the split is performed on the
 * copy's own header, and only when "endaddr" lies before the entry's end.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((entry)->vme_end > (endaddr)) { \
		_vm_map_clip_end(&(copy)->cpy_hdr, (entry), (endaddr)); \
	} \
	MACRO_END
4153
4154/*
4155 * This routine is called only when it is known that
4156 * the entry must be split.
4157 */
91447636 4158static void
1c79356b
A
4159_vm_map_clip_end(
4160 register struct vm_map_header *map_header,
4161 register vm_map_entry_t entry,
2d21ac55 4162 register vm_map_offset_t end)
1c79356b
A
4163{
4164 register vm_map_entry_t new_entry;
4165
4166 /*
4167 * Create a new entry and insert it
4168 * AFTER the specified entry
4169 */
4170
fe8ab488
A
4171 if (entry->map_aligned) {
4172 assert(VM_MAP_PAGE_ALIGNED(end,
4173 VM_MAP_HDR_PAGE_MASK(map_header)));
4174 }
4175
7ddcb079 4176 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4177 vm_map_entry_copy_full(new_entry, entry);
4178
e2d2fc5c 4179 assert(entry->vme_start < end);
1c79356b 4180 new_entry->vme_start = entry->vme_end = end;
3e170ce0
A
4181 VME_OFFSET_SET(new_entry,
4182 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 4183 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 4184
6d2010ae 4185 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b
A
4186
4187 if (entry->is_sub_map)
3e170ce0 4188 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4189 else
3e170ce0 4190 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4191}
4192
4193
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps the caller's "start" and "end" variables in place so that
 *	the region lies within [vm_map_min(map), vm_map_max(map)] and
 *	"start" does not exceed "end".  Both arguments must be
 *	modifiable lvalues; they are evaluated more than once.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if ((start) < vm_map_min(map)) {	\
		(start) = vm_map_min(map);	\
	}					\
	if ((end) > vm_map_max(map)) {		\
		(end) = vm_map_max(map);	\
	}					\
	if ((start) > (end)) {			\
		(start) = (end);		\
	}					\
	MACRO_END
1c79356b
A
4209
4210/*
4211 * vm_map_range_check: [ internal use only ]
4212 *
4213 * Check that the region defined by the specified start and
4214 * end addresses are wholly contained within a single map
4215 * entry or set of adjacent map entries of the spacified map,
4216 * i.e. the specified region contains no unmapped space.
4217 * If any or all of the region is unmapped, FALSE is returned.
4218 * Otherwise, TRUE is returned and if the output argument 'entry'
4219 * is not NULL it points to the map entry containing the start
4220 * of the region.
4221 *
4222 * The map is locked for reading on entry and is left locked.
4223 */
91447636 4224static boolean_t
1c79356b
A
4225vm_map_range_check(
4226 register vm_map_t map,
91447636
A
4227 register vm_map_offset_t start,
4228 register vm_map_offset_t end,
1c79356b
A
4229 vm_map_entry_t *entry)
4230{
4231 vm_map_entry_t cur;
91447636 4232 register vm_map_offset_t prev;
1c79356b
A
4233
4234 /*
4235 * Basic sanity checks first
4236 */
4237 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4238 return (FALSE);
4239
4240 /*
4241 * Check first if the region starts within a valid
4242 * mapping for the map.
4243 */
4244 if (!vm_map_lookup_entry(map, start, &cur))
4245 return (FALSE);
4246
4247 /*
4248 * Optimize for the case that the region is contained
4249 * in a single map entry.
4250 */
4251 if (entry != (vm_map_entry_t *) NULL)
4252 *entry = cur;
4253 if (end <= cur->vme_end)
4254 return (TRUE);
4255
4256 /*
4257 * If the region is not wholly contained within a
4258 * single entry, walk the entries looking for holes.
4259 */
4260 prev = cur->vme_end;
4261 cur = cur->vme_next;
4262 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4263 if (end <= cur->vme_end)
4264 return (TRUE);
4265 prev = cur->vme_end;
4266 cur = cur->vme_next;
4267 }
4268 return (FALSE);
4269}
4270
4271/*
4272 * vm_map_submap: [ kernel use only ]
4273 *
4274 * Mark the given range as handled by a subordinate map.
4275 *
4276 * This range must have been created with vm_map_find using
4277 * the vm_submap_object, and no other operations may have been
4278 * performed on this range prior to calling vm_map_submap.
4279 *
4280 * Only a limited number of operations can be performed
4281 * within this rage after calling vm_map_submap:
4282 * vm_fault
4283 * [Don't try vm_map_copyin!]
4284 *
4285 * To remove a submapping, one must first remove the
4286 * range from the superior map, and then destroy the
4287 * submap (if desired). [Better yet, don't try it.]
4288 */
4289kern_return_t
4290vm_map_submap(
fe8ab488 4291 vm_map_t map,
91447636
A
4292 vm_map_offset_t start,
4293 vm_map_offset_t end,
fe8ab488 4294 vm_map_t submap,
91447636 4295 vm_map_offset_t offset,
0c530ab8 4296#ifdef NO_NESTED_PMAP
91447636 4297 __unused
0c530ab8 4298#endif /* NO_NESTED_PMAP */
fe8ab488 4299 boolean_t use_pmap)
1c79356b
A
4300{
4301 vm_map_entry_t entry;
4302 register kern_return_t result = KERN_INVALID_ARGUMENT;
4303 register vm_object_t object;
4304
4305 vm_map_lock(map);
4306
2d21ac55 4307 if (! vm_map_lookup_entry(map, start, &entry)) {
1c79356b 4308 entry = entry->vme_next;
2d21ac55 4309 }
1c79356b 4310
2d21ac55
A
4311 if (entry == vm_map_to_entry(map) ||
4312 entry->is_sub_map) {
1c79356b
A
4313 vm_map_unlock(map);
4314 return KERN_INVALID_ARGUMENT;
4315 }
4316
2d21ac55 4317 vm_map_clip_start(map, entry, start);
1c79356b
A
4318 vm_map_clip_end(map, entry, end);
4319
4320 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4321 (!entry->is_sub_map) &&
3e170ce0 4322 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
4323 (object->resident_page_count == 0) &&
4324 (object->copy == VM_OBJECT_NULL) &&
4325 (object->shadow == VM_OBJECT_NULL) &&
4326 (!object->pager_created)) {
3e170ce0
A
4327 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4328 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
4329 vm_object_deallocate(object);
4330 entry->is_sub_map = TRUE;
fe8ab488 4331 entry->use_pmap = FALSE;
3e170ce0 4332 VME_SUBMAP_SET(entry, submap);
2d21ac55 4333 vm_map_reference(submap);
316670eb
A
4334 if (submap->mapped_in_other_pmaps == FALSE &&
4335 vm_map_pmap(submap) != PMAP_NULL &&
4336 vm_map_pmap(submap) != vm_map_pmap(map)) {
4337 /*
4338 * This submap is being mapped in a map
4339 * that uses a different pmap.
4340 * Set its "mapped_in_other_pmaps" flag
4341 * to indicate that we now need to
4342 * remove mappings from all pmaps rather
4343 * than just the submap's pmap.
4344 */
4345 submap->mapped_in_other_pmaps = TRUE;
4346 }
2d21ac55 4347
0c530ab8 4348#ifndef NO_NESTED_PMAP
2d21ac55
A
4349 if (use_pmap) {
4350 /* nest if platform code will allow */
4351 if(submap->pmap == NULL) {
316670eb
A
4352 ledger_t ledger = map->pmap->ledger;
4353 submap->pmap = pmap_create(ledger,
4354 (vm_map_size_t) 0, FALSE);
2d21ac55
A
4355 if(submap->pmap == PMAP_NULL) {
4356 vm_map_unlock(map);
4357 return(KERN_NO_SPACE);
55e303ae 4358 }
55e303ae 4359 }
2d21ac55 4360 result = pmap_nest(map->pmap,
3e170ce0 4361 (VME_SUBMAP(entry))->pmap,
2d21ac55
A
4362 (addr64_t)start,
4363 (addr64_t)start,
4364 (uint64_t)(end - start));
4365 if(result)
4366 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4367 entry->use_pmap = TRUE;
4368 }
0c530ab8 4369#else /* NO_NESTED_PMAP */
2d21ac55 4370 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 4371#endif /* NO_NESTED_PMAP */
2d21ac55 4372 result = KERN_SUCCESS;
1c79356b
A
4373 }
4374 vm_map_unlock(map);
4375
4376 return(result);
4377}
4378
4379/*
4380 * vm_map_protect:
4381 *
4382 * Sets the protection of the specified address
4383 * region in the target map. If "set_max" is
4384 * specified, the maximum protection is to be set;
4385 * otherwise, only the current protection is affected.
4386 */
4387kern_return_t
4388vm_map_protect(
4389 register vm_map_t map,
91447636
A
4390 register vm_map_offset_t start,
4391 register vm_map_offset_t end,
1c79356b
A
4392 register vm_prot_t new_prot,
4393 register boolean_t set_max)
4394{
4395 register vm_map_entry_t current;
2d21ac55 4396 register vm_map_offset_t prev;
1c79356b
A
4397 vm_map_entry_t entry;
4398 vm_prot_t new_max;
1c79356b
A
4399
4400 XPR(XPR_VM_MAP,
2d21ac55 4401 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 4402 map, start, end, new_prot, set_max);
1c79356b
A
4403
4404 vm_map_lock(map);
4405
91447636
A
4406 /* LP64todo - remove this check when vm_map_commpage64()
4407 * no longer has to stuff in a map_entry for the commpage
4408 * above the map's max_offset.
4409 */
4410 if (start >= map->max_offset) {
4411 vm_map_unlock(map);
4412 return(KERN_INVALID_ADDRESS);
4413 }
4414
b0d623f7
A
4415 while(1) {
4416 /*
4417 * Lookup the entry. If it doesn't start in a valid
4418 * entry, return an error.
4419 */
4420 if (! vm_map_lookup_entry(map, start, &entry)) {
4421 vm_map_unlock(map);
4422 return(KERN_INVALID_ADDRESS);
4423 }
4424
4425 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4426 start = SUPERPAGE_ROUND_DOWN(start);
4427 continue;
4428 }
4429 break;
4430 }
4431 if (entry->superpage_size)
4432 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
4433
4434 /*
4435 * Make a first pass to check for protection and address
4436 * violations.
4437 */
4438
4439 current = entry;
4440 prev = current->vme_start;
4441 while ((current != vm_map_to_entry(map)) &&
4442 (current->vme_start < end)) {
4443
4444 /*
4445 * If there is a hole, return an error.
4446 */
4447 if (current->vme_start != prev) {
4448 vm_map_unlock(map);
4449 return(KERN_INVALID_ADDRESS);
4450 }
4451
4452 new_max = current->max_protection;
4453 if(new_prot & VM_PROT_COPY) {
4454 new_max |= VM_PROT_WRITE;
4455 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4456 vm_map_unlock(map);
4457 return(KERN_PROTECTION_FAILURE);
4458 }
4459 } else {
4460 if ((new_prot & new_max) != new_prot) {
4461 vm_map_unlock(map);
4462 return(KERN_PROTECTION_FAILURE);
4463 }
4464 }
4465
593a1d5f 4466
1c79356b
A
4467 prev = current->vme_end;
4468 current = current->vme_next;
4469 }
4470 if (end > prev) {
4471 vm_map_unlock(map);
4472 return(KERN_INVALID_ADDRESS);
4473 }
4474
4475 /*
4476 * Go back and fix up protections.
4477 * Clip to start here if the range starts within
4478 * the entry.
4479 */
4480
4481 current = entry;
2d21ac55
A
4482 if (current != vm_map_to_entry(map)) {
4483 /* clip and unnest if necessary */
4484 vm_map_clip_start(map, current, start);
1c79356b 4485 }
2d21ac55 4486
1c79356b
A
4487 while ((current != vm_map_to_entry(map)) &&
4488 (current->vme_start < end)) {
4489
4490 vm_prot_t old_prot;
4491
4492 vm_map_clip_end(map, current, end);
4493
fe8ab488
A
4494 if (current->is_sub_map) {
4495 /* clipping did unnest if needed */
4496 assert(!current->use_pmap);
4497 }
2d21ac55 4498
1c79356b
A
4499 old_prot = current->protection;
4500
4501 if(new_prot & VM_PROT_COPY) {
4502 /* caller is asking specifically to copy the */
4503 /* mapped data, this implies that max protection */
4504 /* will include write. Caller must be prepared */
4505 /* for loss of shared memory communication in the */
4506 /* target area after taking this step */
6d2010ae 4507
3e170ce0
A
4508 if (current->is_sub_map == FALSE &&
4509 VME_OBJECT(current) == VM_OBJECT_NULL) {
4510 VME_OBJECT_SET(current,
4511 vm_object_allocate(
4512 (vm_map_size_t)
4513 (current->vme_end -
4514 current->vme_start)));
4515 VME_OFFSET_SET(current, 0);
fe8ab488 4516 assert(current->use_pmap);
6d2010ae 4517 }
3e170ce0 4518 assert(current->wired_count == 0);
1c79356b
A
4519 current->needs_copy = TRUE;
4520 current->max_protection |= VM_PROT_WRITE;
4521 }
4522
4523 if (set_max)
4524 current->protection =
4525 (current->max_protection =
2d21ac55
A
4526 new_prot & ~VM_PROT_COPY) &
4527 old_prot;
1c79356b
A
4528 else
4529 current->protection = new_prot & ~VM_PROT_COPY;
4530
4531 /*
4532 * Update physical map if necessary.
4533 * If the request is to turn off write protection,
4534 * we won't do it for real (in pmap). This is because
4535 * it would cause copy-on-write to fail. We've already
4536 * set, the new protection in the map, so if a
4537 * write-protect fault occurred, it will be fixed up
4538 * properly, COW or not.
4539 */
1c79356b 4540 if (current->protection != old_prot) {
1c79356b
A
4541 /* Look one level in we support nested pmaps */
4542 /* from mapped submaps which are direct entries */
4543 /* in our map */
0c530ab8 4544
2d21ac55 4545 vm_prot_t prot;
0c530ab8 4546
2d21ac55
A
4547 prot = current->protection & ~VM_PROT_WRITE;
4548
3e170ce0 4549 if (override_nx(map, VME_ALIAS(current)) && prot)
0c530ab8 4550 prot |= VM_PROT_EXECUTE;
2d21ac55 4551
0c530ab8 4552 if (current->is_sub_map && current->use_pmap) {
3e170ce0 4553 pmap_protect(VME_SUBMAP(current)->pmap,
2d21ac55
A
4554 current->vme_start,
4555 current->vme_end,
4556 prot);
1c79356b 4557 } else {
2d21ac55
A
4558 pmap_protect(map->pmap,
4559 current->vme_start,
4560 current->vme_end,
4561 prot);
1c79356b 4562 }
1c79356b
A
4563 }
4564 current = current->vme_next;
4565 }
4566
5353443c 4567 current = entry;
91447636
A
4568 while ((current != vm_map_to_entry(map)) &&
4569 (current->vme_start <= end)) {
5353443c
A
4570 vm_map_simplify_entry(map, current);
4571 current = current->vme_next;
4572 }
4573
1c79356b
A
4574 vm_map_unlock(map);
4575 return(KERN_SUCCESS);
4576}
4577
4578/*
4579 * vm_map_inherit:
4580 *
4581 * Sets the inheritance of the specified address
4582 * range in the target map. Inheritance
4583 * affects how the map will be shared with
4584 * child maps at the time of vm_map_fork.
4585 */
4586kern_return_t
4587vm_map_inherit(
4588 register vm_map_t map,
91447636
A
4589 register vm_map_offset_t start,
4590 register vm_map_offset_t end,
1c79356b
A
4591 register vm_inherit_t new_inheritance)
4592{
4593 register vm_map_entry_t entry;
4594 vm_map_entry_t temp_entry;
4595
4596 vm_map_lock(map);
4597
4598 VM_MAP_RANGE_CHECK(map, start, end);
4599
4600 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4601 entry = temp_entry;
1c79356b
A
4602 }
4603 else {
4604 temp_entry = temp_entry->vme_next;
4605 entry = temp_entry;
4606 }
4607
4608 /* first check entire range for submaps which can't support the */
4609 /* given inheritance. */
4610 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4611 if(entry->is_sub_map) {
91447636
A
4612 if(new_inheritance == VM_INHERIT_COPY) {
4613 vm_map_unlock(map);
1c79356b 4614 return(KERN_INVALID_ARGUMENT);
91447636 4615 }
1c79356b
A
4616 }
4617
4618 entry = entry->vme_next;
4619 }
4620
4621 entry = temp_entry;
2d21ac55
A
4622 if (entry != vm_map_to_entry(map)) {
4623 /* clip and unnest if necessary */
4624 vm_map_clip_start(map, entry, start);
4625 }
1c79356b
A
4626
4627 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4628 vm_map_clip_end(map, entry, end);
fe8ab488
A
4629 if (entry->is_sub_map) {
4630 /* clip did unnest if needed */
4631 assert(!entry->use_pmap);
4632 }
1c79356b
A
4633
4634 entry->inheritance = new_inheritance;
4635
4636 entry = entry->vme_next;
4637 }
4638
4639 vm_map_unlock(map);
4640 return(KERN_SUCCESS);
4641}
4642
2d21ac55
A
4643/*
4644 * Update the accounting for the amount of wired memory in this map. If the user has
4645 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4646 */
4647
4648static kern_return_t
4649add_wire_counts(
4650 vm_map_t map,
4651 vm_map_entry_t entry,
4652 boolean_t user_wire)
4653{
4654 vm_map_size_t size;
4655
4656 if (user_wire) {
6d2010ae 4657 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
4658
4659 /*
4660 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4661 * this map entry.
4662 */
4663
4664 if (entry->user_wired_count == 0) {
4665 size = entry->vme_end - entry->vme_start;
4666
4667 /*
4668 * Since this is the first time the user is wiring this map entry, check to see if we're
4669 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4670 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4671 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4672 * limit, then we fail.
4673 */
4674
4675 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6d2010ae
A
4676 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4677 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
2d21ac55
A
4678 return KERN_RESOURCE_SHORTAGE;
4679
4680 /*
4681 * The first time the user wires an entry, we also increment the wired_count and add this to
4682 * the total that has been wired in the map.
4683 */
4684
4685 if (entry->wired_count >= MAX_WIRE_COUNT)
4686 return KERN_FAILURE;
4687
4688 entry->wired_count++;
4689 map->user_wire_size += size;
4690 }
4691
4692 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4693 return KERN_FAILURE;
4694
4695 entry->user_wired_count++;
4696
4697 } else {
4698
4699 /*
4700 * The kernel's wiring the memory. Just bump the count and continue.
4701 */
4702
4703 if (entry->wired_count >= MAX_WIRE_COUNT)
4704 panic("vm_map_wire: too many wirings");
4705
4706 entry->wired_count++;
4707 }
4708
4709 return KERN_SUCCESS;
4710}
4711
4712/*
4713 * Update the memory wiring accounting now that the given map entry is being unwired.
4714 */
4715
4716static void
4717subtract_wire_counts(
4718 vm_map_t map,
4719 vm_map_entry_t entry,
4720 boolean_t user_wire)
4721{
4722
4723 if (user_wire) {
4724
4725 /*
4726 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4727 */
4728
4729 if (entry->user_wired_count == 1) {
4730
4731 /*
4732 * We're removing the last user wire reference. Decrement the wired_count and the total
4733 * user wired memory for this map.
4734 */
4735
4736 assert(entry->wired_count >= 1);
4737 entry->wired_count--;
4738 map->user_wire_size -= entry->vme_end - entry->vme_start;
4739 }
4740
4741 assert(entry->user_wired_count >= 1);
4742 entry->user_wired_count--;
4743
4744 } else {
4745
4746 /*
4747 * The kernel is unwiring the memory. Just update the count.
4748 */
4749
4750 assert(entry->wired_count >= 1);
4751 entry->wired_count--;
4752 }
4753}
4754
1c79356b
A
4755/*
4756 * vm_map_wire:
4757 *
4758 * Sets the pageability of the specified address range in the
4759 * target map as wired. Regions specified as not pageable require
4760 * locked-down physical memory and physical page maps. The
4761 * access_type variable indicates types of accesses that must not
4762 * generate page faults. This is checked against protection of
4763 * memory being locked-down.
4764 *
4765 * The map must not be locked, but a reference must remain to the
4766 * map throughout the call.
4767 */
91447636 4768static kern_return_t
1c79356b
A
4769vm_map_wire_nested(
4770 register vm_map_t map,
91447636
A
4771 register vm_map_offset_t start,
4772 register vm_map_offset_t end,
3e170ce0 4773 register vm_prot_t caller_prot,
1c79356b 4774 boolean_t user_wire,
9bccf70c 4775 pmap_t map_pmap,
fe8ab488
A
4776 vm_map_offset_t pmap_addr,
4777 ppnum_t *physpage_p)
1c79356b
A
4778{
4779 register vm_map_entry_t entry;
3e170ce0 4780 register vm_prot_t access_type;
1c79356b 4781 struct vm_map_entry *first_entry, tmp_entry;
91447636
A
4782 vm_map_t real_map;
4783 register vm_map_offset_t s,e;
1c79356b
A
4784 kern_return_t rc;
4785 boolean_t need_wakeup;
4786 boolean_t main_map = FALSE;
9bccf70c 4787 wait_interrupt_t interruptible_state;
0b4e3aa0 4788 thread_t cur_thread;
1c79356b 4789 unsigned int last_timestamp;
91447636 4790 vm_map_size_t size;
fe8ab488
A
4791 boolean_t wire_and_extract;
4792
3e170ce0
A
4793 access_type = (caller_prot & VM_PROT_ALL);
4794
fe8ab488
A
4795 wire_and_extract = FALSE;
4796 if (physpage_p != NULL) {
4797 /*
4798 * The caller wants the physical page number of the
4799 * wired page. We return only one physical page number
4800 * so this works for only one page at a time.
4801 */
4802 if ((end - start) != PAGE_SIZE) {
4803 return KERN_INVALID_ARGUMENT;
4804 }
4805 wire_and_extract = TRUE;
4806 *physpage_p = 0;
4807 }
1c79356b
A
4808
4809 vm_map_lock(map);
4810 if(map_pmap == NULL)
4811 main_map = TRUE;
4812 last_timestamp = map->timestamp;
4813
4814 VM_MAP_RANGE_CHECK(map, start, end);
4815 assert(page_aligned(start));
4816 assert(page_aligned(end));
39236c6e
A
4817 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4818 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
4819 if (start == end) {
4820 /* We wired what the caller asked for, zero pages */
4821 vm_map_unlock(map);
4822 return KERN_SUCCESS;
4823 }
1c79356b 4824
2d21ac55
A
4825 need_wakeup = FALSE;
4826 cur_thread = current_thread();
4827
4828 s = start;
4829 rc = KERN_SUCCESS;
4830
4831 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 4832 entry = first_entry;
2d21ac55
A
4833 /*
4834 * vm_map_clip_start will be done later.
4835 * We don't want to unnest any nested submaps here !
4836 */
1c79356b
A
4837 } else {
4838 /* Start address is not in map */
2d21ac55
A
4839 rc = KERN_INVALID_ADDRESS;
4840 goto done;
1c79356b
A
4841 }
4842
2d21ac55
A
4843 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4844 /*
4845 * At this point, we have wired from "start" to "s".
4846 * We still need to wire from "s" to "end".
4847 *
4848 * "entry" hasn't been clipped, so it could start before "s"
4849 * and/or end after "end".
4850 */
4851
4852 /* "e" is how far we want to wire in this entry */
4853 e = entry->vme_end;
4854 if (e > end)
4855 e = end;
4856
1c79356b
A
4857 /*
4858 * If another thread is wiring/unwiring this entry then
4859 * block after informing other thread to wake us up.
4860 */
4861 if (entry->in_transition) {
9bccf70c
A
4862 wait_result_t wait_result;
4863
1c79356b
A
4864 /*
4865 * We have not clipped the entry. Make sure that
4866 * the start address is in range so that the lookup
4867 * below will succeed.
2d21ac55
A
4868 * "s" is the current starting point: we've already
4869 * wired from "start" to "s" and we still have
4870 * to wire from "s" to "end".
1c79356b 4871 */
1c79356b
A
4872
4873 entry->needs_wakeup = TRUE;
4874
4875 /*
4876 * wake up anybody waiting on entries that we have
4877 * already wired.
4878 */
4879 if (need_wakeup) {
4880 vm_map_entry_wakeup(map);
4881 need_wakeup = FALSE;
4882 }
4883 /*
4884 * User wiring is interruptible
4885 */
9bccf70c 4886 wait_result = vm_map_entry_wait(map,
2d21ac55
A
4887 (user_wire) ? THREAD_ABORTSAFE :
4888 THREAD_UNINT);
9bccf70c 4889 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
4890 /*
4891 * undo the wirings we have done so far
4892 * We do not clear the needs_wakeup flag,
4893 * because we cannot tell if we were the
4894 * only one waiting.
4895 */
2d21ac55
A
4896 rc = KERN_FAILURE;
4897 goto done;
1c79356b
A
4898 }
4899
1c79356b
A
4900 /*
4901 * Cannot avoid a lookup here. reset timestamp.
4902 */
4903 last_timestamp = map->timestamp;
4904
4905 /*
4906 * The entry could have been clipped, look it up again.
4907 * Worse that can happen is, it may not exist anymore.
4908 */
4909 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
4910 /*
4911 * User: undo everything upto the previous
4912 * entry. let vm_map_unwire worry about
4913 * checking the validity of the range.
4914 */
2d21ac55
A
4915 rc = KERN_FAILURE;
4916 goto done;
1c79356b
A
4917 }
4918 entry = first_entry;
4919 continue;
4920 }
2d21ac55
A
4921
4922 if (entry->is_sub_map) {
91447636
A
4923 vm_map_offset_t sub_start;
4924 vm_map_offset_t sub_end;
4925 vm_map_offset_t local_start;
4926 vm_map_offset_t local_end;
1c79356b 4927 pmap_t pmap;
2d21ac55 4928
fe8ab488
A
4929 if (wire_and_extract) {
4930 /*
4931 * Wiring would result in copy-on-write
4932 * which would not be compatible with
4933 * the sharing we have with the original
4934 * provider of this memory.
4935 */
4936 rc = KERN_INVALID_ARGUMENT;
4937 goto done;
4938 }
4939
2d21ac55 4940 vm_map_clip_start(map, entry, s);
1c79356b
A
4941 vm_map_clip_end(map, entry, end);
4942
3e170ce0 4943 sub_start = VME_OFFSET(entry);
2d21ac55 4944 sub_end = entry->vme_end;
3e170ce0 4945 sub_end += VME_OFFSET(entry) - entry->vme_start;
2d21ac55 4946
1c79356b
A
4947 local_end = entry->vme_end;
4948 if(map_pmap == NULL) {
2d21ac55
A
4949 vm_object_t object;
4950 vm_object_offset_t offset;
4951 vm_prot_t prot;
4952 boolean_t wired;
4953 vm_map_entry_t local_entry;
4954 vm_map_version_t version;
4955 vm_map_t lookup_map;
4956
1c79356b 4957 if(entry->use_pmap) {
3e170ce0 4958 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
4959 /* ppc implementation requires that */
4960 /* submaps pmap address ranges line */
4961 /* up with parent map */
4962#ifdef notdef
4963 pmap_addr = sub_start;
4964#endif
2d21ac55 4965 pmap_addr = s;
1c79356b
A
4966 } else {
4967 pmap = map->pmap;
2d21ac55 4968 pmap_addr = s;
1c79356b 4969 }
2d21ac55 4970
1c79356b 4971 if (entry->wired_count) {
2d21ac55
A
4972 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4973 goto done;
4974
4975 /*
4976 * The map was not unlocked:
4977 * no need to goto re-lookup.
4978 * Just go directly to next entry.
4979 */
1c79356b 4980 entry = entry->vme_next;
2d21ac55 4981 s = entry->vme_start;
1c79356b
A
4982 continue;
4983
2d21ac55 4984 }
9bccf70c 4985
2d21ac55
A
4986 /* call vm_map_lookup_locked to */
4987 /* cause any needs copy to be */
4988 /* evaluated */
4989 local_start = entry->vme_start;
4990 lookup_map = map;
4991 vm_map_lock_write_to_read(map);
4992 if(vm_map_lookup_locked(
4993 &lookup_map, local_start,
4994 access_type,
4995 OBJECT_LOCK_EXCLUSIVE,
4996 &version, &object,
4997 &offset, &prot, &wired,
4998 NULL,
4999 &real_map)) {
1c79356b 5000
2d21ac55 5001 vm_map_unlock_read(lookup_map);
4bd07ac2 5002 assert(map_pmap == NULL);
2d21ac55
A
5003 vm_map_unwire(map, start,
5004 s, user_wire);
5005 return(KERN_FAILURE);
5006 }
316670eb 5007 vm_object_unlock(object);
2d21ac55
A
5008 if(real_map != lookup_map)
5009 vm_map_unlock(real_map);
5010 vm_map_unlock_read(lookup_map);
5011 vm_map_lock(map);
1c79356b 5012
2d21ac55
A
5013 /* we unlocked, so must re-lookup */
5014 if (!vm_map_lookup_entry(map,
5015 local_start,
5016 &local_entry)) {
5017 rc = KERN_FAILURE;
5018 goto done;
5019 }
5020
5021 /*
5022 * entry could have been "simplified",
5023 * so re-clip
5024 */
5025 entry = local_entry;
5026 assert(s == local_start);
5027 vm_map_clip_start(map, entry, s);
5028 vm_map_clip_end(map, entry, end);
5029 /* re-compute "e" */
5030 e = entry->vme_end;
5031 if (e > end)
5032 e = end;
5033
5034 /* did we have a change of type? */
5035 if (!entry->is_sub_map) {
5036 last_timestamp = map->timestamp;
5037 continue;
1c79356b
A
5038 }
5039 } else {
9bccf70c 5040 local_start = entry->vme_start;
2d21ac55
A
5041 pmap = map_pmap;
5042 }
5043
5044 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5045 goto done;
5046
5047 entry->in_transition = TRUE;
5048
5049 vm_map_unlock(map);
3e170ce0 5050 rc = vm_map_wire_nested(VME_SUBMAP(entry),
1c79356b 5051 sub_start, sub_end,
3e170ce0 5052 caller_prot,
fe8ab488
A
5053 user_wire, pmap, pmap_addr,
5054 NULL);
2d21ac55 5055 vm_map_lock(map);
9bccf70c 5056
1c79356b
A
5057 /*
5058 * Find the entry again. It could have been clipped
5059 * after we unlocked the map.
5060 */
9bccf70c
A
5061 if (!vm_map_lookup_entry(map, local_start,
5062 &first_entry))
5063 panic("vm_map_wire: re-lookup failed");
5064 entry = first_entry;
1c79356b 5065
2d21ac55
A
5066 assert(local_start == s);
5067 /* re-compute "e" */
5068 e = entry->vme_end;
5069 if (e > end)
5070 e = end;
5071
1c79356b
A
5072 last_timestamp = map->timestamp;
5073 while ((entry != vm_map_to_entry(map)) &&
2d21ac55 5074 (entry->vme_start < e)) {
1c79356b
A
5075 assert(entry->in_transition);
5076 entry->in_transition = FALSE;
5077 if (entry->needs_wakeup) {
5078 entry->needs_wakeup = FALSE;
5079 need_wakeup = TRUE;
5080 }
5081 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 5082 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5083 }
5084 entry = entry->vme_next;
5085 }
5086 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5087 goto done;
1c79356b 5088 }
2d21ac55
A
5089
5090 /* no need to relookup again */
5091 s = entry->vme_start;
1c79356b
A
5092 continue;
5093 }
5094
5095 /*
5096 * If this entry is already wired then increment
5097 * the appropriate wire reference count.
5098 */
9bccf70c 5099 if (entry->wired_count) {
fe8ab488
A
5100
5101 if ((entry->protection & access_type) != access_type) {
5102 /* found a protection problem */
5103
5104 /*
5105 * XXX FBDP
5106 * We should always return an error
5107 * in this case but since we didn't
5108 * enforce it before, let's do
5109 * it only for the new "wire_and_extract"
5110 * code path for now...
5111 */
5112 if (wire_and_extract) {
5113 rc = KERN_PROTECTION_FAILURE;
5114 goto done;
5115 }
5116 }
5117
1c79356b
A
5118 /*
5119 * entry is already wired down, get our reference
5120 * after clipping to our range.
5121 */
2d21ac55 5122 vm_map_clip_start(map, entry, s);
1c79356b 5123 vm_map_clip_end(map, entry, end);
1c79356b 5124
2d21ac55
A
5125 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5126 goto done;
5127
fe8ab488
A
5128 if (wire_and_extract) {
5129 vm_object_t object;
5130 vm_object_offset_t offset;
5131 vm_page_t m;
5132
5133 /*
5134 * We don't have to "wire" the page again
5135 * but we still have to "extract" its
5136 * physical page number, after some sanity
5137 * checks.
5138 */
5139 assert((entry->vme_end - entry->vme_start)
5140 == PAGE_SIZE);
5141 assert(!entry->needs_copy);
5142 assert(!entry->is_sub_map);
3e170ce0 5143 assert(VME_OBJECT(entry));
fe8ab488
A
5144 if (((entry->vme_end - entry->vme_start)
5145 != PAGE_SIZE) ||
5146 entry->needs_copy ||
5147 entry->is_sub_map ||
3e170ce0 5148 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5149 rc = KERN_INVALID_ARGUMENT;
5150 goto done;
5151 }
5152
3e170ce0
A
5153 object = VME_OBJECT(entry);
5154 offset = VME_OFFSET(entry);
fe8ab488
A
5155 /* need exclusive lock to update m->dirty */
5156 if (entry->protection & VM_PROT_WRITE) {
5157 vm_object_lock(object);
5158 } else {
5159 vm_object_lock_shared(object);
5160 }
5161 m = vm_page_lookup(object, offset);
5162 assert(m != VM_PAGE_NULL);
5163 assert(m->wire_count);
5164 if (m != VM_PAGE_NULL && m->wire_count) {
5165 *physpage_p = m->phys_page;
5166 if (entry->protection & VM_PROT_WRITE) {
5167 vm_object_lock_assert_exclusive(
5168 m->object);
5169 m->dirty = TRUE;
5170 }
5171 } else {
5172 /* not already wired !? */
5173 *physpage_p = 0;
5174 }
5175 vm_object_unlock(object);
5176 }
5177
2d21ac55 5178 /* map was not unlocked: no need to relookup */
1c79356b 5179 entry = entry->vme_next;
2d21ac55 5180 s = entry->vme_start;
1c79356b
A
5181 continue;
5182 }
5183
5184 /*
5185 * Unwired entry or wire request transmitted via submap
5186 */
5187
5188
5189 /*
5190 * Perform actions of vm_map_lookup that need the write
5191 * lock on the map: create a shadow object for a
5192 * copy-on-write region, or an object for a zero-fill
5193 * region.
5194 */
5195 size = entry->vme_end - entry->vme_start;
5196 /*
5197 * If wiring a copy-on-write page, we need to copy it now
5198 * even if we're only (currently) requesting read access.
5199 * This is aggressive, but once it's wired we can't move it.
5200 */
5201 if (entry->needs_copy) {
fe8ab488
A
5202 if (wire_and_extract) {
5203 /*
5204 * We're supposed to share with the original
5205 * provider so should not be "needs_copy"
5206 */
5207 rc = KERN_INVALID_ARGUMENT;
5208 goto done;
5209 }
3e170ce0
A
5210
5211 VME_OBJECT_SHADOW(entry, size);
1c79356b 5212 entry->needs_copy = FALSE;
3e170ce0 5213 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5214 if (wire_and_extract) {
5215 /*
5216 * We're supposed to share with the original
5217 * provider so should already have an object.
5218 */
5219 rc = KERN_INVALID_ARGUMENT;
5220 goto done;
5221 }
3e170ce0
A
5222 VME_OBJECT_SET(entry, vm_object_allocate(size));
5223 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 5224 assert(entry->use_pmap);
1c79356b
A
5225 }
5226
2d21ac55 5227 vm_map_clip_start(map, entry, s);
1c79356b
A
5228 vm_map_clip_end(map, entry, end);
5229
2d21ac55 5230 /* re-compute "e" */
1c79356b 5231 e = entry->vme_end;
2d21ac55
A
5232 if (e > end)
5233 e = end;
1c79356b
A
5234
5235 /*
5236 * Check for holes and protection mismatch.
5237 * Holes: Next entry should be contiguous unless this
5238 * is the end of the region.
5239 * Protection: Access requested must be allowed, unless
5240 * wiring is by protection class
5241 */
2d21ac55
A
5242 if ((entry->vme_end < end) &&
5243 ((entry->vme_next == vm_map_to_entry(map)) ||
5244 (entry->vme_next->vme_start > entry->vme_end))) {
5245 /* found a hole */
5246 rc = KERN_INVALID_ADDRESS;
5247 goto done;
5248 }
5249 if ((entry->protection & access_type) != access_type) {
5250 /* found a protection problem */
5251 rc = KERN_PROTECTION_FAILURE;
5252 goto done;
1c79356b
A
5253 }
5254
5255 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5256
2d21ac55
A
5257 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5258 goto done;
1c79356b
A
5259
5260 entry->in_transition = TRUE;
5261
5262 /*
5263 * This entry might get split once we unlock the map.
5264 * In vm_fault_wire(), we need the current range as
5265 * defined by this entry. In order for this to work
5266 * along with a simultaneous clip operation, we make a
5267 * temporary copy of this entry and use that for the
5268 * wiring. Note that the underlying objects do not
5269 * change during a clip.
5270 */
5271 tmp_entry = *entry;
5272
5273 /*
5274 * The in_transition state guarantees that the entry
5275 * (or entries for this range, if split occurred) will be
5276 * there when the map lock is acquired for the second time.
5277 */
5278 vm_map_unlock(map);
0b4e3aa0 5279
9bccf70c
A
5280 if (!user_wire && cur_thread != THREAD_NULL)
5281 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
5282 else
5283 interruptible_state = THREAD_UNINT;
9bccf70c 5284
1c79356b 5285 if(map_pmap)
9bccf70c 5286 rc = vm_fault_wire(map,
3e170ce0 5287 &tmp_entry, caller_prot, map_pmap, pmap_addr,
fe8ab488 5288 physpage_p);
1c79356b 5289 else
9bccf70c 5290 rc = vm_fault_wire(map,
3e170ce0 5291 &tmp_entry, caller_prot, map->pmap,
fe8ab488
A
5292 tmp_entry.vme_start,
5293 physpage_p);
0b4e3aa0
A
5294
5295 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 5296 thread_interrupt_level(interruptible_state);
0b4e3aa0 5297
1c79356b
A
5298 vm_map_lock(map);
5299
5300 if (last_timestamp+1 != map->timestamp) {
5301 /*
5302 * Find the entry again. It could have been clipped
5303 * after we unlocked the map.
5304 */
5305 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5306 &first_entry))
1c79356b
A
5307 panic("vm_map_wire: re-lookup failed");
5308
5309 entry = first_entry;
5310 }
5311
5312 last_timestamp = map->timestamp;
5313
5314 while ((entry != vm_map_to_entry(map)) &&
5315 (entry->vme_start < tmp_entry.vme_end)) {
5316 assert(entry->in_transition);
5317 entry->in_transition = FALSE;
5318 if (entry->needs_wakeup) {
5319 entry->needs_wakeup = FALSE;
5320 need_wakeup = TRUE;
5321 }
5322 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5323 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5324 }
5325 entry = entry->vme_next;
5326 }
5327
5328 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5329 goto done;
1c79356b 5330 }
2d21ac55
A
5331
5332 s = entry->vme_start;
1c79356b 5333 } /* end while loop through map entries */
2d21ac55
A
5334
5335done:
5336 if (rc == KERN_SUCCESS) {
5337 /* repair any damage we may have made to the VM map */
5338 vm_map_simplify_range(map, start, end);
5339 }
5340
1c79356b
A
5341 vm_map_unlock(map);
5342
5343 /*
5344 * wake up anybody waiting on entries we wired.
5345 */
5346 if (need_wakeup)
5347 vm_map_entry_wakeup(map);
5348
2d21ac55
A
5349 if (rc != KERN_SUCCESS) {
5350 /* undo what has been wired so far */
4bd07ac2
A
5351 vm_map_unwire_nested(map, start, s, user_wire,
5352 map_pmap, pmap_addr);
fe8ab488
A
5353 if (physpage_p) {
5354 *physpage_p = 0;
5355 }
2d21ac55
A
5356 }
5357
5358 return rc;
1c79356b
A
5359
5360}
5361
5362kern_return_t
3e170ce0 5363vm_map_wire_external(
1c79356b 5364 register vm_map_t map,
91447636
A
5365 register vm_map_offset_t start,
5366 register vm_map_offset_t end,
3e170ce0 5367 register vm_prot_t caller_prot,
1c79356b
A
5368 boolean_t user_wire)
5369{
3e170ce0
A
5370 kern_return_t kret;
5371
5372 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5373 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5374 kret = vm_map_wire_nested(map, start, end, caller_prot,
5375 user_wire, (pmap_t)NULL, 0, NULL);
5376 return kret;
5377}
1c79356b 5378
3e170ce0
A
5379kern_return_t
5380vm_map_wire(
5381 register vm_map_t map,
5382 register vm_map_offset_t start,
5383 register vm_map_offset_t end,
5384 register vm_prot_t caller_prot,
5385 boolean_t user_wire)
5386{
1c79356b
A
5387 kern_return_t kret;
5388
3e170ce0 5389 kret = vm_map_wire_nested(map, start, end, caller_prot,
fe8ab488
A
5390 user_wire, (pmap_t)NULL, 0, NULL);
5391 return kret;
5392}
5393
5394kern_return_t
3e170ce0 5395vm_map_wire_and_extract_external(
fe8ab488
A
5396 vm_map_t map,
5397 vm_map_offset_t start,
3e170ce0 5398 vm_prot_t caller_prot,
fe8ab488
A
5399 boolean_t user_wire,
5400 ppnum_t *physpage_p)
5401{
3e170ce0
A
5402 kern_return_t kret;
5403
5404 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5405 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5406 kret = vm_map_wire_nested(map,
5407 start,
5408 start+VM_MAP_PAGE_SIZE(map),
5409 caller_prot,
5410 user_wire,
5411 (pmap_t)NULL,
5412 0,
5413 physpage_p);
5414 if (kret != KERN_SUCCESS &&
5415 physpage_p != NULL) {
5416 *physpage_p = 0;
5417 }
5418 return kret;
5419}
fe8ab488 5420
3e170ce0
A
5421kern_return_t
5422vm_map_wire_and_extract(
5423 vm_map_t map,
5424 vm_map_offset_t start,
5425 vm_prot_t caller_prot,
5426 boolean_t user_wire,
5427 ppnum_t *physpage_p)
5428{
fe8ab488
A
5429 kern_return_t kret;
5430
5431 kret = vm_map_wire_nested(map,
5432 start,
5433 start+VM_MAP_PAGE_SIZE(map),
3e170ce0 5434 caller_prot,
fe8ab488
A
5435 user_wire,
5436 (pmap_t)NULL,
5437 0,
5438 physpage_p);
5439 if (kret != KERN_SUCCESS &&
5440 physpage_p != NULL) {
5441 *physpage_p = 0;
5442 }
1c79356b
A
5443 return kret;
5444}
5445
5446/*
5447 * vm_map_unwire:
5448 *
5449 * Sets the pageability of the specified address range in the target
5450 * as pageable. Regions specified must have been wired previously.
5451 *
5452 * The map must not be locked, but a reference must remain to the map
5453 * throughout the call.
5454 *
5455 * Kernel will panic on failures. User unwire ignores holes and
5456 * unwired and intransition entries to avoid losing memory by leaving
5457 * it unwired.
5458 */
91447636 5459static kern_return_t
1c79356b
A
5460vm_map_unwire_nested(
5461 register vm_map_t map,
91447636
A
5462 register vm_map_offset_t start,
5463 register vm_map_offset_t end,
1c79356b 5464 boolean_t user_wire,
9bccf70c 5465 pmap_t map_pmap,
91447636 5466 vm_map_offset_t pmap_addr)
1c79356b
A
5467{
5468 register vm_map_entry_t entry;
5469 struct vm_map_entry *first_entry, tmp_entry;
5470 boolean_t need_wakeup;
5471 boolean_t main_map = FALSE;
5472 unsigned int last_timestamp;
5473
5474 vm_map_lock(map);
5475 if(map_pmap == NULL)
5476 main_map = TRUE;
5477 last_timestamp = map->timestamp;
5478
5479 VM_MAP_RANGE_CHECK(map, start, end);
5480 assert(page_aligned(start));
5481 assert(page_aligned(end));
39236c6e
A
5482 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5483 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 5484
2d21ac55
A
5485 if (start == end) {
5486 /* We unwired what the caller asked for: zero pages */
5487 vm_map_unlock(map);
5488 return KERN_SUCCESS;
5489 }
5490
1c79356b
A
5491 if (vm_map_lookup_entry(map, start, &first_entry)) {
5492 entry = first_entry;
2d21ac55
A
5493 /*
5494 * vm_map_clip_start will be done later.
5495 * We don't want to unnest any nested sub maps here !
5496 */
1c79356b
A
5497 }
5498 else {
2d21ac55
A
5499 if (!user_wire) {
5500 panic("vm_map_unwire: start not found");
5501 }
1c79356b
A
5502 /* Start address is not in map. */
5503 vm_map_unlock(map);
5504 return(KERN_INVALID_ADDRESS);
5505 }
5506
b0d623f7
A
5507 if (entry->superpage_size) {
5508 /* superpages are always wired */
5509 vm_map_unlock(map);
5510 return KERN_INVALID_ADDRESS;
5511 }
5512
1c79356b
A
5513 need_wakeup = FALSE;
5514 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5515 if (entry->in_transition) {
5516 /*
5517 * 1)
5518 * Another thread is wiring down this entry. Note
5519 * that if it is not for the other thread we would
5520 * be unwiring an unwired entry. This is not
5521 * permitted. If we wait, we will be unwiring memory
5522 * we did not wire.
5523 *
5524 * 2)
5525 * Another thread is unwiring this entry. We did not
5526 * have a reference to it, because if we did, this
5527 * entry will not be getting unwired now.
5528 */
2d21ac55
A
5529 if (!user_wire) {
5530 /*
5531 * XXX FBDP
5532 * This could happen: there could be some
5533 * overlapping vslock/vsunlock operations
5534 * going on.
5535 * We should probably just wait and retry,
5536 * but then we have to be careful that this
5537 * entry could get "simplified" after
5538 * "in_transition" gets unset and before
5539 * we re-lookup the entry, so we would
5540 * have to re-clip the entry to avoid
5541 * re-unwiring what we have already unwired...
5542 * See vm_map_wire_nested().
5543 *
5544 * Or we could just ignore "in_transition"
5545 * here and proceed to decement the wired
5546 * count(s) on this entry. That should be fine
5547 * as long as "wired_count" doesn't drop all
5548 * the way to 0 (and we should panic if THAT
5549 * happens).
5550 */
1c79356b 5551 panic("vm_map_unwire: in_transition entry");
2d21ac55 5552 }
1c79356b
A
5553
5554 entry = entry->vme_next;
5555 continue;
5556 }
5557
2d21ac55 5558 if (entry->is_sub_map) {
91447636
A
5559 vm_map_offset_t sub_start;
5560 vm_map_offset_t sub_end;
5561 vm_map_offset_t local_end;
1c79356b 5562 pmap_t pmap;
2d21ac55 5563
1c79356b
A
5564 vm_map_clip_start(map, entry, start);
5565 vm_map_clip_end(map, entry, end);
5566
3e170ce0 5567 sub_start = VME_OFFSET(entry);
1c79356b 5568 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 5569 sub_end += VME_OFFSET(entry);
1c79356b
A
5570 local_end = entry->vme_end;
5571 if(map_pmap == NULL) {
2d21ac55 5572 if(entry->use_pmap) {
3e170ce0 5573 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 5574 pmap_addr = sub_start;
2d21ac55 5575 } else {
1c79356b 5576 pmap = map->pmap;
9bccf70c 5577 pmap_addr = start;
2d21ac55
A
5578 }
5579 if (entry->wired_count == 0 ||
5580 (user_wire && entry->user_wired_count == 0)) {
5581 if (!user_wire)
5582 panic("vm_map_unwire: entry is unwired");
5583 entry = entry->vme_next;
5584 continue;
5585 }
5586
5587 /*
5588 * Check for holes
5589 * Holes: Next entry should be contiguous unless
5590 * this is the end of the region.
5591 */
5592 if (((entry->vme_end < end) &&
5593 ((entry->vme_next == vm_map_to_entry(map)) ||
5594 (entry->vme_next->vme_start
5595 > entry->vme_end)))) {
5596 if (!user_wire)
5597 panic("vm_map_unwire: non-contiguous region");
1c79356b 5598/*
2d21ac55
A
5599 entry = entry->vme_next;
5600 continue;
1c79356b 5601*/
2d21ac55 5602 }
1c79356b 5603
2d21ac55 5604 subtract_wire_counts(map, entry, user_wire);
1c79356b 5605
2d21ac55
A
5606 if (entry->wired_count != 0) {
5607 entry = entry->vme_next;
5608 continue;
5609 }
1c79356b 5610
2d21ac55
A
5611 entry->in_transition = TRUE;
5612 tmp_entry = *entry;/* see comment in vm_map_wire() */
5613
5614 /*
5615 * We can unlock the map now. The in_transition state
5616 * guarantees existance of the entry.
5617 */
5618 vm_map_unlock(map);
3e170ce0 5619 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5620 sub_start, sub_end, user_wire, pmap, pmap_addr);
5621 vm_map_lock(map);
1c79356b 5622
2d21ac55
A
5623 if (last_timestamp+1 != map->timestamp) {
5624 /*
5625 * Find the entry again. It could have been
5626 * clipped or deleted after we unlocked the map.
5627 */
5628 if (!vm_map_lookup_entry(map,
5629 tmp_entry.vme_start,
5630 &first_entry)) {
5631 if (!user_wire)
5632 panic("vm_map_unwire: re-lookup failed");
5633 entry = first_entry->vme_next;
5634 } else
5635 entry = first_entry;
5636 }
5637 last_timestamp = map->timestamp;
1c79356b 5638
1c79356b 5639 /*
2d21ac55
A
5640 * clear transition bit for all constituent entries
5641 * that were in the original entry (saved in
5642 * tmp_entry). Also check for waiters.
5643 */
5644 while ((entry != vm_map_to_entry(map)) &&
5645 (entry->vme_start < tmp_entry.vme_end)) {
5646 assert(entry->in_transition);
5647 entry->in_transition = FALSE;
5648 if (entry->needs_wakeup) {
5649 entry->needs_wakeup = FALSE;
5650 need_wakeup = TRUE;
5651 }
5652 entry = entry->vme_next;
1c79356b 5653 }
2d21ac55 5654 continue;
1c79356b 5655 } else {
2d21ac55 5656 vm_map_unlock(map);
3e170ce0 5657 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5658 sub_start, sub_end, user_wire, map_pmap,
5659 pmap_addr);
5660 vm_map_lock(map);
1c79356b 5661
2d21ac55
A
5662 if (last_timestamp+1 != map->timestamp) {
5663 /*
5664 * Find the entry again. It could have been
5665 * clipped or deleted after we unlocked the map.
5666 */
5667 if (!vm_map_lookup_entry(map,
5668 tmp_entry.vme_start,
5669 &first_entry)) {
5670 if (!user_wire)
5671 panic("vm_map_unwire: re-lookup failed");
5672 entry = first_entry->vme_next;
5673 } else
5674 entry = first_entry;
5675 }
5676 last_timestamp = map->timestamp;
1c79356b
A
5677 }
5678 }
5679
5680
9bccf70c 5681 if ((entry->wired_count == 0) ||
2d21ac55 5682 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
5683 if (!user_wire)
5684 panic("vm_map_unwire: entry is unwired");
5685
5686 entry = entry->vme_next;
5687 continue;
5688 }
2d21ac55 5689
1c79356b 5690 assert(entry->wired_count > 0 &&
2d21ac55 5691 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
5692
5693 vm_map_clip_start(map, entry, start);
5694 vm_map_clip_end(map, entry, end);
5695
5696 /*
5697 * Check for holes
5698 * Holes: Next entry should be contiguous unless
5699 * this is the end of the region.
5700 */
5701 if (((entry->vme_end < end) &&
2d21ac55
A
5702 ((entry->vme_next == vm_map_to_entry(map)) ||
5703 (entry->vme_next->vme_start > entry->vme_end)))) {
1c79356b
A
5704
5705 if (!user_wire)
5706 panic("vm_map_unwire: non-contiguous region");
5707 entry = entry->vme_next;
5708 continue;
5709 }
5710
2d21ac55 5711 subtract_wire_counts(map, entry, user_wire);
1c79356b 5712
9bccf70c 5713 if (entry->wired_count != 0) {
1c79356b
A
5714 entry = entry->vme_next;
5715 continue;
1c79356b
A
5716 }
5717
b0d623f7
A
5718 if(entry->zero_wired_pages) {
5719 entry->zero_wired_pages = FALSE;
5720 }
5721
1c79356b
A
5722 entry->in_transition = TRUE;
5723 tmp_entry = *entry; /* see comment in vm_map_wire() */
5724
5725 /*
5726 * We can unlock the map now. The in_transition state
5727 * guarantees existance of the entry.
5728 */
5729 vm_map_unlock(map);
5730 if(map_pmap) {
9bccf70c 5731 vm_fault_unwire(map,
2d21ac55 5732 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 5733 } else {
9bccf70c 5734 vm_fault_unwire(map,
2d21ac55
A
5735 &tmp_entry, FALSE, map->pmap,
5736 tmp_entry.vme_start);
1c79356b
A
5737 }
5738 vm_map_lock(map);
5739
5740 if (last_timestamp+1 != map->timestamp) {
5741 /*
5742 * Find the entry again. It could have been clipped
5743 * or deleted after we unlocked the map.
5744 */
5745 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5746 &first_entry)) {
1c79356b 5747 if (!user_wire)
2d21ac55 5748 panic("vm_map_unwire: re-lookup failed");
1c79356b
A
5749 entry = first_entry->vme_next;
5750 } else
5751 entry = first_entry;
5752 }
5753 last_timestamp = map->timestamp;
5754
5755 /*
5756 * clear transition bit for all constituent entries that
5757 * were in the original entry (saved in tmp_entry). Also
5758 * check for waiters.
5759 */
5760 while ((entry != vm_map_to_entry(map)) &&
5761 (entry->vme_start < tmp_entry.vme_end)) {
5762 assert(entry->in_transition);
5763 entry->in_transition = FALSE;
5764 if (entry->needs_wakeup) {
5765 entry->needs_wakeup = FALSE;
5766 need_wakeup = TRUE;
5767 }
5768 entry = entry->vme_next;
5769 }
5770 }
91447636
A
5771
5772 /*
5773 * We might have fragmented the address space when we wired this
5774 * range of addresses. Attempt to re-coalesce these VM map entries
5775 * with their neighbors now that they're no longer wired.
5776 * Under some circumstances, address space fragmentation can
5777 * prevent VM object shadow chain collapsing, which can cause
5778 * swap space leaks.
5779 */
5780 vm_map_simplify_range(map, start, end);
5781
1c79356b
A
5782 vm_map_unlock(map);
5783 /*
5784 * wake up anybody waiting on entries that we have unwired.
5785 */
5786 if (need_wakeup)
5787 vm_map_entry_wakeup(map);
5788 return(KERN_SUCCESS);
5789
5790}
5791
5792kern_return_t
5793vm_map_unwire(
5794 register vm_map_t map,
91447636
A
5795 register vm_map_offset_t start,
5796 register vm_map_offset_t end,
1c79356b
A
5797 boolean_t user_wire)
5798{
9bccf70c 5799 return vm_map_unwire_nested(map, start, end,
2d21ac55 5800 user_wire, (pmap_t)NULL, 0);
1c79356b
A
5801}
5802
5803
5804/*
5805 * vm_map_entry_delete: [ internal use only ]
5806 *
5807 * Deallocate the given entry from the target map.
5808 */
91447636 5809static void
1c79356b
A
5810vm_map_entry_delete(
5811 register vm_map_t map,
5812 register vm_map_entry_t entry)
5813{
91447636 5814 register vm_map_offset_t s, e;
1c79356b
A
5815 register vm_object_t object;
5816 register vm_map_t submap;
1c79356b
A
5817
5818 s = entry->vme_start;
5819 e = entry->vme_end;
5820 assert(page_aligned(s));
5821 assert(page_aligned(e));
39236c6e
A
5822 if (entry->map_aligned == TRUE) {
5823 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5824 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5825 }
1c79356b
A
5826 assert(entry->wired_count == 0);
5827 assert(entry->user_wired_count == 0);
b0d623f7 5828 assert(!entry->permanent);
1c79356b
A
5829
5830 if (entry->is_sub_map) {
5831 object = NULL;
3e170ce0 5832 submap = VME_SUBMAP(entry);
1c79356b
A
5833 } else {
5834 submap = NULL;
3e170ce0 5835 object = VME_OBJECT(entry);
1c79356b
A
5836 }
5837
6d2010ae 5838 vm_map_store_entry_unlink(map, entry);
1c79356b
A
5839 map->size -= e - s;
5840
5841 vm_map_entry_dispose(map, entry);
5842
5843 vm_map_unlock(map);
5844 /*
5845 * Deallocate the object only after removing all
5846 * pmap entries pointing to its pages.
5847 */
5848 if (submap)
5849 vm_map_deallocate(submap);
5850 else
2d21ac55 5851 vm_object_deallocate(object);
1c79356b
A
5852
5853}
5854
5855void
5856vm_map_submap_pmap_clean(
5857 vm_map_t map,
91447636
A
5858 vm_map_offset_t start,
5859 vm_map_offset_t end,
1c79356b 5860 vm_map_t sub_map,
91447636 5861 vm_map_offset_t offset)
1c79356b 5862{
91447636
A
5863 vm_map_offset_t submap_start;
5864 vm_map_offset_t submap_end;
5865 vm_map_size_t remove_size;
1c79356b
A
5866 vm_map_entry_t entry;
5867
5868 submap_end = offset + (end - start);
5869 submap_start = offset;
b7266188
A
5870
5871 vm_map_lock_read(sub_map);
1c79356b 5872 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
2d21ac55 5873
1c79356b
A
5874 remove_size = (entry->vme_end - entry->vme_start);
5875 if(offset > entry->vme_start)
5876 remove_size -= offset - entry->vme_start;
2d21ac55 5877
1c79356b
A
5878
5879 if(submap_end < entry->vme_end) {
5880 remove_size -=
5881 entry->vme_end - submap_end;
5882 }
5883 if(entry->is_sub_map) {
5884 vm_map_submap_pmap_clean(
5885 sub_map,
5886 start,
5887 start + remove_size,
3e170ce0
A
5888 VME_SUBMAP(entry),
5889 VME_OFFSET(entry));
1c79356b 5890 } else {
9bccf70c 5891
316670eb 5892 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
5893 && (VME_OBJECT(entry) != NULL)) {
5894 vm_object_pmap_protect_options(
5895 VME_OBJECT(entry),
5896 (VME_OFFSET(entry) +
5897 offset -
5898 entry->vme_start),
9bccf70c
A
5899 remove_size,
5900 PMAP_NULL,
5901 entry->vme_start,
3e170ce0
A
5902 VM_PROT_NONE,
5903 PMAP_OPTIONS_REMOVE);
9bccf70c
A
5904 } else {
5905 pmap_remove(map->pmap,
2d21ac55
A
5906 (addr64_t)start,
5907 (addr64_t)(start + remove_size));
9bccf70c 5908 }
1c79356b
A
5909 }
5910 }
5911
5912 entry = entry->vme_next;
2d21ac55 5913
1c79356b 5914 while((entry != vm_map_to_entry(sub_map))
2d21ac55 5915 && (entry->vme_start < submap_end)) {
1c79356b
A
5916 remove_size = (entry->vme_end - entry->vme_start);
5917 if(submap_end < entry->vme_end) {
5918 remove_size -= entry->vme_end - submap_end;
5919 }
5920 if(entry->is_sub_map) {
5921 vm_map_submap_pmap_clean(
5922 sub_map,
5923 (start + entry->vme_start) - offset,
5924 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
5925 VME_SUBMAP(entry),
5926 VME_OFFSET(entry));
1c79356b 5927 } else {
316670eb 5928 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
5929 && (VME_OBJECT(entry) != NULL)) {
5930 vm_object_pmap_protect_options(
5931 VME_OBJECT(entry),
5932 VME_OFFSET(entry),
9bccf70c
A
5933 remove_size,
5934 PMAP_NULL,
5935 entry->vme_start,
3e170ce0
A
5936 VM_PROT_NONE,
5937 PMAP_OPTIONS_REMOVE);
9bccf70c
A
5938 } else {
5939 pmap_remove(map->pmap,
2d21ac55
A
5940 (addr64_t)((start + entry->vme_start)
5941 - offset),
5942 (addr64_t)(((start + entry->vme_start)
5943 - offset) + remove_size));
9bccf70c 5944 }
1c79356b
A
5945 }
5946 entry = entry->vme_next;
b7266188
A
5947 }
5948 vm_map_unlock_read(sub_map);
1c79356b
A
5949 return;
5950}
5951
5952/*
5953 * vm_map_delete: [ internal use only ]
5954 *
5955 * Deallocates the given address range from the target map.
5956 * Removes all user wirings. Unwires one kernel wiring if
5957 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
5958 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
5959 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5960 *
5961 * This routine is called with map locked and leaves map locked.
5962 */
91447636 5963static kern_return_t
1c79356b 5964vm_map_delete(
91447636
A
5965 vm_map_t map,
5966 vm_map_offset_t start,
5967 vm_map_offset_t end,
5968 int flags,
5969 vm_map_t zap_map)
1c79356b
A
5970{
5971 vm_map_entry_t entry, next;
5972 struct vm_map_entry *first_entry, tmp_entry;
2d21ac55 5973 register vm_map_offset_t s;
1c79356b
A
5974 register vm_object_t object;
5975 boolean_t need_wakeup;
5976 unsigned int last_timestamp = ~0; /* unlikely value */
5977 int interruptible;
1c79356b
A
5978
5979 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
2d21ac55 5980 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
5981
5982 /*
5983 * All our DMA I/O operations in IOKit are currently done by
5984 * wiring through the map entries of the task requesting the I/O.
5985 * Because of this, we must always wait for kernel wirings
5986 * to go away on the entries before deleting them.
5987 *
5988 * Any caller who wants to actually remove a kernel wiring
5989 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5990 * properly remove one wiring instead of blasting through
5991 * them all.
5992 */
5993 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5994
b0d623f7
A
5995 while(1) {
5996 /*
5997 * Find the start of the region, and clip it
5998 */
5999 if (vm_map_lookup_entry(map, start, &first_entry)) {
6000 entry = first_entry;
fe8ab488
A
6001 if (map == kalloc_map &&
6002 (entry->vme_start != start ||
6003 entry->vme_end != end)) {
6004 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6005 "mismatched entry %p [0x%llx:0x%llx]\n",
6006 map,
6007 (uint64_t)start,
6008 (uint64_t)end,
6009 entry,
6010 (uint64_t)entry->vme_start,
6011 (uint64_t)entry->vme_end);
6012 }
b0d623f7
A
6013 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start);
6014 start = SUPERPAGE_ROUND_DOWN(start);
6015 continue;
6016 }
6017 if (start == entry->vme_start) {
6018 /*
6019 * No need to clip. We don't want to cause
6020 * any unnecessary unnesting in this case...
6021 */
6022 } else {
fe8ab488
A
6023 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6024 entry->map_aligned &&
6025 !VM_MAP_PAGE_ALIGNED(
6026 start,
6027 VM_MAP_PAGE_MASK(map))) {
6028 /*
6029 * The entry will no longer be
6030 * map-aligned after clipping
6031 * and the caller said it's OK.
6032 */
6033 entry->map_aligned = FALSE;
6034 }
6035 if (map == kalloc_map) {
6036 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6037 " clipping %p at 0x%llx\n",
6038 map,
6039 (uint64_t)start,
6040 (uint64_t)end,
6041 entry,
6042 (uint64_t)start);
6043 }
b0d623f7
A
6044 vm_map_clip_start(map, entry, start);
6045 }
6046
2d21ac55 6047 /*
b0d623f7
A
6048 * Fix the lookup hint now, rather than each
6049 * time through the loop.
2d21ac55 6050 */
b0d623f7 6051 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 6052 } else {
fe8ab488
A
6053 if (map->pmap == kernel_pmap &&
6054 map->ref_count != 0) {
6055 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6056 "no map entry at 0x%llx\n",
6057 map,
6058 (uint64_t)start,
6059 (uint64_t)end,
6060 (uint64_t)start);
6061 }
b0d623f7 6062 entry = first_entry->vme_next;
2d21ac55 6063 }
b0d623f7 6064 break;
1c79356b 6065 }
b0d623f7
A
6066 if (entry->superpage_size)
6067 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
6068
6069 need_wakeup = FALSE;
6070 /*
6071 * Step through all entries in this region
6072 */
2d21ac55
A
6073 s = entry->vme_start;
6074 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6075 /*
6076 * At this point, we have deleted all the memory entries
6077 * between "start" and "s". We still need to delete
6078 * all memory entries between "s" and "end".
6079 * While we were blocked and the map was unlocked, some
6080 * new memory entries could have been re-allocated between
6081 * "start" and "s" and we don't want to mess with those.
6082 * Some of those entries could even have been re-assembled
6083 * with an entry after "s" (in vm_map_simplify_entry()), so
6084 * we may have to vm_map_clip_start() again.
6085 */
1c79356b 6086
2d21ac55
A
6087 if (entry->vme_start >= s) {
6088 /*
6089 * This entry starts on or after "s"
6090 * so no need to clip its start.
6091 */
6092 } else {
6093 /*
6094 * This entry has been re-assembled by a
6095 * vm_map_simplify_entry(). We need to
6096 * re-clip its start.
6097 */
fe8ab488
A
6098 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6099 entry->map_aligned &&
6100 !VM_MAP_PAGE_ALIGNED(s,
6101 VM_MAP_PAGE_MASK(map))) {
6102 /*
6103 * The entry will no longer be map-aligned
6104 * after clipping and the caller said it's OK.
6105 */
6106 entry->map_aligned = FALSE;
6107 }
6108 if (map == kalloc_map) {
6109 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6110 "clipping %p at 0x%llx\n",
6111 map,
6112 (uint64_t)start,
6113 (uint64_t)end,
6114 entry,
6115 (uint64_t)s);
6116 }
2d21ac55
A
6117 vm_map_clip_start(map, entry, s);
6118 }
6119 if (entry->vme_end <= end) {
6120 /*
6121 * This entry is going away completely, so no need
6122 * to clip and possibly cause an unnecessary unnesting.
6123 */
6124 } else {
fe8ab488
A
6125 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6126 entry->map_aligned &&
6127 !VM_MAP_PAGE_ALIGNED(end,
6128 VM_MAP_PAGE_MASK(map))) {
6129 /*
6130 * The entry will no longer be map-aligned
6131 * after clipping and the caller said it's OK.
6132 */
6133 entry->map_aligned = FALSE;
6134 }
6135 if (map == kalloc_map) {
6136 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6137 "clipping %p at 0x%llx\n",
6138 map,
6139 (uint64_t)start,
6140 (uint64_t)end,
6141 entry,
6142 (uint64_t)end);
6143 }
2d21ac55
A
6144 vm_map_clip_end(map, entry, end);
6145 }
b0d623f7
A
6146
6147 if (entry->permanent) {
6148 panic("attempt to remove permanent VM map entry "
6149 "%p [0x%llx:0x%llx]\n",
6150 entry, (uint64_t) s, (uint64_t) end);
6151 }
6152
6153
1c79356b 6154 if (entry->in_transition) {
9bccf70c
A
6155 wait_result_t wait_result;
6156
1c79356b
A
6157 /*
6158 * Another thread is wiring/unwiring this entry.
6159 * Let the other thread know we are waiting.
6160 */
2d21ac55 6161 assert(s == entry->vme_start);
1c79356b
A
6162 entry->needs_wakeup = TRUE;
6163
6164 /*
6165 * wake up anybody waiting on entries that we have
6166 * already unwired/deleted.
6167 */
6168 if (need_wakeup) {
6169 vm_map_entry_wakeup(map);
6170 need_wakeup = FALSE;
6171 }
6172
9bccf70c 6173 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
6174
6175 if (interruptible &&
9bccf70c 6176 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
6177 /*
6178 * We do not clear the needs_wakeup flag,
6179 * since we cannot tell if we were the only one.
6180 */
6181 return KERN_ABORTED;
9bccf70c 6182 }
1c79356b
A
6183
6184 /*
6185 * The entry could have been clipped or it
6186 * may not exist anymore. Look it up again.
6187 */
6188 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
6189 /*
6190 * User: use the next entry
6191 */
6192 entry = first_entry->vme_next;
2d21ac55 6193 s = entry->vme_start;
1c79356b
A
6194 } else {
6195 entry = first_entry;
0c530ab8 6196 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6197 }
9bccf70c 6198 last_timestamp = map->timestamp;
1c79356b
A
6199 continue;
6200 } /* end in_transition */
6201
6202 if (entry->wired_count) {
2d21ac55
A
6203 boolean_t user_wire;
6204
6205 user_wire = entry->user_wired_count > 0;
6206
1c79356b 6207 /*
b0d623f7 6208 * Remove a kernel wiring if requested
1c79356b 6209 */
b0d623f7 6210 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 6211 entry->wired_count--;
b0d623f7
A
6212 }
6213
6214 /*
6215 * Remove all user wirings for proper accounting
6216 */
6217 if (entry->user_wired_count > 0) {
6218 while (entry->user_wired_count)
6219 subtract_wire_counts(map, entry, user_wire);
6220 }
1c79356b
A
6221
6222 if (entry->wired_count != 0) {
2d21ac55 6223 assert(map != kernel_map);
1c79356b
A
6224 /*
6225 * Cannot continue. Typical case is when
6226 * a user thread has physical io pending on
6227 * on this page. Either wait for the
6228 * kernel wiring to go away or return an
6229 * error.
6230 */
6231 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 6232 wait_result_t wait_result;
1c79356b 6233
2d21ac55 6234 assert(s == entry->vme_start);
1c79356b 6235 entry->needs_wakeup = TRUE;
9bccf70c 6236 wait_result = vm_map_entry_wait(map,
2d21ac55 6237 interruptible);
1c79356b
A
6238
6239 if (interruptible &&
2d21ac55 6240 wait_result == THREAD_INTERRUPTED) {
1c79356b 6241 /*
2d21ac55 6242 * We do not clear the
1c79356b
A
6243 * needs_wakeup flag, since we
6244 * cannot tell if we were the
6245 * only one.
2d21ac55 6246 */
1c79356b 6247 return KERN_ABORTED;
9bccf70c 6248 }
1c79356b
A
6249
6250 /*
2d21ac55 6251 * The entry could have been clipped or
1c79356b
A
6252 * it may not exist anymore. Look it
6253 * up again.
2d21ac55 6254 */
1c79356b 6255 if (!vm_map_lookup_entry(map, s,
2d21ac55
A
6256 &first_entry)) {
6257 assert(map != kernel_map);
1c79356b 6258 /*
2d21ac55
A
6259 * User: use the next entry
6260 */
1c79356b 6261 entry = first_entry->vme_next;
2d21ac55 6262 s = entry->vme_start;
1c79356b
A
6263 } else {
6264 entry = first_entry;
0c530ab8 6265 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6266 }
9bccf70c 6267 last_timestamp = map->timestamp;
1c79356b
A
6268 continue;
6269 }
6270 else {
6271 return KERN_FAILURE;
6272 }
6273 }
6274
6275 entry->in_transition = TRUE;
6276 /*
6277 * copy current entry. see comment in vm_map_wire()
6278 */
6279 tmp_entry = *entry;
2d21ac55 6280 assert(s == entry->vme_start);
1c79356b
A
6281
6282 /*
6283 * We can unlock the map now. The in_transition
6284 * state guarantees existence of the entry.
6285 */
6286 vm_map_unlock(map);
2d21ac55
A
6287
6288 if (tmp_entry.is_sub_map) {
6289 vm_map_t sub_map;
6290 vm_map_offset_t sub_start, sub_end;
6291 pmap_t pmap;
6292 vm_map_offset_t pmap_addr;
6293
6294
3e170ce0
A
6295 sub_map = VME_SUBMAP(&tmp_entry);
6296 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55
A
6297 sub_end = sub_start + (tmp_entry.vme_end -
6298 tmp_entry.vme_start);
6299 if (tmp_entry.use_pmap) {
6300 pmap = sub_map->pmap;
6301 pmap_addr = tmp_entry.vme_start;
6302 } else {
6303 pmap = map->pmap;
6304 pmap_addr = tmp_entry.vme_start;
6305 }
6306 (void) vm_map_unwire_nested(sub_map,
6307 sub_start, sub_end,
6308 user_wire,
6309 pmap, pmap_addr);
6310 } else {
6311
3e170ce0 6312 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
6313 pmap_protect_options(
6314 map->pmap,
6315 tmp_entry.vme_start,
6316 tmp_entry.vme_end,
6317 VM_PROT_NONE,
6318 PMAP_OPTIONS_REMOVE,
6319 NULL);
6320 }
2d21ac55 6321 vm_fault_unwire(map, &tmp_entry,
3e170ce0 6322 VME_OBJECT(&tmp_entry) == kernel_object,
2d21ac55
A
6323 map->pmap, tmp_entry.vme_start);
6324 }
6325
1c79356b
A
6326 vm_map_lock(map);
6327
6328 if (last_timestamp+1 != map->timestamp) {
6329 /*
6330 * Find the entry again. It could have
6331 * been clipped after we unlocked the map.
6332 */
6333 if (!vm_map_lookup_entry(map, s, &first_entry)){
6334 assert((map != kernel_map) &&
2d21ac55 6335 (!entry->is_sub_map));
1c79356b 6336 first_entry = first_entry->vme_next;
2d21ac55 6337 s = first_entry->vme_start;
1c79356b 6338 } else {
0c530ab8 6339 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6340 }
6341 } else {
0c530ab8 6342 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6343 first_entry = entry;
6344 }
6345
6346 last_timestamp = map->timestamp;
6347
6348 entry = first_entry;
6349 while ((entry != vm_map_to_entry(map)) &&
6350 (entry->vme_start < tmp_entry.vme_end)) {
6351 assert(entry->in_transition);
6352 entry->in_transition = FALSE;
6353 if (entry->needs_wakeup) {
6354 entry->needs_wakeup = FALSE;
6355 need_wakeup = TRUE;
6356 }
6357 entry = entry->vme_next;
6358 }
6359 /*
6360 * We have unwired the entry(s). Go back and
6361 * delete them.
6362 */
6363 entry = first_entry;
6364 continue;
6365 }
6366
6367 /* entry is unwired */
6368 assert(entry->wired_count == 0);
6369 assert(entry->user_wired_count == 0);
6370
2d21ac55
A
6371 assert(s == entry->vme_start);
6372
6373 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6374 /*
6375 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6376 * vm_map_delete(), some map entries might have been
6377 * transferred to a "zap_map", which doesn't have a
6378 * pmap. The original pmap has already been flushed
6379 * in the vm_map_delete() call targeting the original
6380 * map, but when we get to destroying the "zap_map",
6381 * we don't have any pmap to flush, so let's just skip
6382 * all this.
6383 */
6384 } else if (entry->is_sub_map) {
6385 if (entry->use_pmap) {
0c530ab8 6386#ifndef NO_NESTED_PMAP
3e170ce0
A
6387 int pmap_flags;
6388
6389 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6390 /*
6391 * This is the final cleanup of the
6392 * address space being terminated.
6393 * No new mappings are expected and
6394 * we don't really need to unnest the
6395 * shared region (and lose the "global"
6396 * pmap mappings, if applicable).
6397 *
6398 * Tell the pmap layer that we're
6399 * "clean" wrt nesting.
6400 */
6401 pmap_flags = PMAP_UNNEST_CLEAN;
6402 } else {
6403 /*
6404 * We're unmapping part of the nested
6405 * shared region, so we can't keep the
6406 * nested pmap.
6407 */
6408 pmap_flags = 0;
6409 }
6410 pmap_unnest_options(
6411 map->pmap,
6412 (addr64_t)entry->vme_start,
6413 entry->vme_end - entry->vme_start,
6414 pmap_flags);
0c530ab8 6415#endif /* NO_NESTED_PMAP */
316670eb 6416 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
9bccf70c
A
6417 /* clean up parent map/maps */
6418 vm_map_submap_pmap_clean(
6419 map, entry->vme_start,
6420 entry->vme_end,
3e170ce0
A
6421 VME_SUBMAP(entry),
6422 VME_OFFSET(entry));
9bccf70c 6423 }
2d21ac55 6424 } else {
1c79356b
A
6425 vm_map_submap_pmap_clean(
6426 map, entry->vme_start, entry->vme_end,
3e170ce0
A
6427 VME_SUBMAP(entry),
6428 VME_OFFSET(entry));
2d21ac55 6429 }
3e170ce0
A
6430 } else if (VME_OBJECT(entry) != kernel_object &&
6431 VME_OBJECT(entry) != compressor_object) {
6432 object = VME_OBJECT(entry);
39236c6e
A
6433 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6434 vm_object_pmap_protect_options(
3e170ce0 6435 object, VME_OFFSET(entry),
55e303ae
A
6436 entry->vme_end - entry->vme_start,
6437 PMAP_NULL,
6438 entry->vme_start,
39236c6e
A
6439 VM_PROT_NONE,
6440 PMAP_OPTIONS_REMOVE);
3e170ce0 6441 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
39236c6e
A
6442 (map->pmap == kernel_pmap)) {
6443 /* Remove translations associated
6444 * with this range unless the entry
6445 * does not have an object, or
6446 * it's the kernel map or a descendant
6447 * since the platform could potentially
6448 * create "backdoor" mappings invisible
6449 * to the VM. It is expected that
6450 * objectless, non-kernel ranges
6451 * do not have such VM invisible
6452 * translations.
6453 */
6454 pmap_remove_options(map->pmap,
6455 (addr64_t)entry->vme_start,
6456 (addr64_t)entry->vme_end,
6457 PMAP_OPTIONS_REMOVE);
1c79356b
A
6458 }
6459 }
6460
fe8ab488
A
6461 if (entry->iokit_acct) {
6462 /* alternate accounting */
6463 vm_map_iokit_unmapped_region(map,
6464 (entry->vme_end -
6465 entry->vme_start));
6466 entry->iokit_acct = FALSE;
6467 }
6468
91447636
A
6469 /*
6470 * All pmap mappings for this map entry must have been
6471 * cleared by now.
6472 */
fe8ab488 6473#if DEBUG
91447636
A
6474 assert(vm_map_pmap_is_empty(map,
6475 entry->vme_start,
6476 entry->vme_end));
fe8ab488 6477#endif /* DEBUG */
91447636 6478
1c79356b 6479 next = entry->vme_next;
fe8ab488
A
6480
6481 if (map->pmap == kernel_pmap &&
6482 map->ref_count != 0 &&
6483 entry->vme_end < end &&
6484 (next == vm_map_to_entry(map) ||
6485 next->vme_start != entry->vme_end)) {
6486 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6487 "hole after %p at 0x%llx\n",
6488 map,
6489 (uint64_t)start,
6490 (uint64_t)end,
6491 entry,
6492 (uint64_t)entry->vme_end);
6493 }
6494
1c79356b
A
6495 s = next->vme_start;
6496 last_timestamp = map->timestamp;
91447636
A
6497
6498 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6499 zap_map != VM_MAP_NULL) {
2d21ac55 6500 vm_map_size_t entry_size;
91447636
A
6501 /*
6502 * The caller wants to save the affected VM map entries
6503 * into the "zap_map". The caller will take care of
6504 * these entries.
6505 */
6506 /* unlink the entry from "map" ... */
6d2010ae 6507 vm_map_store_entry_unlink(map, entry);
91447636 6508 /* ... and add it to the end of the "zap_map" */
6d2010ae 6509 vm_map_store_entry_link(zap_map,
91447636
A
6510 vm_map_last_entry(zap_map),
6511 entry);
2d21ac55
A
6512 entry_size = entry->vme_end - entry->vme_start;
6513 map->size -= entry_size;
6514 zap_map->size += entry_size;
6515 /* we didn't unlock the map, so no timestamp increase */
6516 last_timestamp--;
91447636
A
6517 } else {
6518 vm_map_entry_delete(map, entry);
6519 /* vm_map_entry_delete unlocks the map */
6520 vm_map_lock(map);
6521 }
6522
1c79356b
A
6523 entry = next;
6524
6525 if(entry == vm_map_to_entry(map)) {
6526 break;
6527 }
6528 if (last_timestamp+1 != map->timestamp) {
6529 /*
6530 * we are responsible for deleting everything
6531 * from the give space, if someone has interfered
6532 * we pick up where we left off, back fills should
6533 * be all right for anyone except map_delete and
6534 * we have to assume that the task has been fully
6535 * disabled before we get here
6536 */
6537 if (!vm_map_lookup_entry(map, s, &entry)){
6538 entry = entry->vme_next;
2d21ac55 6539 s = entry->vme_start;
1c79356b 6540 } else {
2d21ac55 6541 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6542 }
6543 /*
6544 * others can not only allocate behind us, we can
6545 * also see coalesce while we don't have the map lock
6546 */
6547 if(entry == vm_map_to_entry(map)) {
6548 break;
6549 }
1c79356b
A
6550 }
6551 last_timestamp = map->timestamp;
6552 }
6553
6554 if (map->wait_for_space)
6555 thread_wakeup((event_t) map);
6556 /*
6557 * wake up anybody waiting on entries that we have already deleted.
6558 */
6559 if (need_wakeup)
6560 vm_map_entry_wakeup(map);
6561
6562 return KERN_SUCCESS;
6563}
6564
6565/*
6566 * vm_map_remove:
6567 *
6568 * Remove the given address range from the target map.
6569 * This is the exported form of vm_map_delete.
6570 */
6571kern_return_t
6572vm_map_remove(
6573 register vm_map_t map,
91447636
A
6574 register vm_map_offset_t start,
6575 register vm_map_offset_t end,
1c79356b
A
6576 register boolean_t flags)
6577{
6578 register kern_return_t result;
9bccf70c 6579
1c79356b
A
6580 vm_map_lock(map);
6581 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
6582 /*
6583 * For the zone_map, the kernel controls the allocation/freeing of memory.
6584 * Any free to the zone_map should be within the bounds of the map and
6585 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6586 * free to the zone_map into a no-op, there is a problem and we should
6587 * panic.
6588 */
6589 if ((map == zone_map) && (start == end))
6590 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
91447636 6591 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 6592 vm_map_unlock(map);
91447636 6593
1c79356b
A
6594 return(result);
6595}
6596
6597
1c79356b
A
6598/*
6599 * Routine: vm_map_copy_discard
6600 *
6601 * Description:
6602 * Dispose of a map copy object (returned by
6603 * vm_map_copyin).
6604 */
6605void
6606vm_map_copy_discard(
6607 vm_map_copy_t copy)
6608{
1c79356b
A
6609 if (copy == VM_MAP_COPY_NULL)
6610 return;
6611
6612 switch (copy->type) {
6613 case VM_MAP_COPY_ENTRY_LIST:
6614 while (vm_map_copy_first_entry(copy) !=
2d21ac55 6615 vm_map_copy_to_entry(copy)) {
1c79356b
A
6616 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6617
6618 vm_map_copy_entry_unlink(copy, entry);
39236c6e 6619 if (entry->is_sub_map) {
3e170ce0 6620 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 6621 } else {
3e170ce0 6622 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 6623 }
1c79356b
A
6624 vm_map_copy_entry_dispose(copy, entry);
6625 }
6626 break;
6627 case VM_MAP_COPY_OBJECT:
6628 vm_object_deallocate(copy->cpy_object);
6629 break;
1c79356b
A
6630 case VM_MAP_COPY_KERNEL_BUFFER:
6631
6632 /*
6633 * The vm_map_copy_t and possibly the data buffer were
6634 * allocated by a single call to kalloc(), i.e. the
6635 * vm_map_copy_t was not allocated out of the zone.
6636 */
3e170ce0
A
6637 if (copy->size > msg_ool_size_small || copy->offset)
6638 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6639 (long long)copy->size, (long long)copy->offset);
6640 kfree(copy, copy->size + cpy_kdata_hdr_sz);
1c79356b
A
6641 return;
6642 }
91447636 6643 zfree(vm_map_copy_zone, copy);
1c79356b
A
6644}
6645
6646/*
6647 * Routine: vm_map_copy_copy
6648 *
6649 * Description:
6650 * Move the information in a map copy object to
6651 * a new map copy object, leaving the old one
6652 * empty.
6653 *
6654 * This is used by kernel routines that need
6655 * to look at out-of-line data (in copyin form)
6656 * before deciding whether to return SUCCESS.
6657 * If the routine returns FAILURE, the original
6658 * copy object will be deallocated; therefore,
6659 * these routines must make a copy of the copy
6660 * object and leave the original empty so that
6661 * deallocation will not fail.
6662 */
6663vm_map_copy_t
6664vm_map_copy_copy(
6665 vm_map_copy_t copy)
6666{
6667 vm_map_copy_t new_copy;
6668
6669 if (copy == VM_MAP_COPY_NULL)
6670 return VM_MAP_COPY_NULL;
6671
6672 /*
6673 * Allocate a new copy object, and copy the information
6674 * from the old one into it.
6675 */
6676
6677 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 6678 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
6679 *new_copy = *copy;
6680
6681 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6682 /*
6683 * The links in the entry chain must be
6684 * changed to point to the new copy object.
6685 */
6686 vm_map_copy_first_entry(copy)->vme_prev
6687 = vm_map_copy_to_entry(new_copy);
6688 vm_map_copy_last_entry(copy)->vme_next
6689 = vm_map_copy_to_entry(new_copy);
6690 }
6691
6692 /*
6693 * Change the old copy object into one that contains
6694 * nothing to be deallocated.
6695 */
6696 copy->type = VM_MAP_COPY_OBJECT;
6697 copy->cpy_object = VM_OBJECT_NULL;
6698
6699 /*
6700 * Return the new object.
6701 */
6702 return new_copy;
6703}
6704
91447636 6705static kern_return_t
1c79356b
A
6706vm_map_overwrite_submap_recurse(
6707 vm_map_t dst_map,
91447636
A
6708 vm_map_offset_t dst_addr,
6709 vm_map_size_t dst_size)
1c79356b 6710{
91447636 6711 vm_map_offset_t dst_end;
1c79356b
A
6712 vm_map_entry_t tmp_entry;
6713 vm_map_entry_t entry;
6714 kern_return_t result;
6715 boolean_t encountered_sub_map = FALSE;
6716
6717
6718
6719 /*
6720 * Verify that the destination is all writeable
6721 * initially. We have to trunc the destination
6722 * address and round the copy size or we'll end up
6723 * splitting entries in strange ways.
6724 */
6725
39236c6e
A
6726 dst_end = vm_map_round_page(dst_addr + dst_size,
6727 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 6728 vm_map_lock(dst_map);
1c79356b
A
6729
6730start_pass_1:
1c79356b
A
6731 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6732 vm_map_unlock(dst_map);
6733 return(KERN_INVALID_ADDRESS);
6734 }
6735
39236c6e
A
6736 vm_map_clip_start(dst_map,
6737 tmp_entry,
6738 vm_map_trunc_page(dst_addr,
6739 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
6740 if (tmp_entry->is_sub_map) {
6741 /* clipping did unnest if needed */
6742 assert(!tmp_entry->use_pmap);
6743 }
1c79356b
A
6744
6745 for (entry = tmp_entry;;) {
6746 vm_map_entry_t next;
6747
6748 next = entry->vme_next;
6749 while(entry->is_sub_map) {
91447636
A
6750 vm_map_offset_t sub_start;
6751 vm_map_offset_t sub_end;
6752 vm_map_offset_t local_end;
1c79356b
A
6753
6754 if (entry->in_transition) {
2d21ac55
A
6755 /*
6756 * Say that we are waiting, and wait for entry.
6757 */
1c79356b
A
6758 entry->needs_wakeup = TRUE;
6759 vm_map_entry_wait(dst_map, THREAD_UNINT);
6760
6761 goto start_pass_1;
6762 }
6763
6764 encountered_sub_map = TRUE;
3e170ce0 6765 sub_start = VME_OFFSET(entry);
1c79356b
A
6766
6767 if(entry->vme_end < dst_end)
6768 sub_end = entry->vme_end;
6769 else
6770 sub_end = dst_end;
6771 sub_end -= entry->vme_start;
3e170ce0 6772 sub_end += VME_OFFSET(entry);
1c79356b
A
6773 local_end = entry->vme_end;
6774 vm_map_unlock(dst_map);
6775
6776 result = vm_map_overwrite_submap_recurse(
3e170ce0 6777 VME_SUBMAP(entry),
2d21ac55
A
6778 sub_start,
6779 sub_end - sub_start);
1c79356b
A
6780
6781 if(result != KERN_SUCCESS)
6782 return result;
6783 if (dst_end <= entry->vme_end)
6784 return KERN_SUCCESS;
6785 vm_map_lock(dst_map);
6786 if(!vm_map_lookup_entry(dst_map, local_end,
6787 &tmp_entry)) {
6788 vm_map_unlock(dst_map);
6789 return(KERN_INVALID_ADDRESS);
6790 }
6791 entry = tmp_entry;
6792 next = entry->vme_next;
6793 }
6794
6795 if ( ! (entry->protection & VM_PROT_WRITE)) {
6796 vm_map_unlock(dst_map);
6797 return(KERN_PROTECTION_FAILURE);
6798 }
6799
6800 /*
6801 * If the entry is in transition, we must wait
6802 * for it to exit that state. Anything could happen
6803 * when we unlock the map, so start over.
6804 */
6805 if (entry->in_transition) {
6806
6807 /*
6808 * Say that we are waiting, and wait for entry.
6809 */
6810 entry->needs_wakeup = TRUE;
6811 vm_map_entry_wait(dst_map, THREAD_UNINT);
6812
6813 goto start_pass_1;
6814 }
6815
6816/*
6817 * our range is contained completely within this map entry
6818 */
6819 if (dst_end <= entry->vme_end) {
6820 vm_map_unlock(dst_map);
6821 return KERN_SUCCESS;
6822 }
6823/*
6824 * check that range specified is contiguous region
6825 */
6826 if ((next == vm_map_to_entry(dst_map)) ||
6827 (next->vme_start != entry->vme_end)) {
6828 vm_map_unlock(dst_map);
6829 return(KERN_INVALID_ADDRESS);
6830 }
6831
6832 /*
6833 * Check for permanent objects in the destination.
6834 */
3e170ce0
A
6835 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6836 ((!VME_OBJECT(entry)->internal) ||
6837 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
6838 if(encountered_sub_map) {
6839 vm_map_unlock(dst_map);
6840 return(KERN_FAILURE);
6841 }
6842 }
6843
6844
6845 entry = next;
6846 }/* for */
6847 vm_map_unlock(dst_map);
6848 return(KERN_SUCCESS);
6849}
6850
6851/*
6852 * Routine: vm_map_copy_overwrite
6853 *
6854 * Description:
6855 * Copy the memory described by the map copy
6856 * object (copy; returned by vm_map_copyin) onto
6857 * the specified destination region (dst_map, dst_addr).
6858 * The destination must be writeable.
6859 *
6860 * Unlike vm_map_copyout, this routine actually
6861 * writes over previously-mapped memory. If the
6862 * previous mapping was to a permanent (user-supplied)
6863 * memory object, it is preserved.
6864 *
6865 * The attributes (protection and inheritance) of the
6866 * destination region are preserved.
6867 *
6868 * If successful, consumes the copy object.
6869 * Otherwise, the caller is responsible for it.
6870 *
6871 * Implementation notes:
6872 * To overwrite aligned temporary virtual memory, it is
6873 * sufficient to remove the previous mapping and insert
6874 * the new copy. This replacement is done either on
6875 * the whole region (if no permanent virtual memory
6876 * objects are embedded in the destination region) or
6877 * in individual map entries.
6878 *
6879 * To overwrite permanent virtual memory, it is necessary
6880 * to copy each page, as the external memory management
6881 * interface currently does not provide any optimizations.
6882 *
6883 * Unaligned memory also has to be copied. It is possible
6884 * to use 'vm_trickery' to copy the aligned data. This is
6885 * not done but not hard to implement.
6886 *
6887 * Once a page of permanent memory has been overwritten,
6888 * it is impossible to interrupt this function; otherwise,
6889 * the call would be neither atomic nor location-independent.
6890 * The kernel-state portion of a user thread must be
6891 * interruptible.
6892 *
6893 * It may be expensive to forward all requests that might
6894 * overwrite permanent memory (vm_write, vm_copy) to
6895 * uninterruptible kernel threads. This routine may be
6896 * called by interruptible threads; however, success is
6897 * not guaranteed -- if the request cannot be performed
6898 * atomically and interruptibly, an error indication is
6899 * returned.
6900 */
6901
91447636 6902static kern_return_t
1c79356b 6903vm_map_copy_overwrite_nested(
91447636
A
6904 vm_map_t dst_map,
6905 vm_map_address_t dst_addr,
6906 vm_map_copy_t copy,
6907 boolean_t interruptible,
6d2010ae
A
6908 pmap_t pmap,
6909 boolean_t discard_on_success)
1c79356b 6910{
91447636
A
6911 vm_map_offset_t dst_end;
6912 vm_map_entry_t tmp_entry;
6913 vm_map_entry_t entry;
6914 kern_return_t kr;
6915 boolean_t aligned = TRUE;
6916 boolean_t contains_permanent_objects = FALSE;
6917 boolean_t encountered_sub_map = FALSE;
6918 vm_map_offset_t base_addr;
6919 vm_map_size_t copy_size;
6920 vm_map_size_t total_size;
1c79356b
A
6921
6922
6923 /*
6924 * Check for null copy object.
6925 */
6926
6927 if (copy == VM_MAP_COPY_NULL)
6928 return(KERN_SUCCESS);
6929
6930 /*
6931 * Check for special kernel buffer allocated
6932 * by new_ipc_kmsg_copyin.
6933 */
6934
6935 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0 6936 return(vm_map_copyout_kernel_buffer(
2d21ac55 6937 dst_map, &dst_addr,
39236c6e 6938 copy, TRUE, discard_on_success));
1c79356b
A
6939 }
6940
6941 /*
6942 * Only works for entry lists at the moment. Will
6943 * support page lists later.
6944 */
6945
6946 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6947
6948 if (copy->size == 0) {
6d2010ae
A
6949 if (discard_on_success)
6950 vm_map_copy_discard(copy);
1c79356b
A
6951 return(KERN_SUCCESS);
6952 }
6953
6954 /*
6955 * Verify that the destination is all writeable
6956 * initially. We have to trunc the destination
6957 * address and round the copy size or we'll end up
6958 * splitting entries in strange ways.
6959 */
6960
39236c6e
A
6961 if (!VM_MAP_PAGE_ALIGNED(copy->size,
6962 VM_MAP_PAGE_MASK(dst_map)) ||
6963 !VM_MAP_PAGE_ALIGNED(copy->offset,
6964 VM_MAP_PAGE_MASK(dst_map)) ||
6965 !VM_MAP_PAGE_ALIGNED(dst_addr,
fe8ab488 6966 VM_MAP_PAGE_MASK(dst_map)))
1c79356b
A
6967 {
6968 aligned = FALSE;
39236c6e
A
6969 dst_end = vm_map_round_page(dst_addr + copy->size,
6970 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
6971 } else {
6972 dst_end = dst_addr + copy->size;
6973 }
6974
1c79356b 6975 vm_map_lock(dst_map);
9bccf70c 6976
91447636
A
6977 /* LP64todo - remove this check when vm_map_commpage64()
6978 * no longer has to stuff in a map_entry for the commpage
6979 * above the map's max_offset.
6980 */
6981 if (dst_addr >= dst_map->max_offset) {
6982 vm_map_unlock(dst_map);
6983 return(KERN_INVALID_ADDRESS);
6984 }
6985
9bccf70c 6986start_pass_1:
1c79356b
A
6987 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6988 vm_map_unlock(dst_map);
6989 return(KERN_INVALID_ADDRESS);
6990 }
39236c6e
A
6991 vm_map_clip_start(dst_map,
6992 tmp_entry,
6993 vm_map_trunc_page(dst_addr,
6994 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
6995 for (entry = tmp_entry;;) {
6996 vm_map_entry_t next = entry->vme_next;
6997
6998 while(entry->is_sub_map) {
91447636
A
6999 vm_map_offset_t sub_start;
7000 vm_map_offset_t sub_end;
7001 vm_map_offset_t local_end;
1c79356b
A
7002
7003 if (entry->in_transition) {
7004
2d21ac55
A
7005 /*
7006 * Say that we are waiting, and wait for entry.
7007 */
1c79356b
A
7008 entry->needs_wakeup = TRUE;
7009 vm_map_entry_wait(dst_map, THREAD_UNINT);
7010
7011 goto start_pass_1;
7012 }
7013
7014 local_end = entry->vme_end;
7015 if (!(entry->needs_copy)) {
7016 /* if needs_copy we are a COW submap */
7017 /* in such a case we just replace so */
7018 /* there is no need for the follow- */
7019 /* ing check. */
7020 encountered_sub_map = TRUE;
3e170ce0 7021 sub_start = VME_OFFSET(entry);
1c79356b
A
7022
7023 if(entry->vme_end < dst_end)
7024 sub_end = entry->vme_end;
7025 else
7026 sub_end = dst_end;
7027 sub_end -= entry->vme_start;
3e170ce0 7028 sub_end += VME_OFFSET(entry);
1c79356b
A
7029 vm_map_unlock(dst_map);
7030
7031 kr = vm_map_overwrite_submap_recurse(
3e170ce0 7032 VME_SUBMAP(entry),
1c79356b
A
7033 sub_start,
7034 sub_end - sub_start);
7035 if(kr != KERN_SUCCESS)
7036 return kr;
7037 vm_map_lock(dst_map);
7038 }
7039
7040 if (dst_end <= entry->vme_end)
7041 goto start_overwrite;
7042 if(!vm_map_lookup_entry(dst_map, local_end,
7043 &entry)) {
7044 vm_map_unlock(dst_map);
7045 return(KERN_INVALID_ADDRESS);
7046 }
7047 next = entry->vme_next;
7048 }
7049
7050 if ( ! (entry->protection & VM_PROT_WRITE)) {
7051 vm_map_unlock(dst_map);
7052 return(KERN_PROTECTION_FAILURE);
7053 }
7054
7055 /*
7056 * If the entry is in transition, we must wait
7057 * for it to exit that state. Anything could happen
7058 * when we unlock the map, so start over.
7059 */
7060 if (entry->in_transition) {
7061
7062 /*
7063 * Say that we are waiting, and wait for entry.
7064 */
7065 entry->needs_wakeup = TRUE;
7066 vm_map_entry_wait(dst_map, THREAD_UNINT);
7067
7068 goto start_pass_1;
7069 }
7070
7071/*
7072 * our range is contained completely within this map entry
7073 */
7074 if (dst_end <= entry->vme_end)
7075 break;
7076/*
7077 * check that range specified is contiguous region
7078 */
7079 if ((next == vm_map_to_entry(dst_map)) ||
7080 (next->vme_start != entry->vme_end)) {
7081 vm_map_unlock(dst_map);
7082 return(KERN_INVALID_ADDRESS);
7083 }
7084
7085
7086 /*
7087 * Check for permanent objects in the destination.
7088 */
3e170ce0
A
7089 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7090 ((!VME_OBJECT(entry)->internal) ||
7091 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
7092 contains_permanent_objects = TRUE;
7093 }
7094
7095 entry = next;
7096 }/* for */
7097
7098start_overwrite:
7099 /*
7100 * If there are permanent objects in the destination, then
7101 * the copy cannot be interrupted.
7102 */
7103
7104 if (interruptible && contains_permanent_objects) {
7105 vm_map_unlock(dst_map);
7106 return(KERN_FAILURE); /* XXX */
7107 }
7108
7109 /*
7110 *
7111 * Make a second pass, overwriting the data
7112 * At the beginning of each loop iteration,
7113 * the next entry to be overwritten is "tmp_entry"
7114 * (initially, the value returned from the lookup above),
7115 * and the starting address expected in that entry
7116 * is "start".
7117 */
7118
7119 total_size = copy->size;
7120 if(encountered_sub_map) {
7121 copy_size = 0;
7122 /* re-calculate tmp_entry since we've had the map */
7123 /* unlocked */
7124 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7125 vm_map_unlock(dst_map);
7126 return(KERN_INVALID_ADDRESS);
7127 }
7128 } else {
7129 copy_size = copy->size;
7130 }
7131
7132 base_addr = dst_addr;
7133 while(TRUE) {
7134 /* deconstruct the copy object and do in parts */
7135 /* only in sub_map, interruptable case */
7136 vm_map_entry_t copy_entry;
91447636
A
7137 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7138 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
1c79356b 7139 int nentries;
91447636 7140 int remaining_entries = 0;
b0d623f7 7141 vm_map_offset_t new_offset = 0;
1c79356b
A
7142
7143 for (entry = tmp_entry; copy_size == 0;) {
7144 vm_map_entry_t next;
7145
7146 next = entry->vme_next;
7147
7148 /* tmp_entry and base address are moved along */
7149 /* each time we encounter a sub-map. Otherwise */
7150 /* entry can outpase tmp_entry, and the copy_size */
7151 /* may reflect the distance between them */
7152 /* if the current entry is found to be in transition */
7153 /* we will start over at the beginning or the last */
7154 /* encounter of a submap as dictated by base_addr */
7155 /* we will zero copy_size accordingly. */
7156 if (entry->in_transition) {
7157 /*
7158 * Say that we are waiting, and wait for entry.
7159 */
7160 entry->needs_wakeup = TRUE;
7161 vm_map_entry_wait(dst_map, THREAD_UNINT);
7162
1c79356b 7163 if(!vm_map_lookup_entry(dst_map, base_addr,
2d21ac55 7164 &tmp_entry)) {
1c79356b
A
7165 vm_map_unlock(dst_map);
7166 return(KERN_INVALID_ADDRESS);
7167 }
7168 copy_size = 0;
7169 entry = tmp_entry;
7170 continue;
7171 }
7172 if(entry->is_sub_map) {
91447636
A
7173 vm_map_offset_t sub_start;
7174 vm_map_offset_t sub_end;
7175 vm_map_offset_t local_end;
1c79356b
A
7176
7177 if (entry->needs_copy) {
7178 /* if this is a COW submap */
7179 /* just back the range with a */
7180 /* anonymous entry */
7181 if(entry->vme_end < dst_end)
7182 sub_end = entry->vme_end;
7183 else
7184 sub_end = dst_end;
7185 if(entry->vme_start < base_addr)
7186 sub_start = base_addr;
7187 else
7188 sub_start = entry->vme_start;
7189 vm_map_clip_end(
7190 dst_map, entry, sub_end);
7191 vm_map_clip_start(
7192 dst_map, entry, sub_start);
2d21ac55 7193 assert(!entry->use_pmap);
1c79356b
A
7194 entry->is_sub_map = FALSE;
7195 vm_map_deallocate(
3e170ce0
A
7196 VME_SUBMAP(entry));
7197 VME_SUBMAP_SET(entry, NULL);
1c79356b
A
7198 entry->is_shared = FALSE;
7199 entry->needs_copy = FALSE;
3e170ce0 7200 VME_OFFSET_SET(entry, 0);
2d21ac55
A
7201 /*
7202 * XXX FBDP
7203 * We should propagate the protections
7204 * of the submap entry here instead
7205 * of forcing them to VM_PROT_ALL...
7206 * Or better yet, we should inherit
7207 * the protection of the copy_entry.
7208 */
1c79356b
A
7209 entry->protection = VM_PROT_ALL;
7210 entry->max_protection = VM_PROT_ALL;
7211 entry->wired_count = 0;
7212 entry->user_wired_count = 0;
7213 if(entry->inheritance
2d21ac55
A
7214 == VM_INHERIT_SHARE)
7215 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
7216 continue;
7217 }
7218 /* first take care of any non-sub_map */
7219 /* entries to send */
7220 if(base_addr < entry->vme_start) {
7221 /* stuff to send */
7222 copy_size =
7223 entry->vme_start - base_addr;
7224 break;
7225 }
3e170ce0 7226 sub_start = VME_OFFSET(entry);
1c79356b
A
7227
7228 if(entry->vme_end < dst_end)
7229 sub_end = entry->vme_end;
7230 else
7231 sub_end = dst_end;
7232 sub_end -= entry->vme_start;
3e170ce0 7233 sub_end += VME_OFFSET(entry);
1c79356b
A
7234 local_end = entry->vme_end;
7235 vm_map_unlock(dst_map);
7236 copy_size = sub_end - sub_start;
7237
7238 /* adjust the copy object */
7239 if (total_size > copy_size) {
91447636
A
7240 vm_map_size_t local_size = 0;
7241 vm_map_size_t entry_size;
1c79356b 7242
2d21ac55
A
7243 nentries = 1;
7244 new_offset = copy->offset;
7245 copy_entry = vm_map_copy_first_entry(copy);
7246 while(copy_entry !=
7247 vm_map_copy_to_entry(copy)){
7248 entry_size = copy_entry->vme_end -
7249 copy_entry->vme_start;
7250 if((local_size < copy_size) &&
7251 ((local_size + entry_size)
7252 >= copy_size)) {
7253 vm_map_copy_clip_end(copy,
7254 copy_entry,
7255 copy_entry->vme_start +
7256 (copy_size - local_size));
7257 entry_size = copy_entry->vme_end -
7258 copy_entry->vme_start;
7259 local_size += entry_size;
7260 new_offset += entry_size;
7261 }
7262 if(local_size >= copy_size) {
7263 next_copy = copy_entry->vme_next;
7264 copy_entry->vme_next =
7265 vm_map_copy_to_entry(copy);
7266 previous_prev =
7267 copy->cpy_hdr.links.prev;
7268 copy->cpy_hdr.links.prev = copy_entry;
7269 copy->size = copy_size;
7270 remaining_entries =
7271 copy->cpy_hdr.nentries;
7272 remaining_entries -= nentries;
7273 copy->cpy_hdr.nentries = nentries;
7274 break;
7275 } else {
7276 local_size += entry_size;
7277 new_offset += entry_size;
7278 nentries++;
7279 }
7280 copy_entry = copy_entry->vme_next;
7281 }
1c79356b
A
7282 }
7283
7284 if((entry->use_pmap) && (pmap == NULL)) {
7285 kr = vm_map_copy_overwrite_nested(
3e170ce0 7286 VME_SUBMAP(entry),
1c79356b
A
7287 sub_start,
7288 copy,
7289 interruptible,
3e170ce0 7290 VME_SUBMAP(entry)->pmap,
6d2010ae 7291 TRUE);
1c79356b
A
7292 } else if (pmap != NULL) {
7293 kr = vm_map_copy_overwrite_nested(
3e170ce0 7294 VME_SUBMAP(entry),
1c79356b
A
7295 sub_start,
7296 copy,
6d2010ae
A
7297 interruptible, pmap,
7298 TRUE);
1c79356b
A
7299 } else {
7300 kr = vm_map_copy_overwrite_nested(
3e170ce0 7301 VME_SUBMAP(entry),
1c79356b
A
7302 sub_start,
7303 copy,
7304 interruptible,
6d2010ae
A
7305 dst_map->pmap,
7306 TRUE);
1c79356b
A
7307 }
7308 if(kr != KERN_SUCCESS) {
7309 if(next_copy != NULL) {
2d21ac55
A
7310 copy->cpy_hdr.nentries +=
7311 remaining_entries;
7312 copy->cpy_hdr.links.prev->vme_next =
7313 next_copy;
7314 copy->cpy_hdr.links.prev
7315 = previous_prev;
7316 copy->size = total_size;
1c79356b
A
7317 }
7318 return kr;
7319 }
7320 if (dst_end <= local_end) {
7321 return(KERN_SUCCESS);
7322 }
7323 /* otherwise copy no longer exists, it was */
7324 /* destroyed after successful copy_overwrite */
7325 copy = (vm_map_copy_t)
2d21ac55 7326 zalloc(vm_map_copy_zone);
04b8595b 7327 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 7328 vm_map_copy_first_entry(copy) =
2d21ac55
A
7329 vm_map_copy_last_entry(copy) =
7330 vm_map_copy_to_entry(copy);
1c79356b
A
7331 copy->type = VM_MAP_COPY_ENTRY_LIST;
7332 copy->offset = new_offset;
7333
e2d2fc5c
A
7334 /*
7335 * XXX FBDP
7336 * this does not seem to deal with
7337 * the VM map store (R&B tree)
7338 */
7339
1c79356b
A
7340 total_size -= copy_size;
7341 copy_size = 0;
7342 /* put back remainder of copy in container */
7343 if(next_copy != NULL) {
2d21ac55
A
7344 copy->cpy_hdr.nentries = remaining_entries;
7345 copy->cpy_hdr.links.next = next_copy;
7346 copy->cpy_hdr.links.prev = previous_prev;
7347 copy->size = total_size;
7348 next_copy->vme_prev =
7349 vm_map_copy_to_entry(copy);
7350 next_copy = NULL;
1c79356b
A
7351 }
7352 base_addr = local_end;
7353 vm_map_lock(dst_map);
7354 if(!vm_map_lookup_entry(dst_map,
2d21ac55 7355 local_end, &tmp_entry)) {
1c79356b
A
7356 vm_map_unlock(dst_map);
7357 return(KERN_INVALID_ADDRESS);
7358 }
7359 entry = tmp_entry;
7360 continue;
7361 }
7362 if (dst_end <= entry->vme_end) {
7363 copy_size = dst_end - base_addr;
7364 break;
7365 }
7366
7367 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 7368 (next->vme_start != entry->vme_end)) {
1c79356b
A
7369 vm_map_unlock(dst_map);
7370 return(KERN_INVALID_ADDRESS);
7371 }
7372
7373 entry = next;
7374 }/* for */
7375
7376 next_copy = NULL;
7377 nentries = 1;
7378
7379 /* adjust the copy object */
7380 if (total_size > copy_size) {
91447636
A
7381 vm_map_size_t local_size = 0;
7382 vm_map_size_t entry_size;
1c79356b
A
7383
7384 new_offset = copy->offset;
7385 copy_entry = vm_map_copy_first_entry(copy);
7386 while(copy_entry != vm_map_copy_to_entry(copy)) {
7387 entry_size = copy_entry->vme_end -
2d21ac55 7388 copy_entry->vme_start;
1c79356b 7389 if((local_size < copy_size) &&
2d21ac55
A
7390 ((local_size + entry_size)
7391 >= copy_size)) {
1c79356b 7392 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55
A
7393 copy_entry->vme_start +
7394 (copy_size - local_size));
1c79356b 7395 entry_size = copy_entry->vme_end -
2d21ac55 7396 copy_entry->vme_start;
1c79356b
A
7397 local_size += entry_size;
7398 new_offset += entry_size;
7399 }
7400 if(local_size >= copy_size) {
7401 next_copy = copy_entry->vme_next;
7402 copy_entry->vme_next =
7403 vm_map_copy_to_entry(copy);
7404 previous_prev =
7405 copy->cpy_hdr.links.prev;
7406 copy->cpy_hdr.links.prev = copy_entry;
7407 copy->size = copy_size;
7408 remaining_entries =
7409 copy->cpy_hdr.nentries;
7410 remaining_entries -= nentries;
7411 copy->cpy_hdr.nentries = nentries;
7412 break;
7413 } else {
7414 local_size += entry_size;
7415 new_offset += entry_size;
7416 nentries++;
7417 }
7418 copy_entry = copy_entry->vme_next;
7419 }
7420 }
7421
7422 if (aligned) {
7423 pmap_t local_pmap;
7424
7425 if(pmap)
7426 local_pmap = pmap;
7427 else
7428 local_pmap = dst_map->pmap;
7429
7430 if ((kr = vm_map_copy_overwrite_aligned(
2d21ac55
A
7431 dst_map, tmp_entry, copy,
7432 base_addr, local_pmap)) != KERN_SUCCESS) {
1c79356b
A
7433 if(next_copy != NULL) {
7434 copy->cpy_hdr.nentries +=
2d21ac55 7435 remaining_entries;
1c79356b 7436 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7437 next_copy;
1c79356b 7438 copy->cpy_hdr.links.prev =
2d21ac55 7439 previous_prev;
1c79356b
A
7440 copy->size += copy_size;
7441 }
7442 return kr;
7443 }
7444 vm_map_unlock(dst_map);
7445 } else {
2d21ac55
A
7446 /*
7447 * Performance gain:
7448 *
7449 * if the copy and dst address are misaligned but the same
7450 * offset within the page we can copy_not_aligned the
7451 * misaligned parts and copy aligned the rest. If they are
7452 * aligned but len is unaligned we simply need to copy
7453 * the end bit unaligned. We'll need to split the misaligned
7454 * bits of the region in this case !
7455 */
7456 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
7457 kr = vm_map_copy_overwrite_unaligned(
7458 dst_map,
7459 tmp_entry,
7460 copy,
7461 base_addr,
7462 discard_on_success);
7463 if (kr != KERN_SUCCESS) {
1c79356b
A
7464 if(next_copy != NULL) {
7465 copy->cpy_hdr.nentries +=
2d21ac55 7466 remaining_entries;
1c79356b 7467 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7468 next_copy;
1c79356b
A
7469 copy->cpy_hdr.links.prev =
7470 previous_prev;
7471 copy->size += copy_size;
7472 }
7473 return kr;
7474 }
7475 }
7476 total_size -= copy_size;
7477 if(total_size == 0)
7478 break;
7479 base_addr += copy_size;
7480 copy_size = 0;
7481 copy->offset = new_offset;
7482 if(next_copy != NULL) {
7483 copy->cpy_hdr.nentries = remaining_entries;
7484 copy->cpy_hdr.links.next = next_copy;
7485 copy->cpy_hdr.links.prev = previous_prev;
7486 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7487 copy->size = total_size;
7488 }
7489 vm_map_lock(dst_map);
7490 while(TRUE) {
7491 if (!vm_map_lookup_entry(dst_map,
2d21ac55 7492 base_addr, &tmp_entry)) {
1c79356b
A
7493 vm_map_unlock(dst_map);
7494 return(KERN_INVALID_ADDRESS);
7495 }
7496 if (tmp_entry->in_transition) {
7497 entry->needs_wakeup = TRUE;
7498 vm_map_entry_wait(dst_map, THREAD_UNINT);
7499 } else {
7500 break;
7501 }
7502 }
39236c6e
A
7503 vm_map_clip_start(dst_map,
7504 tmp_entry,
7505 vm_map_trunc_page(base_addr,
7506 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
7507
7508 entry = tmp_entry;
7509 } /* while */
7510
7511 /*
7512 * Throw away the vm_map_copy object
7513 */
6d2010ae
A
7514 if (discard_on_success)
7515 vm_map_copy_discard(copy);
1c79356b
A
7516
7517 return(KERN_SUCCESS);
7518}/* vm_map_copy_overwrite */
7519
7520kern_return_t
7521vm_map_copy_overwrite(
7522 vm_map_t dst_map,
91447636 7523 vm_map_offset_t dst_addr,
1c79356b
A
7524 vm_map_copy_t copy,
7525 boolean_t interruptible)
7526{
6d2010ae
A
7527 vm_map_size_t head_size, tail_size;
7528 vm_map_copy_t head_copy, tail_copy;
7529 vm_map_offset_t head_addr, tail_addr;
7530 vm_map_entry_t entry;
7531 kern_return_t kr;
7532
7533 head_size = 0;
7534 tail_size = 0;
7535 head_copy = NULL;
7536 tail_copy = NULL;
7537 head_addr = 0;
7538 tail_addr = 0;
7539
7540 if (interruptible ||
7541 copy == VM_MAP_COPY_NULL ||
7542 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7543 /*
7544 * We can't split the "copy" map if we're interruptible
7545 * or if we don't have a "copy" map...
7546 */
7547 blunt_copy:
7548 return vm_map_copy_overwrite_nested(dst_map,
7549 dst_addr,
7550 copy,
7551 interruptible,
7552 (pmap_t) NULL,
7553 TRUE);
7554 }
7555
7556 if (copy->size < 3 * PAGE_SIZE) {
7557 /*
7558 * Too small to bother with optimizing...
7559 */
7560 goto blunt_copy;
7561 }
7562
39236c6e
A
7563 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7564 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
6d2010ae
A
7565 /*
7566 * Incompatible mis-alignment of source and destination...
7567 */
7568 goto blunt_copy;
7569 }
7570
7571 /*
7572 * Proper alignment or identical mis-alignment at the beginning.
7573 * Let's try and do a small unaligned copy first (if needed)
7574 * and then an aligned copy for the rest.
7575 */
7576 if (!page_aligned(dst_addr)) {
7577 head_addr = dst_addr;
39236c6e
A
7578 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7579 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
6d2010ae
A
7580 }
7581 if (!page_aligned(copy->offset + copy->size)) {
7582 /*
7583 * Mis-alignment at the end.
7584 * Do an aligned copy up to the last page and
7585 * then an unaligned copy for the remaining bytes.
7586 */
39236c6e
A
7587 tail_size = ((copy->offset + copy->size) &
7588 VM_MAP_PAGE_MASK(dst_map));
6d2010ae
A
7589 tail_addr = dst_addr + copy->size - tail_size;
7590 }
7591
7592 if (head_size + tail_size == copy->size) {
7593 /*
7594 * It's all unaligned, no optimization possible...
7595 */
7596 goto blunt_copy;
7597 }
7598
7599 /*
7600 * Can't optimize if there are any submaps in the
7601 * destination due to the way we free the "copy" map
7602 * progressively in vm_map_copy_overwrite_nested()
7603 * in that case.
7604 */
7605 vm_map_lock_read(dst_map);
7606 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7607 vm_map_unlock_read(dst_map);
7608 goto blunt_copy;
7609 }
7610 for (;
7611 (entry != vm_map_copy_to_entry(copy) &&
7612 entry->vme_start < dst_addr + copy->size);
7613 entry = entry->vme_next) {
7614 if (entry->is_sub_map) {
7615 vm_map_unlock_read(dst_map);
7616 goto blunt_copy;
7617 }
7618 }
7619 vm_map_unlock_read(dst_map);
7620
7621 if (head_size) {
7622 /*
7623 * Unaligned copy of the first "head_size" bytes, to reach
7624 * a page boundary.
7625 */
7626
7627 /*
7628 * Extract "head_copy" out of "copy".
7629 */
7630 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7631 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7632 vm_map_copy_first_entry(head_copy) =
7633 vm_map_copy_to_entry(head_copy);
7634 vm_map_copy_last_entry(head_copy) =
7635 vm_map_copy_to_entry(head_copy);
7636 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7637 head_copy->cpy_hdr.nentries = 0;
7638 head_copy->cpy_hdr.entries_pageable =
7639 copy->cpy_hdr.entries_pageable;
7640 vm_map_store_init(&head_copy->cpy_hdr);
7641
7642 head_copy->offset = copy->offset;
7643 head_copy->size = head_size;
7644
7645 copy->offset += head_size;
7646 copy->size -= head_size;
7647
7648 entry = vm_map_copy_first_entry(copy);
7649 vm_map_copy_clip_end(copy, entry, copy->offset);
7650 vm_map_copy_entry_unlink(copy, entry);
7651 vm_map_copy_entry_link(head_copy,
7652 vm_map_copy_to_entry(head_copy),
7653 entry);
7654
7655 /*
7656 * Do the unaligned copy.
7657 */
7658 kr = vm_map_copy_overwrite_nested(dst_map,
7659 head_addr,
7660 head_copy,
7661 interruptible,
7662 (pmap_t) NULL,
7663 FALSE);
7664 if (kr != KERN_SUCCESS)
7665 goto done;
7666 }
7667
7668 if (tail_size) {
7669 /*
7670 * Extract "tail_copy" out of "copy".
7671 */
7672 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7673 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7674 vm_map_copy_first_entry(tail_copy) =
7675 vm_map_copy_to_entry(tail_copy);
7676 vm_map_copy_last_entry(tail_copy) =
7677 vm_map_copy_to_entry(tail_copy);
7678 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7679 tail_copy->cpy_hdr.nentries = 0;
7680 tail_copy->cpy_hdr.entries_pageable =
7681 copy->cpy_hdr.entries_pageable;
7682 vm_map_store_init(&tail_copy->cpy_hdr);
7683
7684 tail_copy->offset = copy->offset + copy->size - tail_size;
7685 tail_copy->size = tail_size;
7686
7687 copy->size -= tail_size;
7688
7689 entry = vm_map_copy_last_entry(copy);
7690 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7691 entry = vm_map_copy_last_entry(copy);
7692 vm_map_copy_entry_unlink(copy, entry);
7693 vm_map_copy_entry_link(tail_copy,
7694 vm_map_copy_last_entry(tail_copy),
7695 entry);
7696 }
7697
7698 /*
7699 * Copy most (or possibly all) of the data.
7700 */
7701 kr = vm_map_copy_overwrite_nested(dst_map,
7702 dst_addr + head_size,
7703 copy,
7704 interruptible,
7705 (pmap_t) NULL,
7706 FALSE);
7707 if (kr != KERN_SUCCESS) {
7708 goto done;
7709 }
7710
7711 if (tail_size) {
7712 kr = vm_map_copy_overwrite_nested(dst_map,
7713 tail_addr,
7714 tail_copy,
7715 interruptible,
7716 (pmap_t) NULL,
7717 FALSE);
7718 }
7719
7720done:
7721 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7722 if (kr == KERN_SUCCESS) {
7723 /*
7724 * Discard all the copy maps.
7725 */
7726 if (head_copy) {
7727 vm_map_copy_discard(head_copy);
7728 head_copy = NULL;
7729 }
7730 vm_map_copy_discard(copy);
7731 if (tail_copy) {
7732 vm_map_copy_discard(tail_copy);
7733 tail_copy = NULL;
7734 }
7735 } else {
7736 /*
7737 * Re-assemble the original copy map.
7738 */
7739 if (head_copy) {
7740 entry = vm_map_copy_first_entry(head_copy);
7741 vm_map_copy_entry_unlink(head_copy, entry);
7742 vm_map_copy_entry_link(copy,
7743 vm_map_copy_to_entry(copy),
7744 entry);
7745 copy->offset -= head_size;
7746 copy->size += head_size;
7747 vm_map_copy_discard(head_copy);
7748 head_copy = NULL;
7749 }
7750 if (tail_copy) {
7751 entry = vm_map_copy_last_entry(tail_copy);
7752 vm_map_copy_entry_unlink(tail_copy, entry);
7753 vm_map_copy_entry_link(copy,
7754 vm_map_copy_last_entry(copy),
7755 entry);
7756 copy->size += tail_size;
7757 vm_map_copy_discard(tail_copy);
7758 tail_copy = NULL;
7759 }
7760 }
7761 return kr;
1c79356b
A
7762}
7763
7764
7765/*
91447636 7766 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
7767 *
7768 * Decription:
7769 * Physically copy unaligned data
7770 *
7771 * Implementation:
7772 * Unaligned parts of pages have to be physically copied. We use
7773 * a modified form of vm_fault_copy (which understands none-aligned
7774 * page offsets and sizes) to do the copy. We attempt to copy as
7775 * much memory in one go as possibly, however vm_fault_copy copies
7776 * within 1 memory object so we have to find the smaller of "amount left"
7777 * "source object data size" and "target object data size". With
7778 * unaligned data we don't need to split regions, therefore the source
7779 * (copy) object should be one map entry, the target range may be split
7780 * over multiple map entries however. In any event we are pessimistic
7781 * about these assumptions.
7782 *
7783 * Assumptions:
7784 * dst_map is locked on entry and is return locked on success,
7785 * unlocked on error.
7786 */
7787
91447636 7788static kern_return_t
1c79356b
A
7789vm_map_copy_overwrite_unaligned(
7790 vm_map_t dst_map,
7791 vm_map_entry_t entry,
7792 vm_map_copy_t copy,
39236c6e
A
7793 vm_map_offset_t start,
7794 boolean_t discard_on_success)
1c79356b 7795{
39236c6e
A
7796 vm_map_entry_t copy_entry;
7797 vm_map_entry_t copy_entry_next;
1c79356b
A
7798 vm_map_version_t version;
7799 vm_object_t dst_object;
7800 vm_object_offset_t dst_offset;
7801 vm_object_offset_t src_offset;
7802 vm_object_offset_t entry_offset;
91447636
A
7803 vm_map_offset_t entry_end;
7804 vm_map_size_t src_size,
1c79356b
A
7805 dst_size,
7806 copy_size,
7807 amount_left;
7808 kern_return_t kr = KERN_SUCCESS;
7809
39236c6e
A
7810
7811 copy_entry = vm_map_copy_first_entry(copy);
7812
1c79356b
A
7813 vm_map_lock_write_to_read(dst_map);
7814
91447636 7815 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
7816 amount_left = copy->size;
7817/*
7818 * unaligned so we never clipped this entry, we need the offset into
7819 * the vm_object not just the data.
7820 */
7821 while (amount_left > 0) {
7822
7823 if (entry == vm_map_to_entry(dst_map)) {
7824 vm_map_unlock_read(dst_map);
7825 return KERN_INVALID_ADDRESS;
7826 }
7827
7828 /* "start" must be within the current map entry */
7829 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7830
7831 dst_offset = start - entry->vme_start;
7832
7833 dst_size = entry->vme_end - start;
7834
7835 src_size = copy_entry->vme_end -
7836 (copy_entry->vme_start + src_offset);
7837
7838 if (dst_size < src_size) {
7839/*
7840 * we can only copy dst_size bytes before
7841 * we have to get the next destination entry
7842 */
7843 copy_size = dst_size;
7844 } else {
7845/*
7846 * we can only copy src_size bytes before
7847 * we have to get the next source copy entry
7848 */
7849 copy_size = src_size;
7850 }
7851
7852 if (copy_size > amount_left) {
7853 copy_size = amount_left;
7854 }
7855/*
7856 * Entry needs copy, create a shadow shadow object for
7857 * Copy on write region.
7858 */
7859 if (entry->needs_copy &&
2d21ac55 7860 ((entry->protection & VM_PROT_WRITE) != 0))
1c79356b
A
7861 {
7862 if (vm_map_lock_read_to_write(dst_map)) {
7863 vm_map_lock_read(dst_map);
7864 goto RetryLookup;
7865 }
3e170ce0
A
7866 VME_OBJECT_SHADOW(entry,
7867 (vm_map_size_t)(entry->vme_end
7868 - entry->vme_start));
1c79356b
A
7869 entry->needs_copy = FALSE;
7870 vm_map_lock_write_to_read(dst_map);
7871 }
3e170ce0 7872 dst_object = VME_OBJECT(entry);
1c79356b
A
7873/*
7874 * unlike with the virtual (aligned) copy we're going
7875 * to fault on it therefore we need a target object.
7876 */
7877 if (dst_object == VM_OBJECT_NULL) {
7878 if (vm_map_lock_read_to_write(dst_map)) {
7879 vm_map_lock_read(dst_map);
7880 goto RetryLookup;
7881 }
91447636 7882 dst_object = vm_object_allocate((vm_map_size_t)
2d21ac55 7883 entry->vme_end - entry->vme_start);
3e170ce0
A
7884 VME_OBJECT(entry) = dst_object;
7885 VME_OFFSET_SET(entry, 0);
fe8ab488 7886 assert(entry->use_pmap);
1c79356b
A
7887 vm_map_lock_write_to_read(dst_map);
7888 }
7889/*
7890 * Take an object reference and unlock map. The "entry" may
7891 * disappear or change when the map is unlocked.
7892 */
7893 vm_object_reference(dst_object);
7894 version.main_timestamp = dst_map->timestamp;
3e170ce0 7895 entry_offset = VME_OFFSET(entry);
1c79356b
A
7896 entry_end = entry->vme_end;
7897 vm_map_unlock_read(dst_map);
7898/*
7899 * Copy as much as possible in one pass
7900 */
7901 kr = vm_fault_copy(
3e170ce0
A
7902 VME_OBJECT(copy_entry),
7903 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
7904 &copy_size,
7905 dst_object,
7906 entry_offset + dst_offset,
7907 dst_map,
7908 &version,
7909 THREAD_UNINT );
7910
7911 start += copy_size;
7912 src_offset += copy_size;
7913 amount_left -= copy_size;
7914/*
7915 * Release the object reference
7916 */
7917 vm_object_deallocate(dst_object);
7918/*
7919 * If a hard error occurred, return it now
7920 */
7921 if (kr != KERN_SUCCESS)
7922 return kr;
7923
7924 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
2d21ac55 7925 || amount_left == 0)
1c79356b
A
7926 {
7927/*
7928 * all done with this copy entry, dispose.
7929 */
39236c6e
A
7930 copy_entry_next = copy_entry->vme_next;
7931
7932 if (discard_on_success) {
7933 vm_map_copy_entry_unlink(copy, copy_entry);
7934 assert(!copy_entry->is_sub_map);
3e170ce0 7935 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
7936 vm_map_copy_entry_dispose(copy, copy_entry);
7937 }
1c79356b 7938
39236c6e
A
7939 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7940 amount_left) {
1c79356b
A
7941/*
7942 * not finished copying but run out of source
7943 */
7944 return KERN_INVALID_ADDRESS;
7945 }
39236c6e
A
7946
7947 copy_entry = copy_entry_next;
7948
1c79356b
A
7949 src_offset = 0;
7950 }
7951
7952 if (amount_left == 0)
7953 return KERN_SUCCESS;
7954
7955 vm_map_lock_read(dst_map);
7956 if (version.main_timestamp == dst_map->timestamp) {
7957 if (start == entry_end) {
7958/*
7959 * destination region is split. Use the version
7960 * information to avoid a lookup in the normal
7961 * case.
7962 */
7963 entry = entry->vme_next;
7964/*
7965 * should be contiguous. Fail if we encounter
7966 * a hole in the destination.
7967 */
7968 if (start != entry->vme_start) {
7969 vm_map_unlock_read(dst_map);
7970 return KERN_INVALID_ADDRESS ;
7971 }
7972 }
7973 } else {
7974/*
7975 * Map version check failed.
7976 * we must lookup the entry because somebody
7977 * might have changed the map behind our backs.
7978 */
2d21ac55 7979 RetryLookup:
1c79356b
A
7980 if (!vm_map_lookup_entry(dst_map, start, &entry))
7981 {
7982 vm_map_unlock_read(dst_map);
7983 return KERN_INVALID_ADDRESS ;
7984 }
7985 }
7986 }/* while */
7987
1c79356b
A
7988 return KERN_SUCCESS;
7989}/* vm_map_copy_overwrite_unaligned */
7990
7991/*
91447636 7992 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
7993 *
7994 * Description:
7995 * Does all the vm_trickery possible for whole pages.
7996 *
7997 * Implementation:
7998 *
7999 * If there are no permanent objects in the destination,
8000 * and the source and destination map entry zones match,
8001 * and the destination map entry is not shared,
8002 * then the map entries can be deleted and replaced
8003 * with those from the copy. The following code is the
8004 * basic idea of what to do, but there are lots of annoying
8005 * little details about getting protection and inheritance
8006 * right. Should add protection, inheritance, and sharing checks
8007 * to the above pass and make sure that no wiring is involved.
8008 */
8009
e2d2fc5c
A
/*
 * Statistics counters: vm_map_copy_overwrite_aligned() increments one of
 * these each time it gives up on substituting the copy entry's object and
 * jumps to its "slow_copy" path instead:
 *   ..._src_not_internal:  the source (or an object in its shadow chain)
 *                          is not an internal object
 *   ..._src_not_symmetric: a "true_share" object or a copy strategy other
 *                          than MEMORY_OBJECT_COPY_SYMMETRIC was found
 *   ..._src_large:         a small copy out of a large source object
 */
8010int vm_map_copy_overwrite_aligned_src_not_internal = 0;
8011int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
8012int vm_map_copy_overwrite_aligned_src_large = 0;
8013
91447636 8014static kern_return_t
1c79356b
A
8015vm_map_copy_overwrite_aligned(
8016 vm_map_t dst_map,
8017 vm_map_entry_t tmp_entry,
8018 vm_map_copy_t copy,
91447636 8019 vm_map_offset_t start,
2d21ac55 8020 __unused pmap_t pmap)
1c79356b
A
8021{
8022 vm_object_t object;
8023 vm_map_entry_t copy_entry;
91447636
A
8024 vm_map_size_t copy_size;
8025 vm_map_size_t size;
1c79356b
A
8026 vm_map_entry_t entry;
8027
8028 while ((copy_entry = vm_map_copy_first_entry(copy))
2d21ac55 8029 != vm_map_copy_to_entry(copy))
1c79356b
A
8030 {
8031 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8032
8033 entry = tmp_entry;
fe8ab488
A
8034 if (entry->is_sub_map) {
8035 /* unnested when clipped earlier */
8036 assert(!entry->use_pmap);
8037 }
1c79356b
A
8038 if (entry == vm_map_to_entry(dst_map)) {
8039 vm_map_unlock(dst_map);
8040 return KERN_INVALID_ADDRESS;
8041 }
8042 size = (entry->vme_end - entry->vme_start);
8043 /*
8044 * Make sure that no holes popped up in the
8045 * address map, and that the protection is
8046 * still valid, in case the map was unlocked
8047 * earlier.
8048 */
8049
8050 if ((entry->vme_start != start) || ((entry->is_sub_map)
2d21ac55 8051 && !entry->needs_copy)) {
1c79356b
A
8052 vm_map_unlock(dst_map);
8053 return(KERN_INVALID_ADDRESS);
8054 }
8055 assert(entry != vm_map_to_entry(dst_map));
8056
8057 /*
8058 * Check protection again
8059 */
8060
8061 if ( ! (entry->protection & VM_PROT_WRITE)) {
8062 vm_map_unlock(dst_map);
8063 return(KERN_PROTECTION_FAILURE);
8064 }
8065
8066 /*
8067 * Adjust to source size first
8068 */
8069
8070 if (copy_size < size) {
fe8ab488
A
8071 if (entry->map_aligned &&
8072 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8073 VM_MAP_PAGE_MASK(dst_map))) {
8074 /* no longer map-aligned */
8075 entry->map_aligned = FALSE;
8076 }
1c79356b
A
8077 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8078 size = copy_size;
8079 }
8080
8081 /*
8082 * Adjust to destination size
8083 */
8084
8085 if (size < copy_size) {
8086 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8087 copy_entry->vme_start + size);
1c79356b
A
8088 copy_size = size;
8089 }
8090
8091 assert((entry->vme_end - entry->vme_start) == size);
8092 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8093 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8094
8095 /*
8096 * If the destination contains temporary unshared memory,
8097 * we can perform the copy by throwing it away and
8098 * installing the source data.
8099 */
8100
3e170ce0 8101 object = VME_OBJECT(entry);
1c79356b 8102 if ((!entry->is_shared &&
2d21ac55
A
8103 ((object == VM_OBJECT_NULL) ||
8104 (object->internal && !object->true_share))) ||
1c79356b 8105 entry->needs_copy) {
3e170ce0
A
8106 vm_object_t old_object = VME_OBJECT(entry);
8107 vm_object_offset_t old_offset = VME_OFFSET(entry);
1c79356b
A
8108 vm_object_offset_t offset;
8109
8110 /*
8111 * Ensure that the source and destination aren't
8112 * identical
8113 */
3e170ce0
A
8114 if (old_object == VME_OBJECT(copy_entry) &&
8115 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
8116 vm_map_copy_entry_unlink(copy, copy_entry);
8117 vm_map_copy_entry_dispose(copy, copy_entry);
8118
8119 if (old_object != VM_OBJECT_NULL)
8120 vm_object_deallocate(old_object);
8121
8122 start = tmp_entry->vme_end;
8123 tmp_entry = tmp_entry->vme_next;
8124 continue;
8125 }
8126
e2d2fc5c
A
8127#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8128#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
8129 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8130 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
8131 copy_size <= __TRADEOFF1_COPY_SIZE) {
8132 /*
8133 * Virtual vs. Physical copy tradeoff #1.
8134 *
8135 * Copying only a few pages out of a large
8136 * object: do a physical copy instead of
8137 * a virtual copy, to avoid possibly keeping
8138 * the entire large object alive because of
8139 * those few copy-on-write pages.
8140 */
8141 vm_map_copy_overwrite_aligned_src_large++;
8142 goto slow_copy;
8143 }
e2d2fc5c 8144
3e170ce0
A
8145 if ((dst_map->pmap != kernel_pmap) &&
8146 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8147 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
ebb1b9f4
A
8148 vm_object_t new_object, new_shadow;
8149
8150 /*
8151 * We're about to map something over a mapping
8152 * established by malloc()...
8153 */
3e170ce0 8154 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
8155 if (new_object != VM_OBJECT_NULL) {
8156 vm_object_lock_shared(new_object);
8157 }
8158 while (new_object != VM_OBJECT_NULL &&
e2d2fc5c
A
8159 !new_object->true_share &&
8160 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
ebb1b9f4
A
8161 new_object->internal) {
8162 new_shadow = new_object->shadow;
8163 if (new_shadow == VM_OBJECT_NULL) {
8164 break;
8165 }
8166 vm_object_lock_shared(new_shadow);
8167 vm_object_unlock(new_object);
8168 new_object = new_shadow;
8169 }
8170 if (new_object != VM_OBJECT_NULL) {
8171 if (!new_object->internal) {
8172 /*
8173 * The new mapping is backed
8174 * by an external object. We
8175 * don't want malloc'ed memory
8176 * to be replaced with such a
8177 * non-anonymous mapping, so
8178 * let's go off the optimized
8179 * path...
8180 */
e2d2fc5c 8181 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
8182 vm_object_unlock(new_object);
8183 goto slow_copy;
8184 }
e2d2fc5c
A
8185 if (new_object->true_share ||
8186 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8187 /*
8188 * Same if there's a "true_share"
8189 * object in the shadow chain, or
8190 * an object with a non-default
8191 * (SYMMETRIC) copy strategy.
8192 */
8193 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8194 vm_object_unlock(new_object);
8195 goto slow_copy;
8196 }
ebb1b9f4
A
8197 vm_object_unlock(new_object);
8198 }
8199 /*
8200 * The new mapping is still backed by
8201 * anonymous (internal) memory, so it's
8202 * OK to substitute it for the original
8203 * malloc() mapping.
8204 */
8205 }
8206
1c79356b
A
8207 if (old_object != VM_OBJECT_NULL) {
8208 if(entry->is_sub_map) {
9bccf70c 8209 if(entry->use_pmap) {
0c530ab8 8210#ifndef NO_NESTED_PMAP
9bccf70c 8211 pmap_unnest(dst_map->pmap,
2d21ac55
A
8212 (addr64_t)entry->vme_start,
8213 entry->vme_end - entry->vme_start);
0c530ab8 8214#endif /* NO_NESTED_PMAP */
316670eb 8215 if(dst_map->mapped_in_other_pmaps) {
9bccf70c
A
8216 /* clean up parent */
8217 /* map/maps */
2d21ac55
A
8218 vm_map_submap_pmap_clean(
8219 dst_map, entry->vme_start,
8220 entry->vme_end,
3e170ce0
A
8221 VME_SUBMAP(entry),
8222 VME_OFFSET(entry));
9bccf70c
A
8223 }
8224 } else {
8225 vm_map_submap_pmap_clean(
8226 dst_map, entry->vme_start,
8227 entry->vme_end,
3e170ce0
A
8228 VME_SUBMAP(entry),
8229 VME_OFFSET(entry));
9bccf70c 8230 }
3e170ce0 8231 vm_map_deallocate(VME_SUBMAP(entry));
9bccf70c 8232 } else {
316670eb 8233 if(dst_map->mapped_in_other_pmaps) {
39236c6e 8234 vm_object_pmap_protect_options(
3e170ce0
A
8235 VME_OBJECT(entry),
8236 VME_OFFSET(entry),
9bccf70c 8237 entry->vme_end
2d21ac55 8238 - entry->vme_start,
9bccf70c
A
8239 PMAP_NULL,
8240 entry->vme_start,
39236c6e
A
8241 VM_PROT_NONE,
8242 PMAP_OPTIONS_REMOVE);
9bccf70c 8243 } else {
39236c6e
A
8244 pmap_remove_options(
8245 dst_map->pmap,
8246 (addr64_t)(entry->vme_start),
8247 (addr64_t)(entry->vme_end),
8248 PMAP_OPTIONS_REMOVE);
9bccf70c 8249 }
1c79356b 8250 vm_object_deallocate(old_object);
9bccf70c 8251 }
1c79356b
A
8252 }
8253
8254 entry->is_sub_map = FALSE;
3e170ce0
A
8255 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8256 object = VME_OBJECT(entry);
1c79356b
A
8257 entry->needs_copy = copy_entry->needs_copy;
8258 entry->wired_count = 0;
8259 entry->user_wired_count = 0;
3e170ce0
A
8260 offset = VME_OFFSET(copy_entry);
8261 VME_OFFSET_SET(entry, offset);
1c79356b
A
8262
8263 vm_map_copy_entry_unlink(copy, copy_entry);
8264 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 8265
1c79356b 8266 /*
2d21ac55 8267 * we could try to push pages into the pmap at this point, BUT
1c79356b
A
8268 * this optimization only saved on average 2 us per page if ALL
8269 * the pages in the source were currently mapped
8270 * and ALL the pages in the dest were touched, if there were fewer
8271 * than 2/3 of the pages touched, this optimization actually cost more cycles
2d21ac55 8272 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
1c79356b
A
8273 */
8274
1c79356b
A
8275 /*
8276 * Set up for the next iteration. The map
8277 * has not been unlocked, so the next
8278 * address should be at the end of this
8279 * entry, and the next map entry should be
8280 * the one following it.
8281 */
8282
8283 start = tmp_entry->vme_end;
8284 tmp_entry = tmp_entry->vme_next;
8285 } else {
8286 vm_map_version_t version;
ebb1b9f4
A
8287 vm_object_t dst_object;
8288 vm_object_offset_t dst_offset;
1c79356b
A
8289 kern_return_t r;
8290
ebb1b9f4 8291 slow_copy:
e2d2fc5c 8292 if (entry->needs_copy) {
3e170ce0
A
8293 VME_OBJECT_SHADOW(entry,
8294 (entry->vme_end -
8295 entry->vme_start));
e2d2fc5c
A
8296 entry->needs_copy = FALSE;
8297 }
8298
3e170ce0
A
8299 dst_object = VME_OBJECT(entry);
8300 dst_offset = VME_OFFSET(entry);
ebb1b9f4 8301
1c79356b
A
8302 /*
8303 * Take an object reference, and record
8304 * the map version information so that the
8305 * map can be safely unlocked.
8306 */
8307
ebb1b9f4
A
8308 if (dst_object == VM_OBJECT_NULL) {
8309 /*
8310 * We would usually have just taken the
8311 * optimized path above if the destination
8312 * object has not been allocated yet. But we
8313 * now disable that optimization if the copy
8314 * entry's object is not backed by anonymous
8315 * memory to avoid replacing malloc'ed
8316 * (i.e. re-usable) anonymous memory with a
8317 * not-so-anonymous mapping.
8318 * So we have to handle this case here and
8319 * allocate a new VM object for this map entry.
8320 */
8321 dst_object = vm_object_allocate(
8322 entry->vme_end - entry->vme_start);
8323 dst_offset = 0;
3e170ce0
A
8324 VME_OBJECT_SET(entry, dst_object);
8325 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 8326 assert(entry->use_pmap);
ebb1b9f4
A
8327
8328 }
8329
1c79356b
A
8330 vm_object_reference(dst_object);
8331
9bccf70c
A
8332 /* account for unlock bumping up timestamp */
8333 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
8334
8335 vm_map_unlock(dst_map);
8336
8337 /*
8338 * Copy as much as possible in one pass
8339 */
8340
8341 copy_size = size;
8342 r = vm_fault_copy(
3e170ce0
A
8343 VME_OBJECT(copy_entry),
8344 VME_OFFSET(copy_entry),
2d21ac55
A
8345 &copy_size,
8346 dst_object,
8347 dst_offset,
8348 dst_map,
8349 &version,
8350 THREAD_UNINT );
1c79356b
A
8351
8352 /*
8353 * Release the object reference
8354 */
8355
8356 vm_object_deallocate(dst_object);
8357
8358 /*
8359 * If a hard error occurred, return it now
8360 */
8361
8362 if (r != KERN_SUCCESS)
8363 return(r);
8364
8365 if (copy_size != 0) {
8366 /*
8367 * Dispose of the copied region
8368 */
8369
8370 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8371 copy_entry->vme_start + copy_size);
1c79356b 8372 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 8373 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
8374 vm_map_copy_entry_dispose(copy, copy_entry);
8375 }
8376
8377 /*
8378 * Pick up in the destination map where we left off.
8379 *
8380 * Use the version information to avoid a lookup
8381 * in the normal case.
8382 */
8383
8384 start += copy_size;
8385 vm_map_lock(dst_map);
e2d2fc5c
A
8386 if (version.main_timestamp == dst_map->timestamp &&
8387 copy_size != 0) {
1c79356b
A
8388 /* We can safely use saved tmp_entry value */
8389
fe8ab488
A
8390 if (tmp_entry->map_aligned &&
8391 !VM_MAP_PAGE_ALIGNED(
8392 start,
8393 VM_MAP_PAGE_MASK(dst_map))) {
8394 /* no longer map-aligned */
8395 tmp_entry->map_aligned = FALSE;
8396 }
1c79356b
A
8397 vm_map_clip_end(dst_map, tmp_entry, start);
8398 tmp_entry = tmp_entry->vme_next;
8399 } else {
8400 /* Must do lookup of tmp_entry */
8401
8402 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8403 vm_map_unlock(dst_map);
8404 return(KERN_INVALID_ADDRESS);
8405 }
fe8ab488
A
8406 if (tmp_entry->map_aligned &&
8407 !VM_MAP_PAGE_ALIGNED(
8408 start,
8409 VM_MAP_PAGE_MASK(dst_map))) {
8410 /* no longer map-aligned */
8411 tmp_entry->map_aligned = FALSE;
8412 }
1c79356b
A
8413 vm_map_clip_start(dst_map, tmp_entry, start);
8414 }
8415 }
8416 }/* while */
8417
8418 return(KERN_SUCCESS);
8419}/* vm_map_copy_overwrite_aligned */
8420
8421/*
91447636 8422 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
8423 *
8424 * Description:
8425 * Copy in data to a kernel buffer from space in the
91447636 8426 * source map. The original space may be optionally
1c79356b
A
8427 * deallocated.
8428 *
8429 * If successful, returns a new copy object.
8430 */
91447636 8431static kern_return_t
1c79356b
A
8432vm_map_copyin_kernel_buffer(
8433 vm_map_t src_map,
91447636
A
8434 vm_map_offset_t src_addr,
8435 vm_map_size_t len,
1c79356b
A
8436 boolean_t src_destroy,
8437 vm_map_copy_t *copy_result)
8438{
91447636 8439 kern_return_t kr;
1c79356b 8440 vm_map_copy_t copy;
b0d623f7
A
8441 vm_size_t kalloc_size;
8442
3e170ce0
A
8443 if (len > msg_ool_size_small)
8444 return KERN_INVALID_ARGUMENT;
1c79356b 8445
3e170ce0
A
8446 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8447
8448 copy = (vm_map_copy_t)kalloc(kalloc_size);
8449 if (copy == VM_MAP_COPY_NULL)
1c79356b 8450 return KERN_RESOURCE_SHORTAGE;
1c79356b
A
8451 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8452 copy->size = len;
8453 copy->offset = 0;
1c79356b 8454
3e170ce0 8455 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636
A
8456 if (kr != KERN_SUCCESS) {
8457 kfree(copy, kalloc_size);
8458 return kr;
1c79356b
A
8459 }
8460 if (src_destroy) {
39236c6e
A
8461 (void) vm_map_remove(
8462 src_map,
8463 vm_map_trunc_page(src_addr,
8464 VM_MAP_PAGE_MASK(src_map)),
8465 vm_map_round_page(src_addr + len,
8466 VM_MAP_PAGE_MASK(src_map)),
8467 (VM_MAP_REMOVE_INTERRUPTIBLE |
8468 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
8469 (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
1c79356b
A
8470 }
8471 *copy_result = copy;
8472 return KERN_SUCCESS;
8473}
8474
8475/*
91447636 8476 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
8477 *
8478 * Description:
8479 * Copy out data from a kernel buffer into space in the
8480 * destination map. The space may be otpionally dynamically
8481 * allocated.
8482 *
8483 * If successful, consumes the copy object.
8484 * Otherwise, the caller is responsible for it.
8485 */
91447636
A
8486static int vm_map_copyout_kernel_buffer_failures = 0;
8487static kern_return_t
1c79356b 8488vm_map_copyout_kernel_buffer(
91447636
A
8489 vm_map_t map,
8490 vm_map_address_t *addr, /* IN/OUT */
8491 vm_map_copy_t copy,
39236c6e
A
8492 boolean_t overwrite,
8493 boolean_t consume_on_success)
1c79356b
A
8494{
8495 kern_return_t kr = KERN_SUCCESS;
91447636 8496 thread_t thread = current_thread();
1c79356b 8497
3e170ce0
A
8498 /*
8499 * check for corrupted vm_map_copy structure
8500 */
8501 if (copy->size > msg_ool_size_small || copy->offset)
8502 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8503 (long long)copy->size, (long long)copy->offset);
8504
1c79356b
A
8505 if (!overwrite) {
8506
8507 /*
8508 * Allocate space in the target map for the data
8509 */
8510 *addr = 0;
8511 kr = vm_map_enter(map,
8512 addr,
39236c6e
A
8513 vm_map_round_page(copy->size,
8514 VM_MAP_PAGE_MASK(map)),
91447636
A
8515 (vm_map_offset_t) 0,
8516 VM_FLAGS_ANYWHERE,
1c79356b
A
8517 VM_OBJECT_NULL,
8518 (vm_object_offset_t) 0,
8519 FALSE,
8520 VM_PROT_DEFAULT,
8521 VM_PROT_ALL,
8522 VM_INHERIT_DEFAULT);
8523 if (kr != KERN_SUCCESS)
91447636 8524 return kr;
1c79356b
A
8525 }
8526
8527 /*
8528 * Copyout the data from the kernel buffer to the target map.
8529 */
91447636 8530 if (thread->map == map) {
1c79356b
A
8531
8532 /*
8533 * If the target map is the current map, just do
8534 * the copy.
8535 */
b0d623f7
A
8536 assert((vm_size_t) copy->size == copy->size);
8537 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
91447636 8538 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8539 }
8540 }
8541 else {
8542 vm_map_t oldmap;
8543
8544 /*
8545 * If the target map is another map, assume the
8546 * target's address space identity for the duration
8547 * of the copy.
8548 */
8549 vm_map_reference(map);
8550 oldmap = vm_map_switch(map);
8551
b0d623f7
A
8552 assert((vm_size_t) copy->size == copy->size);
8553 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
91447636
A
8554 vm_map_copyout_kernel_buffer_failures++;
8555 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8556 }
8557
8558 (void) vm_map_switch(oldmap);
8559 vm_map_deallocate(map);
8560 }
8561
91447636
A
8562 if (kr != KERN_SUCCESS) {
8563 /* the copy failed, clean up */
8564 if (!overwrite) {
8565 /*
8566 * Deallocate the space we allocated in the target map.
8567 */
39236c6e
A
8568 (void) vm_map_remove(
8569 map,
8570 vm_map_trunc_page(*addr,
8571 VM_MAP_PAGE_MASK(map)),
8572 vm_map_round_page((*addr +
8573 vm_map_round_page(copy->size,
8574 VM_MAP_PAGE_MASK(map))),
8575 VM_MAP_PAGE_MASK(map)),
8576 VM_MAP_NO_FLAGS);
91447636
A
8577 *addr = 0;
8578 }
8579 } else {
8580 /* copy was successful, dicard the copy structure */
39236c6e 8581 if (consume_on_success) {
3e170ce0 8582 kfree(copy, copy->size + cpy_kdata_hdr_sz);
39236c6e 8583 }
91447636 8584 }
1c79356b 8585
91447636 8586 return kr;
1c79356b
A
8587}
8588
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Splice the entries of the copy chain "copy" into "map"
 *		immediately after the entry "where".
 *	Side effects:
 *		The copy header is handed back to vm_map_copy_zone, so
 *		"copy" must not be used afterwards.
 *	Warning:
 *		"copy" is expanded twice; do not pass an expression
 *		with side effects.
 */
#define	vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	vm_map_store_copy_insert((map), (where), (copy));		\
	zfree(vm_map_copy_zone, (copy));				\
MACRO_END
8605
39236c6e
A
8606void
8607vm_map_copy_remap(
8608 vm_map_t map,
8609 vm_map_entry_t where,
8610 vm_map_copy_t copy,
8611 vm_map_offset_t adjustment,
8612 vm_prot_t cur_prot,
8613 vm_prot_t max_prot,
8614 vm_inherit_t inheritance)
8615{
8616 vm_map_entry_t copy_entry, new_entry;
8617
8618 for (copy_entry = vm_map_copy_first_entry(copy);
8619 copy_entry != vm_map_copy_to_entry(copy);
8620 copy_entry = copy_entry->vme_next) {
8621 /* get a new VM map entry for the map */
8622 new_entry = vm_map_entry_create(map,
8623 !map->hdr.entries_pageable);
8624 /* copy the "copy entry" to the new entry */
8625 vm_map_entry_copy(new_entry, copy_entry);
8626 /* adjust "start" and "end" */
8627 new_entry->vme_start += adjustment;
8628 new_entry->vme_end += adjustment;
8629 /* clear some attributes */
8630 new_entry->inheritance = inheritance;
8631 new_entry->protection = cur_prot;
8632 new_entry->max_protection = max_prot;
8633 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8634 /* take an extra reference on the entry's "object" */
8635 if (new_entry->is_sub_map) {
fe8ab488 8636 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
8637 vm_map_lock(VME_SUBMAP(new_entry));
8638 vm_map_reference(VME_SUBMAP(new_entry));
8639 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 8640 } else {
3e170ce0 8641 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
8642 }
8643 /* insert the new entry in the map */
8644 vm_map_store_entry_link(map, where, new_entry);
8645 /* continue inserting the "copy entries" after the new entry */
8646 where = new_entry;
8647 }
8648}
8649
2dced7af
A
8650
8651boolean_t
8652vm_map_copy_validate_size(
8653 vm_map_t dst_map,
8654 vm_map_copy_t copy,
8655 vm_map_size_t size)
8656{
8657 if (copy == VM_MAP_COPY_NULL)
8658 return FALSE;
8659 switch (copy->type) {
8660 case VM_MAP_COPY_OBJECT:
8661 case VM_MAP_COPY_KERNEL_BUFFER:
8662 if (size == copy->size)
8663 return TRUE;
8664 break;
8665 case VM_MAP_COPY_ENTRY_LIST:
8666 /*
8667 * potential page-size rounding prevents us from exactly
8668 * validating this flavor of vm_map_copy, but we can at least
8669 * assert that it's within a range.
8670 */
8671 if (copy->size >= size &&
8672 copy->size <= vm_map_round_page(size,
8673 VM_MAP_PAGE_MASK(dst_map)))
8674 return TRUE;
8675 break;
8676 default:
8677 break;
8678 }
8679 return FALSE;
8680}
8681
8682
1c79356b
A
8683/*
8684 * Routine: vm_map_copyout
8685 *
8686 * Description:
8687 * Copy out a copy chain ("copy") into newly-allocated
8688 * space in the destination map.
8689 *
8690 * If successful, consumes the copy object.
8691 * Otherwise, the caller is responsible for it.
8692 */
39236c6e 8693
1c79356b
A
8694kern_return_t
8695vm_map_copyout(
91447636
A
8696 vm_map_t dst_map,
8697 vm_map_address_t *dst_addr, /* OUT */
8698 vm_map_copy_t copy)
39236c6e
A
8699{
8700 return vm_map_copyout_internal(dst_map, dst_addr, copy,
8701 TRUE, /* consume_on_success */
8702 VM_PROT_DEFAULT,
8703 VM_PROT_ALL,
8704 VM_INHERIT_DEFAULT);
8705}
8706
8707kern_return_t
8708vm_map_copyout_internal(
8709 vm_map_t dst_map,
8710 vm_map_address_t *dst_addr, /* OUT */
8711 vm_map_copy_t copy,
8712 boolean_t consume_on_success,
8713 vm_prot_t cur_protection,
8714 vm_prot_t max_protection,
8715 vm_inherit_t inheritance)
1c79356b 8716{
91447636
A
8717 vm_map_size_t size;
8718 vm_map_size_t adjustment;
8719 vm_map_offset_t start;
1c79356b
A
8720 vm_object_offset_t vm_copy_start;
8721 vm_map_entry_t last;
1c79356b 8722 vm_map_entry_t entry;
3e170ce0 8723 vm_map_entry_t hole_entry;
1c79356b
A
8724
8725 /*
8726 * Check for null copy object.
8727 */
8728
8729 if (copy == VM_MAP_COPY_NULL) {
8730 *dst_addr = 0;
8731 return(KERN_SUCCESS);
8732 }
8733
8734 /*
8735 * Check for special copy object, created
8736 * by vm_map_copyin_object.
8737 */
8738
8739 if (copy->type == VM_MAP_COPY_OBJECT) {
8740 vm_object_t object = copy->cpy_object;
8741 kern_return_t kr;
8742 vm_object_offset_t offset;
8743
91447636 8744 offset = vm_object_trunc_page(copy->offset);
39236c6e
A
8745 size = vm_map_round_page((copy->size +
8746 (vm_map_size_t)(copy->offset -
8747 offset)),
8748 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8749 *dst_addr = 0;
8750 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 8751 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
1c79356b
A
8752 object, offset, FALSE,
8753 VM_PROT_DEFAULT, VM_PROT_ALL,
8754 VM_INHERIT_DEFAULT);
8755 if (kr != KERN_SUCCESS)
8756 return(kr);
8757 /* Account for non-pagealigned copy object */
91447636 8758 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
39236c6e
A
8759 if (consume_on_success)
8760 zfree(vm_map_copy_zone, copy);
1c79356b
A
8761 return(KERN_SUCCESS);
8762 }
8763
8764 /*
8765 * Check for special kernel buffer allocated
8766 * by new_ipc_kmsg_copyin.
8767 */
8768
8769 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39236c6e
A
8770 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8771 copy, FALSE,
8772 consume_on_success);
1c79356b
A
8773 }
8774
39236c6e 8775
1c79356b
A
8776 /*
8777 * Find space for the data
8778 */
8779
39236c6e
A
8780 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8781 VM_MAP_COPY_PAGE_MASK(copy));
8782 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
8783 VM_MAP_COPY_PAGE_MASK(copy))
2d21ac55 8784 - vm_copy_start;
1c79356b 8785
39236c6e 8786
2d21ac55 8787StartAgain: ;
1c79356b
A
8788
8789 vm_map_lock(dst_map);
6d2010ae
A
8790 if( dst_map->disable_vmentry_reuse == TRUE) {
8791 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8792 last = entry;
8793 } else {
3e170ce0
A
8794 if (dst_map->holelistenabled) {
8795 hole_entry = (vm_map_entry_t)dst_map->holes_list;
8796
8797 if (hole_entry == NULL) {
8798 /*
8799 * No more space in the map?
8800 */
8801 vm_map_unlock(dst_map);
8802 return(KERN_NO_SPACE);
8803 }
8804
8805 last = hole_entry;
8806 start = last->vme_start;
8807 } else {
8808 assert(first_free_is_valid(dst_map));
8809 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8810 vm_map_min(dst_map) : last->vme_end;
8811 }
39236c6e
A
8812 start = vm_map_round_page(start,
8813 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 8814 }
1c79356b
A
8815
8816 while (TRUE) {
8817 vm_map_entry_t next = last->vme_next;
91447636 8818 vm_map_offset_t end = start + size;
1c79356b
A
8819
8820 if ((end > dst_map->max_offset) || (end < start)) {
8821 if (dst_map->wait_for_space) {
8822 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8823 assert_wait((event_t) dst_map,
8824 THREAD_INTERRUPTIBLE);
8825 vm_map_unlock(dst_map);
91447636 8826 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
8827 goto StartAgain;
8828 }
8829 }
8830 vm_map_unlock(dst_map);
8831 return(KERN_NO_SPACE);
8832 }
8833
3e170ce0
A
8834 if (dst_map->holelistenabled) {
8835 if (last->vme_end >= end)
8836 break;
8837 } else {
8838 /*
8839 * If there are no more entries, we must win.
8840 *
8841 * OR
8842 *
8843 * If there is another entry, it must be
8844 * after the end of the potential new region.
8845 */
8846
8847 if (next == vm_map_to_entry(dst_map))
8848 break;
8849
8850 if (next->vme_start >= end)
8851 break;
8852 }
1c79356b
A
8853
8854 last = next;
3e170ce0
A
8855
8856 if (dst_map->holelistenabled) {
8857 if (last == (vm_map_entry_t) dst_map->holes_list) {
8858 /*
8859 * Wrapped around
8860 */
8861 vm_map_unlock(dst_map);
8862 return(KERN_NO_SPACE);
8863 }
8864 start = last->vme_start;
8865 } else {
8866 start = last->vme_end;
8867 }
39236c6e
A
8868 start = vm_map_round_page(start,
8869 VM_MAP_PAGE_MASK(dst_map));
8870 }
8871
3e170ce0
A
8872 if (dst_map->holelistenabled) {
8873 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
8874 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
8875 }
8876 }
8877
8878
39236c6e
A
8879 adjustment = start - vm_copy_start;
8880 if (! consume_on_success) {
8881 /*
8882 * We're not allowed to consume "copy", so we'll have to
8883 * copy its map entries into the destination map below.
8884 * No need to re-allocate map entries from the correct
8885 * (pageable or not) zone, since we'll get new map entries
8886 * during the transfer.
8887 * We'll also adjust the map entries's "start" and "end"
8888 * during the transfer, to keep "copy"'s entries consistent
8889 * with its "offset".
8890 */
8891 goto after_adjustments;
1c79356b
A
8892 }
8893
8894 /*
8895 * Since we're going to just drop the map
8896 * entries from the copy into the destination
8897 * map, they must come from the same pool.
8898 */
8899
8900 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
8901 /*
8902 * Mismatches occur when dealing with the default
8903 * pager.
8904 */
8905 zone_t old_zone;
8906 vm_map_entry_t next, new;
8907
8908 /*
8909 * Find the zone that the copies were allocated from
8910 */
7ddcb079 8911
2d21ac55
A
8912 entry = vm_map_copy_first_entry(copy);
8913
8914 /*
8915 * Reinitialize the copy so that vm_map_copy_entry_link
8916 * will work.
8917 */
6d2010ae 8918 vm_map_store_copy_reset(copy, entry);
2d21ac55 8919 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
8920
8921 /*
8922 * Copy each entry.
8923 */
8924 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 8925 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 8926 vm_map_entry_copy_full(new, entry);
fe8ab488
A
8927 assert(!new->iokit_acct);
8928 if (new->is_sub_map) {
8929 /* clr address space specifics */
8930 new->use_pmap = FALSE;
8931 }
2d21ac55
A
8932 vm_map_copy_entry_link(copy,
8933 vm_map_copy_last_entry(copy),
8934 new);
8935 next = entry->vme_next;
7ddcb079 8936 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
8937 zfree(old_zone, entry);
8938 entry = next;
8939 }
1c79356b
A
8940 }
8941
8942 /*
8943 * Adjust the addresses in the copy chain, and
8944 * reset the region attributes.
8945 */
8946
1c79356b
A
8947 for (entry = vm_map_copy_first_entry(copy);
8948 entry != vm_map_copy_to_entry(copy);
8949 entry = entry->vme_next) {
39236c6e
A
8950 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
8951 /*
8952 * We're injecting this copy entry into a map that
8953 * has the standard page alignment, so clear
8954 * "map_aligned" (which might have been inherited
8955 * from the original map entry).
8956 */
8957 entry->map_aligned = FALSE;
8958 }
8959
1c79356b
A
8960 entry->vme_start += adjustment;
8961 entry->vme_end += adjustment;
8962
39236c6e
A
8963 if (entry->map_aligned) {
8964 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
8965 VM_MAP_PAGE_MASK(dst_map)));
8966 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
8967 VM_MAP_PAGE_MASK(dst_map)));
8968 }
8969
1c79356b
A
8970 entry->inheritance = VM_INHERIT_DEFAULT;
8971 entry->protection = VM_PROT_DEFAULT;
8972 entry->max_protection = VM_PROT_ALL;
8973 entry->behavior = VM_BEHAVIOR_DEFAULT;
8974
8975 /*
8976 * If the entry is now wired,
8977 * map the pages into the destination map.
8978 */
8979 if (entry->wired_count != 0) {
2d21ac55
A
8980 register vm_map_offset_t va;
8981 vm_object_offset_t offset;
8982 register vm_object_t object;
8983 vm_prot_t prot;
8984 int type_of_fault;
1c79356b 8985
3e170ce0
A
8986 object = VME_OBJECT(entry);
8987 offset = VME_OFFSET(entry);
2d21ac55 8988 va = entry->vme_start;
1c79356b 8989
2d21ac55
A
8990 pmap_pageable(dst_map->pmap,
8991 entry->vme_start,
8992 entry->vme_end,
8993 TRUE);
1c79356b 8994
2d21ac55
A
8995 while (va < entry->vme_end) {
8996 register vm_page_t m;
1c79356b 8997
2d21ac55
A
8998 /*
8999 * Look up the page in the object.
9000 * Assert that the page will be found in the
9001 * top object:
9002 * either
9003 * the object was newly created by
9004 * vm_object_copy_slowly, and has
9005 * copies of all of the pages from
9006 * the source object
9007 * or
9008 * the object was moved from the old
9009 * map entry; because the old map
9010 * entry was wired, all of the pages
9011 * were in the top-level object.
9012 * (XXX not true if we wire pages for
9013 * reading)
9014 */
9015 vm_object_lock(object);
91447636 9016
2d21ac55 9017 m = vm_page_lookup(object, offset);
b0d623f7 9018 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
2d21ac55
A
9019 m->absent)
9020 panic("vm_map_copyout: wiring %p", m);
1c79356b 9021
2d21ac55
A
9022 /*
9023 * ENCRYPTED SWAP:
9024 * The page is assumed to be wired here, so it
9025 * shouldn't be encrypted. Otherwise, we
9026 * couldn't enter it in the page table, since
9027 * we don't want the user to see the encrypted
9028 * data.
9029 */
9030 ASSERT_PAGE_DECRYPTED(m);
1c79356b 9031
2d21ac55 9032 prot = entry->protection;
1c79356b 9033
3e170ce0
A
9034 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9035 prot)
2d21ac55 9036 prot |= VM_PROT_EXECUTE;
1c79356b 9037
2d21ac55 9038 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 9039
6d2010ae 9040 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
fe8ab488 9041 VM_PAGE_WIRED(m), FALSE, FALSE,
3e170ce0 9042 FALSE, VME_ALIAS(entry),
fe8ab488
A
9043 ((entry->iokit_acct ||
9044 (!entry->is_sub_map &&
9045 !entry->use_pmap))
9046 ? PMAP_OPTIONS_ALT_ACCT
9047 : 0),
9048 NULL, &type_of_fault);
1c79356b 9049
2d21ac55 9050 vm_object_unlock(object);
1c79356b 9051
2d21ac55
A
9052 offset += PAGE_SIZE_64;
9053 va += PAGE_SIZE;
1c79356b
A
9054 }
9055 }
9056 }
9057
39236c6e
A
9058after_adjustments:
9059
1c79356b
A
9060 /*
9061 * Correct the page alignment for the result
9062 */
9063
9064 *dst_addr = start + (copy->offset - vm_copy_start);
9065
9066 /*
9067 * Update the hints and the map size
9068 */
9069
39236c6e
A
9070 if (consume_on_success) {
9071 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9072 } else {
9073 SAVE_HINT_MAP_WRITE(dst_map, last);
9074 }
1c79356b
A
9075
9076 dst_map->size += size;
9077
9078 /*
9079 * Link in the copy
9080 */
9081
39236c6e
A
9082 if (consume_on_success) {
9083 vm_map_copy_insert(dst_map, last, copy);
9084 } else {
9085 vm_map_copy_remap(dst_map, last, copy, adjustment,
9086 cur_protection, max_protection,
9087 inheritance);
9088 }
1c79356b
A
9089
9090 vm_map_unlock(dst_map);
9091
9092 /*
9093 * XXX If wiring_required, call vm_map_pageable
9094 */
9095
9096 return(KERN_SUCCESS);
9097}
9098
1c79356b
A
9099/*
9100 * Routine: vm_map_copyin
9101 *
9102 * Description:
2d21ac55
A
9103 * see vm_map_copyin_common. Exported via Unsupported.exports.
9104 *
9105 */
9106
9107#undef vm_map_copyin
9108
9109kern_return_t
9110vm_map_copyin(
9111 vm_map_t src_map,
9112 vm_map_address_t src_addr,
9113 vm_map_size_t len,
9114 boolean_t src_destroy,
9115 vm_map_copy_t *copy_result) /* OUT */
9116{
9117 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9118 FALSE, copy_result, FALSE));
9119}
9120
9121/*
9122 * Routine: vm_map_copyin_common
9123 *
9124 * Description:
1c79356b
A
9125 * Copy the specified region (src_addr, len) from the
9126 * source address space (src_map), possibly removing
9127 * the region from the source address space (src_destroy).
9128 *
9129 * Returns:
9130 * A vm_map_copy_t object (copy_result), suitable for
9131 * insertion into another address space (using vm_map_copyout),
9132 * copying over another address space region (using
9133 * vm_map_copy_overwrite). If the copy is unused, it
9134 * should be destroyed (using vm_map_copy_discard).
9135 *
9136 * In/out conditions:
9137 * The source map should not be locked on entry.
9138 */
9139
9140typedef struct submap_map {
9141 vm_map_t parent_map;
91447636
A
9142 vm_map_offset_t base_start;
9143 vm_map_offset_t base_end;
2d21ac55 9144 vm_map_size_t base_len;
1c79356b
A
9145 struct submap_map *next;
9146} submap_map_t;
9147
9148kern_return_t
9149vm_map_copyin_common(
9150 vm_map_t src_map,
91447636
A
9151 vm_map_address_t src_addr,
9152 vm_map_size_t len,
1c79356b 9153 boolean_t src_destroy,
91447636 9154 __unused boolean_t src_volatile,
1c79356b
A
9155 vm_map_copy_t *copy_result, /* OUT */
9156 boolean_t use_maxprot)
4bd07ac2
A
9157{
9158 int flags;
9159
9160 flags = 0;
9161 if (src_destroy) {
9162 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9163 }
9164 if (use_maxprot) {
9165 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9166 }
9167 return vm_map_copyin_internal(src_map,
9168 src_addr,
9169 len,
9170 flags,
9171 copy_result);
9172}
9173kern_return_t
9174vm_map_copyin_internal(
9175 vm_map_t src_map,
9176 vm_map_address_t src_addr,
9177 vm_map_size_t len,
9178 int flags,
9179 vm_map_copy_t *copy_result) /* OUT */
1c79356b 9180{
1c79356b
A
9181 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9182 * in multi-level lookup, this
9183 * entry contains the actual
9184 * vm_object/offset.
9185 */
1c79356b
A
9186 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9187
91447636 9188 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
9189 * where copy is taking place now
9190 */
91447636 9191 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 9192 * copied */
2d21ac55 9193 vm_map_offset_t src_base;
91447636 9194 vm_map_t base_map = src_map;
1c79356b
A
9195 boolean_t map_share=FALSE;
9196 submap_map_t *parent_maps = NULL;
9197
1c79356b 9198 vm_map_copy_t copy; /* Resulting copy */
fe8ab488
A
9199 vm_map_address_t copy_addr;
9200 vm_map_size_t copy_size;
4bd07ac2
A
9201 boolean_t src_destroy;
9202 boolean_t use_maxprot;
9203
9204 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9205 return KERN_INVALID_ARGUMENT;
9206 }
9207
9208 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9209 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
1c79356b
A
9210
9211 /*
9212 * Check for copies of zero bytes.
9213 */
9214
9215 if (len == 0) {
9216 *copy_result = VM_MAP_COPY_NULL;
9217 return(KERN_SUCCESS);
9218 }
9219
4a249263
A
9220 /*
9221 * Check that the end address doesn't overflow
9222 */
9223 src_end = src_addr + len;
9224 if (src_end < src_addr)
9225 return KERN_INVALID_ADDRESS;
9226
1c79356b
A
9227 /*
9228 * If the copy is sufficiently small, use a kernel buffer instead
9229 * of making a virtual copy. The theory being that the cost of
9230 * setting up VM (and taking C-O-W faults) dominates the copy costs
9231 * for small regions.
9232 */
4bd07ac2
A
9233 if ((len < msg_ool_size_small) &&
9234 !use_maxprot &&
9235 !(flags & VM_MAP_COPYIN_ENTRY_LIST))
2d21ac55
A
9236 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9237 src_destroy, copy_result);
1c79356b
A
9238
9239 /*
4a249263 9240 * Compute (page aligned) start and end of region
1c79356b 9241 */
39236c6e
A
9242 src_start = vm_map_trunc_page(src_addr,
9243 VM_MAP_PAGE_MASK(src_map));
9244 src_end = vm_map_round_page(src_end,
9245 VM_MAP_PAGE_MASK(src_map));
1c79356b 9246
b0d623f7 9247 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 9248
1c79356b
A
9249 /*
9250 * Allocate a header element for the list.
9251 *
9252 * Use the start and end in the header to
9253 * remember the endpoints prior to rounding.
9254 */
9255
9256 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 9257 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 9258 vm_map_copy_first_entry(copy) =
2d21ac55 9259 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
1c79356b
A
9260 copy->type = VM_MAP_COPY_ENTRY_LIST;
9261 copy->cpy_hdr.nentries = 0;
9262 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
9263#if 00
9264 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9265#else
9266 /*
9267 * The copy entries can be broken down for a variety of reasons,
9268 * so we can't guarantee that they will remain map-aligned...
9269 * Will need to adjust the first copy_entry's "vme_start" and
9270 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9271 * rather than the original map's alignment.
9272 */
9273 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9274#endif
1c79356b 9275
6d2010ae
A
9276 vm_map_store_init( &(copy->cpy_hdr) );
9277
1c79356b
A
9278 copy->offset = src_addr;
9279 copy->size = len;
9280
7ddcb079 9281 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b
A
9282
9283#define RETURN(x) \
9284 MACRO_BEGIN \
9285 vm_map_unlock(src_map); \
9bccf70c
A
9286 if(src_map != base_map) \
9287 vm_map_deallocate(src_map); \
1c79356b
A
9288 if (new_entry != VM_MAP_ENTRY_NULL) \
9289 vm_map_copy_entry_dispose(copy,new_entry); \
9290 vm_map_copy_discard(copy); \
9291 { \
91447636 9292 submap_map_t *_ptr; \
1c79356b 9293 \
91447636 9294 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 9295 parent_maps=parent_maps->next; \
91447636
A
9296 if (_ptr->parent_map != base_map) \
9297 vm_map_deallocate(_ptr->parent_map); \
9298 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
9299 } \
9300 } \
9301 MACRO_RETURN(x); \
9302 MACRO_END
9303
9304 /*
9305 * Find the beginning of the region.
9306 */
9307
9308 vm_map_lock(src_map);
9309
fe8ab488
A
9310 /*
9311 * Lookup the original "src_addr" rather than the truncated
9312 * "src_start", in case "src_start" falls in a non-map-aligned
9313 * map entry *before* the map entry that contains "src_addr"...
9314 */
9315 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
1c79356b
A
9316 RETURN(KERN_INVALID_ADDRESS);
9317 if(!tmp_entry->is_sub_map) {
fe8ab488
A
9318 /*
9319 * ... but clip to the map-rounded "src_start" rather than
9320 * "src_addr" to preserve map-alignment. We'll adjust the
9321 * first copy entry at the end, if needed.
9322 */
1c79356b
A
9323 vm_map_clip_start(src_map, tmp_entry, src_start);
9324 }
fe8ab488
A
9325 if (src_start < tmp_entry->vme_start) {
9326 /*
9327 * Move "src_start" up to the start of the
9328 * first map entry to copy.
9329 */
9330 src_start = tmp_entry->vme_start;
9331 }
1c79356b
A
9332 /* set for later submap fix-up */
9333 copy_addr = src_start;
9334
9335 /*
9336 * Go through entries until we get to the end.
9337 */
9338
9339 while (TRUE) {
9340 register
9341 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 9342 vm_map_size_t src_size; /* Size of source
1c79356b
A
9343 * map entry (in both
9344 * maps)
9345 */
9346
9347 register
9348 vm_object_t src_object; /* Object to copy */
9349 vm_object_offset_t src_offset;
9350
9351 boolean_t src_needs_copy; /* Should source map
9352 * be made read-only
9353 * for copy-on-write?
9354 */
9355
9356 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9357
9358 boolean_t was_wired; /* Was source wired? */
9359 vm_map_version_t version; /* Version before locks
9360 * dropped to make copy
9361 */
9362 kern_return_t result; /* Return value from
9363 * copy_strategically.
9364 */
9365 while(tmp_entry->is_sub_map) {
91447636 9366 vm_map_size_t submap_len;
1c79356b
A
9367 submap_map_t *ptr;
9368
9369 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9370 ptr->next = parent_maps;
9371 parent_maps = ptr;
9372 ptr->parent_map = src_map;
9373 ptr->base_start = src_start;
9374 ptr->base_end = src_end;
9375 submap_len = tmp_entry->vme_end - src_start;
9376 if(submap_len > (src_end-src_start))
9377 submap_len = src_end-src_start;
2d21ac55 9378 ptr->base_len = submap_len;
1c79356b
A
9379
9380 src_start -= tmp_entry->vme_start;
3e170ce0 9381 src_start += VME_OFFSET(tmp_entry);
1c79356b 9382 src_end = src_start + submap_len;
3e170ce0 9383 src_map = VME_SUBMAP(tmp_entry);
1c79356b 9384 vm_map_lock(src_map);
9bccf70c
A
9385 /* keep an outstanding reference for all maps in */
9386 /* the parents tree except the base map */
9387 vm_map_reference(src_map);
1c79356b
A
9388 vm_map_unlock(ptr->parent_map);
9389 if (!vm_map_lookup_entry(
2d21ac55 9390 src_map, src_start, &tmp_entry))
1c79356b
A
9391 RETURN(KERN_INVALID_ADDRESS);
9392 map_share = TRUE;
9393 if(!tmp_entry->is_sub_map)
2d21ac55 9394 vm_map_clip_start(src_map, tmp_entry, src_start);
1c79356b
A
9395 src_entry = tmp_entry;
9396 }
2d21ac55
A
9397 /* we are now in the lowest level submap... */
9398
3e170ce0
A
9399 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9400 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
9401 /* This is not supported for now. In future */
9402 /* we will need to detect the phys_contig */
9403 /* condition and then upgrade copy_slowly */
9404 /* to do physical copy from the device mem */
9405 /* based object. We can piggy-back off of */
9406 /* the was_wired boolean to set up the */
9407 /* proper handling. */
0b4e3aa0
A
9408 RETURN(KERN_PROTECTION_FAILURE);
9409 }
1c79356b
A
9410 /*
9411 * Create a new address map entry to hold the result.
9412 * Fill in the fields from the appropriate source entries.
9413 * We must unlock the source map to do this if we need
9414 * to allocate a map entry.
9415 */
9416 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
9417 version.main_timestamp = src_map->timestamp;
9418 vm_map_unlock(src_map);
1c79356b 9419
7ddcb079 9420 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 9421
2d21ac55
A
9422 vm_map_lock(src_map);
9423 if ((version.main_timestamp + 1) != src_map->timestamp) {
9424 if (!vm_map_lookup_entry(src_map, src_start,
9425 &tmp_entry)) {
9426 RETURN(KERN_INVALID_ADDRESS);
9427 }
9428 if (!tmp_entry->is_sub_map)
9429 vm_map_clip_start(src_map, tmp_entry, src_start);
9430 continue; /* restart w/ new tmp_entry */
1c79356b 9431 }
1c79356b
A
9432 }
9433
9434 /*
9435 * Verify that the region can be read.
9436 */
9437 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
2d21ac55 9438 !use_maxprot) ||
1c79356b
A
9439 (src_entry->max_protection & VM_PROT_READ) == 0)
9440 RETURN(KERN_PROTECTION_FAILURE);
9441
9442 /*
9443 * Clip against the endpoints of the entire region.
9444 */
9445
9446 vm_map_clip_end(src_map, src_entry, src_end);
9447
9448 src_size = src_entry->vme_end - src_start;
3e170ce0
A
9449 src_object = VME_OBJECT(src_entry);
9450 src_offset = VME_OFFSET(src_entry);
1c79356b
A
9451 was_wired = (src_entry->wired_count != 0);
9452
9453 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
9454 if (new_entry->is_sub_map) {
9455 /* clr address space specifics */
9456 new_entry->use_pmap = FALSE;
9457 }
1c79356b
A
9458
9459 /*
9460 * Attempt non-blocking copy-on-write optimizations.
9461 */
9462
9463 if (src_destroy &&
9464 (src_object == VM_OBJECT_NULL ||
2d21ac55
A
9465 (src_object->internal && !src_object->true_share
9466 && !map_share))) {
9467 /*
9468 * If we are destroying the source, and the object
9469 * is internal, we can move the object reference
9470 * from the source to the copy. The copy is
9471 * copy-on-write only if the source is.
9472 * We make another reference to the object, because
9473 * destroying the source entry will deallocate it.
9474 */
9475 vm_object_reference(src_object);
1c79356b 9476
2d21ac55
A
9477 /*
9478 * Copy is always unwired. vm_map_copy_entry
9479 * set its wired count to zero.
9480 */
1c79356b 9481
2d21ac55 9482 goto CopySuccessful;
1c79356b
A
9483 }
9484
9485
2d21ac55 9486 RestartCopy:
1c79356b 9487 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
3e170ce0 9488 src_object, new_entry, VME_OBJECT(new_entry),
1c79356b 9489 was_wired, 0);
55e303ae 9490 if ((src_object == VM_OBJECT_NULL ||
2d21ac55
A
9491 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9492 vm_object_copy_quickly(
3e170ce0 9493 &VME_OBJECT(new_entry),
2d21ac55
A
9494 src_offset,
9495 src_size,
9496 &src_needs_copy,
9497 &new_entry_needs_copy)) {
1c79356b
A
9498
9499 new_entry->needs_copy = new_entry_needs_copy;
9500
9501 /*
9502 * Handle copy-on-write obligations
9503 */
9504
9505 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
9506 vm_prot_t prot;
9507
9508 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 9509
3e170ce0
A
9510 if (override_nx(src_map, VME_ALIAS(src_entry))
9511 && prot)
0c530ab8 9512 prot |= VM_PROT_EXECUTE;
2d21ac55 9513
55e303ae
A
9514 vm_object_pmap_protect(
9515 src_object,
9516 src_offset,
9517 src_size,
9518 (src_entry->is_shared ?
2d21ac55
A
9519 PMAP_NULL
9520 : src_map->pmap),
55e303ae 9521 src_entry->vme_start,
0c530ab8
A
9522 prot);
9523
3e170ce0 9524 assert(tmp_entry->wired_count == 0);
55e303ae 9525 tmp_entry->needs_copy = TRUE;
1c79356b
A
9526 }
9527
9528 /*
9529 * The map has never been unlocked, so it's safe
9530 * to move to the next entry rather than doing
9531 * another lookup.
9532 */
9533
9534 goto CopySuccessful;
9535 }
9536
1c79356b
A
9537 /*
9538 * Take an object reference, so that we may
9539 * release the map lock(s).
9540 */
9541
9542 assert(src_object != VM_OBJECT_NULL);
9543 vm_object_reference(src_object);
9544
9545 /*
9546 * Record the timestamp for later verification.
9547 * Unlock the map.
9548 */
9549
9550 version.main_timestamp = src_map->timestamp;
9bccf70c 9551 vm_map_unlock(src_map); /* Increments timestamp once! */
1c79356b
A
9552
9553 /*
9554 * Perform the copy
9555 */
9556
9557 if (was_wired) {
55e303ae 9558 CopySlowly:
1c79356b
A
9559 vm_object_lock(src_object);
9560 result = vm_object_copy_slowly(
2d21ac55
A
9561 src_object,
9562 src_offset,
9563 src_size,
9564 THREAD_UNINT,
3e170ce0
A
9565 &VME_OBJECT(new_entry));
9566 VME_OFFSET_SET(new_entry, 0);
1c79356b 9567 new_entry->needs_copy = FALSE;
55e303ae
A
9568
9569 }
9570 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
2d21ac55 9571 (tmp_entry->is_shared || map_share)) {
55e303ae
A
9572 vm_object_t new_object;
9573
2d21ac55 9574 vm_object_lock_shared(src_object);
55e303ae 9575 new_object = vm_object_copy_delayed(
2d21ac55
A
9576 src_object,
9577 src_offset,
9578 src_size,
9579 TRUE);
55e303ae
A
9580 if (new_object == VM_OBJECT_NULL)
9581 goto CopySlowly;
9582
3e170ce0
A
9583 VME_OBJECT_SET(new_entry, new_object);
9584 assert(new_entry->wired_count == 0);
55e303ae 9585 new_entry->needs_copy = TRUE;
fe8ab488
A
9586 assert(!new_entry->iokit_acct);
9587 assert(new_object->purgable == VM_PURGABLE_DENY);
9588 new_entry->use_pmap = TRUE;
55e303ae
A
9589 result = KERN_SUCCESS;
9590
1c79356b 9591 } else {
3e170ce0
A
9592 vm_object_offset_t new_offset;
9593 new_offset = VME_OFFSET(new_entry);
1c79356b 9594 result = vm_object_copy_strategically(src_object,
2d21ac55
A
9595 src_offset,
9596 src_size,
3e170ce0
A
9597 &VME_OBJECT(new_entry),
9598 &new_offset,
2d21ac55 9599 &new_entry_needs_copy);
3e170ce0
A
9600 if (new_offset != VME_OFFSET(new_entry)) {
9601 VME_OFFSET_SET(new_entry, new_offset);
9602 }
1c79356b
A
9603
9604 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
9605 }
9606
9607 if (result != KERN_SUCCESS &&
9608 result != KERN_MEMORY_RESTART_COPY) {
9609 vm_map_lock(src_map);
9610 RETURN(result);
9611 }
9612
9613 /*
9614 * Throw away the extra reference
9615 */
9616
9617 vm_object_deallocate(src_object);
9618
9619 /*
9620 * Verify that the map has not substantially
9621 * changed while the copy was being made.
9622 */
9623
9bccf70c 9624 vm_map_lock(src_map);
1c79356b
A
9625
9626 if ((version.main_timestamp + 1) == src_map->timestamp)
9627 goto VerificationSuccessful;
9628
9629 /*
9630 * Simple version comparison failed.
9631 *
9632 * Retry the lookup and verify that the
9633 * same object/offset are still present.
9634 *
9635 * [Note: a memory manager that colludes with
9636 * the calling task can detect that we have
9637 * cheated. While the map was unlocked, the
9638 * mapping could have been changed and restored.]
9639 */
9640
9641 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 9642 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
9643 vm_object_deallocate(VME_OBJECT(new_entry));
9644 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
fe8ab488
A
9645 assert(!new_entry->iokit_acct);
9646 new_entry->use_pmap = TRUE;
9647 }
1c79356b
A
9648 RETURN(KERN_INVALID_ADDRESS);
9649 }
9650
9651 src_entry = tmp_entry;
9652 vm_map_clip_start(src_map, src_entry, src_start);
9653
91447636
A
9654 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9655 !use_maxprot) ||
9656 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
9657 goto VerificationFailed;
9658
39236c6e
A
9659 if (src_entry->vme_end < new_entry->vme_end) {
9660 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9661 VM_MAP_COPY_PAGE_MASK(copy)));
9662 new_entry->vme_end = src_entry->vme_end;
9663 src_size = new_entry->vme_end - src_start;
9664 }
1c79356b 9665
3e170ce0
A
9666 if ((VME_OBJECT(src_entry) != src_object) ||
9667 (VME_OFFSET(src_entry) != src_offset) ) {
1c79356b
A
9668
9669 /*
9670 * Verification failed.
9671 *
9672 * Start over with this top-level entry.
9673 */
9674
2d21ac55 9675 VerificationFailed: ;
1c79356b 9676
3e170ce0 9677 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
9678 tmp_entry = src_entry;
9679 continue;
9680 }
9681
9682 /*
9683 * Verification succeeded.
9684 */
9685
2d21ac55 9686 VerificationSuccessful: ;
1c79356b
A
9687
9688 if (result == KERN_MEMORY_RESTART_COPY)
9689 goto RestartCopy;
9690
9691 /*
9692 * Copy succeeded.
9693 */
9694
2d21ac55 9695 CopySuccessful: ;
1c79356b
A
9696
9697 /*
9698 * Link in the new copy entry.
9699 */
9700
9701 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9702 new_entry);
9703
9704 /*
9705 * Determine whether the entire region
9706 * has been copied.
9707 */
2d21ac55 9708 src_base = src_start;
1c79356b
A
9709 src_start = new_entry->vme_end;
9710 new_entry = VM_MAP_ENTRY_NULL;
9711 while ((src_start >= src_end) && (src_end != 0)) {
fe8ab488
A
9712 submap_map_t *ptr;
9713
9714 if (src_map == base_map) {
9715 /* back to the top */
1c79356b 9716 break;
fe8ab488
A
9717 }
9718
9719 ptr = parent_maps;
9720 assert(ptr != NULL);
9721 parent_maps = parent_maps->next;
9722
9723 /* fix up the damage we did in that submap */
9724 vm_map_simplify_range(src_map,
9725 src_base,
9726 src_end);
9727
9728 vm_map_unlock(src_map);
9729 vm_map_deallocate(src_map);
9730 vm_map_lock(ptr->parent_map);
9731 src_map = ptr->parent_map;
9732 src_base = ptr->base_start;
9733 src_start = ptr->base_start + ptr->base_len;
9734 src_end = ptr->base_end;
9735 if (!vm_map_lookup_entry(src_map,
9736 src_start,
9737 &tmp_entry) &&
9738 (src_end > src_start)) {
9739 RETURN(KERN_INVALID_ADDRESS);
9740 }
9741 kfree(ptr, sizeof(submap_map_t));
9742 if (parent_maps == NULL)
9743 map_share = FALSE;
9744 src_entry = tmp_entry->vme_prev;
9745 }
9746
9747 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9748 (src_start >= src_addr + len) &&
9749 (src_addr + len != 0)) {
9750 /*
9751 * Stop copying now, even though we haven't reached
9752 * "src_end". We'll adjust the end of the last copy
9753 * entry at the end, if needed.
9754 *
9755 * If src_map's alignment is different from the
9756 * system's page-alignment, there could be
9757 * extra non-map-aligned map entries between
9758 * the original (non-rounded) "src_addr + len"
9759 * and the rounded "src_end".
9760 * We do not want to copy those map entries since
9761 * they're not part of the copied range.
9762 */
9763 break;
1c79356b 9764 }
fe8ab488 9765
1c79356b
A
9766 if ((src_start >= src_end) && (src_end != 0))
9767 break;
9768
9769 /*
9770 * Verify that there are no gaps in the region
9771 */
9772
9773 tmp_entry = src_entry->vme_next;
fe8ab488 9774 if ((tmp_entry->vme_start != src_start) ||
39236c6e 9775 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 9776 RETURN(KERN_INVALID_ADDRESS);
39236c6e 9777 }
1c79356b
A
9778 }
9779
9780 /*
9781 * If the source should be destroyed, do it now, since the
9782 * copy was successful.
9783 */
9784 if (src_destroy) {
39236c6e
A
9785 (void) vm_map_delete(
9786 src_map,
9787 vm_map_trunc_page(src_addr,
9788 VM_MAP_PAGE_MASK(src_map)),
9789 src_end,
9790 ((src_map == kernel_map) ?
9791 VM_MAP_REMOVE_KUNWIRE :
9792 VM_MAP_NO_FLAGS),
9793 VM_MAP_NULL);
2d21ac55
A
9794 } else {
9795 /* fix up the damage we did in the base map */
39236c6e
A
9796 vm_map_simplify_range(
9797 src_map,
9798 vm_map_trunc_page(src_addr,
9799 VM_MAP_PAGE_MASK(src_map)),
9800 vm_map_round_page(src_end,
9801 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
9802 }
9803
9804 vm_map_unlock(src_map);
9805
39236c6e 9806 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
fe8ab488
A
9807 vm_map_offset_t original_start, original_offset, original_end;
9808
39236c6e
A
9809 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9810
9811 /* adjust alignment of first copy_entry's "vme_start" */
9812 tmp_entry = vm_map_copy_first_entry(copy);
9813 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9814 vm_map_offset_t adjustment;
fe8ab488
A
9815
9816 original_start = tmp_entry->vme_start;
3e170ce0 9817 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
9818
9819 /* map-align the start of the first copy entry... */
9820 adjustment = (tmp_entry->vme_start -
9821 vm_map_trunc_page(
9822 tmp_entry->vme_start,
9823 VM_MAP_PAGE_MASK(src_map)));
9824 tmp_entry->vme_start -= adjustment;
3e170ce0
A
9825 VME_OFFSET_SET(tmp_entry,
9826 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
9827 copy_addr -= adjustment;
9828 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9829 /* ... adjust for mis-aligned start of copy range */
39236c6e
A
9830 adjustment =
9831 (vm_map_trunc_page(copy->offset,
9832 PAGE_MASK) -
9833 vm_map_trunc_page(copy->offset,
9834 VM_MAP_PAGE_MASK(src_map)));
9835 if (adjustment) {
9836 assert(page_aligned(adjustment));
9837 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9838 tmp_entry->vme_start += adjustment;
3e170ce0
A
9839 VME_OFFSET_SET(tmp_entry,
9840 (VME_OFFSET(tmp_entry) +
9841 adjustment));
39236c6e
A
9842 copy_addr += adjustment;
9843 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9844 }
fe8ab488
A
9845
9846 /*
9847 * Assert that the adjustments haven't exposed
9848 * more than was originally copied...
9849 */
9850 assert(tmp_entry->vme_start >= original_start);
3e170ce0 9851 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
9852 /*
9853 * ... and that it did not adjust outside of
9854 * a single 16K page.
9855 */
9856 assert(vm_map_trunc_page(tmp_entry->vme_start,
9857 VM_MAP_PAGE_MASK(src_map)) ==
9858 vm_map_trunc_page(original_start,
9859 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
9860 }
9861
9862 /* adjust alignment of last copy_entry's "vme_end" */
9863 tmp_entry = vm_map_copy_last_entry(copy);
9864 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9865 vm_map_offset_t adjustment;
fe8ab488
A
9866
9867 original_end = tmp_entry->vme_end;
9868
9869 /* map-align the end of the last copy entry... */
9870 tmp_entry->vme_end =
9871 vm_map_round_page(tmp_entry->vme_end,
9872 VM_MAP_PAGE_MASK(src_map));
9873 /* ... adjust for mis-aligned end of copy range */
39236c6e
A
9874 adjustment =
9875 (vm_map_round_page((copy->offset +
9876 copy->size),
9877 VM_MAP_PAGE_MASK(src_map)) -
9878 vm_map_round_page((copy->offset +
9879 copy->size),
9880 PAGE_MASK));
9881 if (adjustment) {
9882 assert(page_aligned(adjustment));
9883 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9884 tmp_entry->vme_end -= adjustment;
9885 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9886 }
fe8ab488
A
9887
9888 /*
9889 * Assert that the adjustments haven't exposed
9890 * more than was originally copied...
9891 */
9892 assert(tmp_entry->vme_end <= original_end);
9893 /*
9894 * ... and that it did not adjust outside of
9895 * a single 16K page.
9896 */
9897 assert(vm_map_round_page(tmp_entry->vme_end,
9898 VM_MAP_PAGE_MASK(src_map)) ==
9899 vm_map_round_page(original_end,
9900 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
9901 }
9902 }
9903
1c79356b
A
9904 /* Fix-up start and end points in copy. This is necessary */
9905 /* when the various entries in the copy object were picked */
9906 /* up from different sub-maps */
9907
9908 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 9909 copy_size = 0; /* compute actual size */
1c79356b 9910 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e
A
9911 assert(VM_MAP_PAGE_ALIGNED(
9912 copy_addr + (tmp_entry->vme_end -
9913 tmp_entry->vme_start),
9914 VM_MAP_COPY_PAGE_MASK(copy)));
9915 assert(VM_MAP_PAGE_ALIGNED(
9916 copy_addr,
9917 VM_MAP_COPY_PAGE_MASK(copy)));
9918
9919 /*
9920 * The copy_entries will be injected directly into the
9921 * destination map and might not be "map aligned" there...
9922 */
9923 tmp_entry->map_aligned = FALSE;
9924
1c79356b
A
9925 tmp_entry->vme_end = copy_addr +
9926 (tmp_entry->vme_end - tmp_entry->vme_start);
9927 tmp_entry->vme_start = copy_addr;
e2d2fc5c 9928 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 9929 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 9930 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
9931 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9932 }
9933
fe8ab488
A
9934 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9935 copy_size < copy->size) {
9936 /*
9937 * The actual size of the VM map copy is smaller than what
9938 * was requested by the caller. This must be because some
9939 * PAGE_SIZE-sized pages are missing at the end of the last
9940 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9941 * The caller might not have been aware of those missing
9942 * pages and might not want to be aware of it, which is
9943 * fine as long as they don't try to access (and crash on)
9944 * those missing pages.
9945 * Let's adjust the size of the "copy", to avoid failing
9946 * in vm_map_copyout() or vm_map_copy_overwrite().
9947 */
9948 assert(vm_map_round_page(copy_size,
9949 VM_MAP_PAGE_MASK(src_map)) ==
9950 vm_map_round_page(copy->size,
9951 VM_MAP_PAGE_MASK(src_map)));
9952 copy->size = copy_size;
9953 }
9954
1c79356b
A
9955 *copy_result = copy;
9956 return(KERN_SUCCESS);
9957
9958#undef RETURN
9959}
9960
39236c6e
A
9961kern_return_t
9962vm_map_copy_extract(
9963 vm_map_t src_map,
9964 vm_map_address_t src_addr,
9965 vm_map_size_t len,
9966 vm_map_copy_t *copy_result, /* OUT */
9967 vm_prot_t *cur_prot, /* OUT */
9968 vm_prot_t *max_prot)
9969{
9970 vm_map_offset_t src_start, src_end;
9971 vm_map_copy_t copy;
9972 kern_return_t kr;
9973
9974 /*
9975 * Check for copies of zero bytes.
9976 */
9977
9978 if (len == 0) {
9979 *copy_result = VM_MAP_COPY_NULL;
9980 return(KERN_SUCCESS);
9981 }
9982
9983 /*
9984 * Check that the end address doesn't overflow
9985 */
9986 src_end = src_addr + len;
9987 if (src_end < src_addr)
9988 return KERN_INVALID_ADDRESS;
9989
9990 /*
9991 * Compute (page aligned) start and end of region
9992 */
9993 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
9994 src_end = vm_map_round_page(src_end, PAGE_MASK);
9995
9996 /*
9997 * Allocate a header element for the list.
9998 *
9999 * Use the start and end in the header to
10000 * remember the endpoints prior to rounding.
10001 */
10002
10003 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10004 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
39236c6e
A
10005 vm_map_copy_first_entry(copy) =
10006 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10007 copy->type = VM_MAP_COPY_ENTRY_LIST;
10008 copy->cpy_hdr.nentries = 0;
10009 copy->cpy_hdr.entries_pageable = TRUE;
10010
10011 vm_map_store_init(&copy->cpy_hdr);
10012
10013 copy->offset = 0;
10014 copy->size = len;
10015
10016 kr = vm_map_remap_extract(src_map,
10017 src_addr,
10018 len,
10019 FALSE, /* copy */
10020 &copy->cpy_hdr,
10021 cur_prot,
10022 max_prot,
10023 VM_INHERIT_SHARE,
10024 TRUE); /* pageable */
10025 if (kr != KERN_SUCCESS) {
10026 vm_map_copy_discard(copy);
10027 return kr;
10028 }
10029
10030 *copy_result = copy;
10031 return KERN_SUCCESS;
10032}
10033
1c79356b
A
10034/*
10035 * vm_map_copyin_object:
10036 *
10037 * Create a copy object from an object.
10038 * Our caller donates an object reference.
10039 */
10040
10041kern_return_t
10042vm_map_copyin_object(
10043 vm_object_t object,
10044 vm_object_offset_t offset, /* offset of region in object */
10045 vm_object_size_t size, /* size of region in object */
10046 vm_map_copy_t *copy_result) /* OUT */
10047{
10048 vm_map_copy_t copy; /* Resulting copy */
10049
10050 /*
10051 * We drop the object into a special copy object
10052 * that contains the object directly.
10053 */
10054
10055 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10056 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
10057 copy->type = VM_MAP_COPY_OBJECT;
10058 copy->cpy_object = object;
1c79356b
A
10059 copy->offset = offset;
10060 copy->size = size;
10061
10062 *copy_result = copy;
10063 return(KERN_SUCCESS);
10064}
10065
91447636 10066static void
1c79356b
A
10067vm_map_fork_share(
10068 vm_map_t old_map,
10069 vm_map_entry_t old_entry,
10070 vm_map_t new_map)
10071{
10072 vm_object_t object;
10073 vm_map_entry_t new_entry;
1c79356b
A
10074
10075 /*
10076 * New sharing code. New map entry
10077 * references original object. Internal
10078 * objects use asynchronous copy algorithm for
10079 * future copies. First make sure we have
10080 * the right object. If we need a shadow,
10081 * or someone else already has one, then
10082 * make a new shadow and share it.
10083 */
10084
3e170ce0 10085 object = VME_OBJECT(old_entry);
1c79356b
A
10086 if (old_entry->is_sub_map) {
10087 assert(old_entry->wired_count == 0);
0c530ab8 10088#ifndef NO_NESTED_PMAP
1c79356b 10089 if(old_entry->use_pmap) {
91447636
A
10090 kern_return_t result;
10091
1c79356b 10092 result = pmap_nest(new_map->pmap,
3e170ce0 10093 (VME_SUBMAP(old_entry))->pmap,
2d21ac55
A
10094 (addr64_t)old_entry->vme_start,
10095 (addr64_t)old_entry->vme_start,
10096 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
10097 if(result)
10098 panic("vm_map_fork_share: pmap_nest failed!");
10099 }
0c530ab8 10100#endif /* NO_NESTED_PMAP */
1c79356b 10101 } else if (object == VM_OBJECT_NULL) {
91447636 10102 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
2d21ac55 10103 old_entry->vme_start));
3e170ce0
A
10104 VME_OFFSET_SET(old_entry, 0);
10105 VME_OBJECT_SET(old_entry, object);
fe8ab488 10106 old_entry->use_pmap = TRUE;
1c79356b
A
10107 assert(!old_entry->needs_copy);
10108 } else if (object->copy_strategy !=
2d21ac55 10109 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
10110
10111 /*
10112 * We are already using an asymmetric
10113 * copy, and therefore we already have
10114 * the right object.
10115 */
10116
10117 assert(! old_entry->needs_copy);
10118 }
10119 else if (old_entry->needs_copy || /* case 1 */
10120 object->shadowed || /* case 2 */
10121 (!object->true_share && /* case 3 */
2d21ac55 10122 !old_entry->is_shared &&
6d2010ae 10123 (object->vo_size >
2d21ac55
A
10124 (vm_map_size_t)(old_entry->vme_end -
10125 old_entry->vme_start)))) {
1c79356b
A
10126
10127 /*
10128 * We need to create a shadow.
10129 * There are three cases here.
10130 * In the first case, we need to
10131 * complete a deferred symmetrical
10132 * copy that we participated in.
10133 * In the second and third cases,
10134 * we need to create the shadow so
10135 * that changes that we make to the
10136 * object do not interfere with
10137 * any symmetrical copies which
10138 * have occured (case 2) or which
10139 * might occur (case 3).
10140 *
10141 * The first case is when we had
10142 * deferred shadow object creation
10143 * via the entry->needs_copy mechanism.
10144 * This mechanism only works when
10145 * only one entry points to the source
10146 * object, and we are about to create
10147 * a second entry pointing to the
10148 * same object. The problem is that
10149 * there is no way of mapping from
10150 * an object to the entries pointing
10151 * to it. (Deferred shadow creation
10152 * works with one entry because occurs
10153 * at fault time, and we walk from the
10154 * entry to the object when handling
10155 * the fault.)
10156 *
10157 * The second case is when the object
10158 * to be shared has already been copied
10159 * with a symmetric copy, but we point
10160 * directly to the object without
10161 * needs_copy set in our entry. (This
10162 * can happen because different ranges
10163 * of an object can be pointed to by
10164 * different entries. In particular,
10165 * a single entry pointing to an object
10166 * can be split by a call to vm_inherit,
10167 * which, combined with task_create, can
10168 * result in the different entries
10169 * having different needs_copy values.)
10170 * The shadowed flag in the object allows
10171 * us to detect this case. The problem
10172 * with this case is that if this object
10173 * has or will have shadows, then we
10174 * must not perform an asymmetric copy
10175 * of this object, since such a copy
10176 * allows the object to be changed, which
10177 * will break the previous symmetrical
10178 * copies (which rely upon the object
10179 * not changing). In a sense, the shadowed
10180 * flag says "don't change this object".
10181 * We fix this by creating a shadow
10182 * object for this object, and sharing
10183 * that. This works because we are free
10184 * to change the shadow object (and thus
10185 * to use an asymmetric copy strategy);
10186 * this is also semantically correct,
10187 * since this object is temporary, and
10188 * therefore a copy of the object is
10189 * as good as the object itself. (This
10190 * is not true for permanent objects,
10191 * since the pager needs to see changes,
10192 * which won't happen if the changes
10193 * are made to a copy.)
10194 *
10195 * The third case is when the object
10196 * to be shared has parts sticking
10197 * outside of the entry we're working
10198 * with, and thus may in the future
10199 * be subject to a symmetrical copy.
10200 * (This is a preemptive version of
10201 * case 2.)
10202 */
3e170ce0
A
10203 VME_OBJECT_SHADOW(old_entry,
10204 (vm_map_size_t) (old_entry->vme_end -
10205 old_entry->vme_start));
1c79356b
A
10206
10207 /*
10208 * If we're making a shadow for other than
10209 * copy on write reasons, then we have
10210 * to remove write permission.
10211 */
10212
1c79356b
A
10213 if (!old_entry->needs_copy &&
10214 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
10215 vm_prot_t prot;
10216
10217 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10218
3e170ce0 10219 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
0c530ab8 10220 prot |= VM_PROT_EXECUTE;
2d21ac55 10221
316670eb 10222 if (old_map->mapped_in_other_pmaps) {
9bccf70c 10223 vm_object_pmap_protect(
3e170ce0
A
10224 VME_OBJECT(old_entry),
10225 VME_OFFSET(old_entry),
9bccf70c 10226 (old_entry->vme_end -
2d21ac55 10227 old_entry->vme_start),
9bccf70c
A
10228 PMAP_NULL,
10229 old_entry->vme_start,
0c530ab8 10230 prot);
1c79356b 10231 } else {
9bccf70c 10232 pmap_protect(old_map->pmap,
2d21ac55
A
10233 old_entry->vme_start,
10234 old_entry->vme_end,
10235 prot);
1c79356b
A
10236 }
10237 }
10238
10239 old_entry->needs_copy = FALSE;
3e170ce0 10240 object = VME_OBJECT(old_entry);
1c79356b 10241 }
6d2010ae 10242
1c79356b
A
10243
10244 /*
10245 * If object was using a symmetric copy strategy,
10246 * change its copy strategy to the default
10247 * asymmetric copy strategy, which is copy_delay
10248 * in the non-norma case and copy_call in the
10249 * norma case. Bump the reference count for the
10250 * new entry.
10251 */
10252
10253 if(old_entry->is_sub_map) {
3e170ce0
A
10254 vm_map_lock(VME_SUBMAP(old_entry));
10255 vm_map_reference(VME_SUBMAP(old_entry));
10256 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
10257 } else {
10258 vm_object_lock(object);
2d21ac55 10259 vm_object_reference_locked(object);
1c79356b
A
10260 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10261 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10262 }
10263 vm_object_unlock(object);
10264 }
10265
10266 /*
10267 * Clone the entry, using object ref from above.
10268 * Mark both entries as shared.
10269 */
10270
7ddcb079
A
10271 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10272 * map or descendants */
1c79356b
A
10273 vm_map_entry_copy(new_entry, old_entry);
10274 old_entry->is_shared = TRUE;
10275 new_entry->is_shared = TRUE;
10276
10277 /*
10278 * Insert the entry into the new map -- we
10279 * know we're inserting at the end of the new
10280 * map.
10281 */
10282
6d2010ae 10283 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
1c79356b
A
10284
10285 /*
10286 * Update the physical map
10287 */
10288
10289 if (old_entry->is_sub_map) {
10290 /* Bill Angell pmap support goes here */
10291 } else {
10292 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
2d21ac55
A
10293 old_entry->vme_end - old_entry->vme_start,
10294 old_entry->vme_start);
1c79356b
A
10295 }
10296}
10297
91447636 10298static boolean_t
1c79356b
A
10299vm_map_fork_copy(
10300 vm_map_t old_map,
10301 vm_map_entry_t *old_entry_p,
10302 vm_map_t new_map)
10303{
10304 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
10305 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10306 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
10307 vm_map_copy_t copy;
10308 vm_map_entry_t last = vm_map_last_entry(new_map);
10309
10310 vm_map_unlock(old_map);
10311 /*
10312 * Use maxprot version of copyin because we
10313 * care about whether this memory can ever
10314 * be accessed, not just whether it's accessible
10315 * right now.
10316 */
10317 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
10318 != KERN_SUCCESS) {
10319 /*
10320 * The map might have changed while it
10321 * was unlocked, check it again. Skip
10322 * any blank space or permanently
10323 * unreadable region.
10324 */
10325 vm_map_lock(old_map);
10326 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 10327 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
10328 last = last->vme_next;
10329 }
10330 *old_entry_p = last;
10331
10332 /*
10333 * XXX For some error returns, want to
10334 * XXX skip to the next element. Note
10335 * that INVALID_ADDRESS and
10336 * PROTECTION_FAILURE are handled above.
10337 */
10338
10339 return FALSE;
10340 }
10341
10342 /*
10343 * Insert the copy into the new map
10344 */
10345
10346 vm_map_copy_insert(new_map, last, copy);
10347
10348 /*
10349 * Pick up the traversal at the end of
10350 * the copied region.
10351 */
10352
10353 vm_map_lock(old_map);
10354 start += entry_size;
10355 if (! vm_map_lookup_entry(old_map, start, &last)) {
10356 last = last->vme_next;
10357 } else {
2d21ac55
A
10358 if (last->vme_start == start) {
10359 /*
10360 * No need to clip here and we don't
10361 * want to cause any unnecessary
10362 * unnesting...
10363 */
10364 } else {
10365 vm_map_clip_start(old_map, last, start);
10366 }
1c79356b
A
10367 }
10368 *old_entry_p = last;
10369
10370 return TRUE;
10371}
10372
10373/*
10374 * vm_map_fork:
10375 *
10376 * Create and return a new map based on the old
10377 * map, according to the inheritance values on the
10378 * regions in that map.
10379 *
10380 * The source map must not be locked.
10381 */
10382vm_map_t
10383vm_map_fork(
316670eb 10384 ledger_t ledger,
1c79356b
A
10385 vm_map_t old_map)
10386{
2d21ac55 10387 pmap_t new_pmap;
1c79356b
A
10388 vm_map_t new_map;
10389 vm_map_entry_t old_entry;
91447636 10390 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
10391 vm_map_entry_t new_entry;
10392 boolean_t src_needs_copy;
10393 boolean_t new_entry_needs_copy;
3e170ce0 10394 boolean_t pmap_is64bit;
1c79356b 10395
3e170ce0 10396 pmap_is64bit =
b0d623f7 10397#if defined(__i386__) || defined(__x86_64__)
3e170ce0 10398 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
b0d623f7 10399#else
316670eb 10400#error Unknown architecture.
b0d623f7 10401#endif
3e170ce0
A
10402
10403 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
2d21ac55 10404
1c79356b
A
10405 vm_map_reference_swap(old_map);
10406 vm_map_lock(old_map);
10407
10408 new_map = vm_map_create(new_pmap,
2d21ac55
A
10409 old_map->min_offset,
10410 old_map->max_offset,
10411 old_map->hdr.entries_pageable);
39236c6e
A
10412 /* inherit the parent map's page size */
10413 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 10414 for (
2d21ac55
A
10415 old_entry = vm_map_first_entry(old_map);
10416 old_entry != vm_map_to_entry(old_map);
10417 ) {
1c79356b
A
10418
10419 entry_size = old_entry->vme_end - old_entry->vme_start;
10420
10421 switch (old_entry->inheritance) {
10422 case VM_INHERIT_NONE:
10423 break;
10424
10425 case VM_INHERIT_SHARE:
10426 vm_map_fork_share(old_map, old_entry, new_map);
10427 new_size += entry_size;
10428 break;
10429
10430 case VM_INHERIT_COPY:
10431
10432 /*
10433 * Inline the copy_quickly case;
10434 * upon failure, fall back on call
10435 * to vm_map_fork_copy.
10436 */
10437
10438 if(old_entry->is_sub_map)
10439 break;
9bccf70c 10440 if ((old_entry->wired_count != 0) ||
3e170ce0
A
10441 ((VME_OBJECT(old_entry) != NULL) &&
10442 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
10443 goto slow_vm_map_fork_copy;
10444 }
10445
7ddcb079 10446 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b 10447 vm_map_entry_copy(new_entry, old_entry);
fe8ab488
A
10448 if (new_entry->is_sub_map) {
10449 /* clear address space specifics */
10450 new_entry->use_pmap = FALSE;
10451 }
1c79356b
A
10452
10453 if (! vm_object_copy_quickly(
3e170ce0
A
10454 &VME_OBJECT(new_entry),
10455 VME_OFFSET(old_entry),
2d21ac55
A
10456 (old_entry->vme_end -
10457 old_entry->vme_start),
10458 &src_needs_copy,
10459 &new_entry_needs_copy)) {
1c79356b
A
10460 vm_map_entry_dispose(new_map, new_entry);
10461 goto slow_vm_map_fork_copy;
10462 }
10463
10464 /*
10465 * Handle copy-on-write obligations
10466 */
10467
10468 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
10469 vm_prot_t prot;
10470
10471 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10472
3e170ce0
A
10473 if (override_nx(old_map, VME_ALIAS(old_entry))
10474 && prot)
0c530ab8 10475 prot |= VM_PROT_EXECUTE;
2d21ac55 10476
1c79356b 10477 vm_object_pmap_protect(
3e170ce0
A
10478 VME_OBJECT(old_entry),
10479 VME_OFFSET(old_entry),
1c79356b 10480 (old_entry->vme_end -
2d21ac55 10481 old_entry->vme_start),
1c79356b 10482 ((old_entry->is_shared
316670eb 10483 || old_map->mapped_in_other_pmaps)
2d21ac55
A
10484 ? PMAP_NULL :
10485 old_map->pmap),
1c79356b 10486 old_entry->vme_start,
0c530ab8 10487 prot);
1c79356b 10488
3e170ce0 10489 assert(old_entry->wired_count == 0);
1c79356b
A
10490 old_entry->needs_copy = TRUE;
10491 }
10492 new_entry->needs_copy = new_entry_needs_copy;
10493
10494 /*
10495 * Insert the entry at the end
10496 * of the map.
10497 */
10498
6d2010ae 10499 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
1c79356b
A
10500 new_entry);
10501 new_size += entry_size;
10502 break;
10503
10504 slow_vm_map_fork_copy:
10505 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
10506 new_size += entry_size;
10507 }
10508 continue;
10509 }
10510 old_entry = old_entry->vme_next;
10511 }
10512
fe8ab488 10513
1c79356b
A
10514 new_map->size = new_size;
10515 vm_map_unlock(old_map);
10516 vm_map_deallocate(old_map);
10517
10518 return(new_map);
10519}
10520
2d21ac55
A
10521/*
10522 * vm_map_exec:
10523 *
10524 * Setup the "new_map" with the proper execution environment according
10525 * to the type of executable (platform, 64bit, chroot environment).
10526 * Map the comm page and shared region, etc...
10527 */
10528kern_return_t
10529vm_map_exec(
10530 vm_map_t new_map,
10531 task_t task,
10532 void *fsroot,
10533 cpu_type_t cpu)
10534{
10535 SHARED_REGION_TRACE_DEBUG(
10536 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
fe8ab488
A
10537 (void *)VM_KERNEL_ADDRPERM(current_task()),
10538 (void *)VM_KERNEL_ADDRPERM(new_map),
10539 (void *)VM_KERNEL_ADDRPERM(task),
10540 (void *)VM_KERNEL_ADDRPERM(fsroot),
10541 cpu));
2d21ac55
A
10542 (void) vm_commpage_enter(new_map, task);
10543 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
10544 SHARED_REGION_TRACE_DEBUG(
10545 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
fe8ab488
A
10546 (void *)VM_KERNEL_ADDRPERM(current_task()),
10547 (void *)VM_KERNEL_ADDRPERM(new_map),
10548 (void *)VM_KERNEL_ADDRPERM(task),
10549 (void *)VM_KERNEL_ADDRPERM(fsroot),
10550 cpu));
2d21ac55
A
10551 return KERN_SUCCESS;
10552}
1c79356b
A
10553
10554/*
10555 * vm_map_lookup_locked:
10556 *
10557 * Finds the VM object, offset, and
10558 * protection for a given virtual address in the
10559 * specified map, assuming a page fault of the
10560 * type specified.
10561 *
10562 * Returns the (object, offset, protection) for
10563 * this address, whether it is wired down, and whether
10564 * this map has the only reference to the data in question.
10565 * In order to later verify this lookup, a "version"
10566 * is returned.
10567 *
10568 * The map MUST be locked by the caller and WILL be
10569 * locked on exit. In order to guarantee the
10570 * existence of the returned object, it is returned
10571 * locked.
10572 *
10573 * If a lookup is requested with "write protection"
10574 * specified, the map may be changed to perform virtual
10575 * copying operations, although the data referenced will
10576 * remain the same.
10577 */
10578kern_return_t
10579vm_map_lookup_locked(
10580 vm_map_t *var_map, /* IN/OUT */
2d21ac55 10581 vm_map_offset_t vaddr,
91447636 10582 vm_prot_t fault_type,
2d21ac55 10583 int object_lock_type,
1c79356b
A
10584 vm_map_version_t *out_version, /* OUT */
10585 vm_object_t *object, /* OUT */
10586 vm_object_offset_t *offset, /* OUT */
10587 vm_prot_t *out_prot, /* OUT */
10588 boolean_t *wired, /* OUT */
2d21ac55 10589 vm_object_fault_info_t fault_info, /* OUT */
91447636 10590 vm_map_t *real_map)
1c79356b
A
10591{
10592 vm_map_entry_t entry;
10593 register vm_map_t map = *var_map;
10594 vm_map_t old_map = *var_map;
10595 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
10596 vm_map_offset_t cow_parent_vaddr = 0;
10597 vm_map_offset_t old_start = 0;
10598 vm_map_offset_t old_end = 0;
1c79356b 10599 register vm_prot_t prot;
6d2010ae 10600 boolean_t mask_protections;
fe8ab488 10601 boolean_t force_copy;
6d2010ae
A
10602 vm_prot_t original_fault_type;
10603
10604 /*
10605 * VM_PROT_MASK means that the caller wants us to use "fault_type"
10606 * as a mask against the mapping's actual protections, not as an
10607 * absolute value.
10608 */
10609 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
fe8ab488
A
10610 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10611 fault_type &= VM_PROT_ALL;
6d2010ae 10612 original_fault_type = fault_type;
1c79356b 10613
91447636 10614 *real_map = map;
6d2010ae
A
10615
10616RetryLookup:
10617 fault_type = original_fault_type;
1c79356b
A
10618
10619 /*
10620 * If the map has an interesting hint, try it before calling
10621 * full blown lookup routine.
10622 */
1c79356b 10623 entry = map->hint;
1c79356b
A
10624
10625 if ((entry == vm_map_to_entry(map)) ||
10626 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10627 vm_map_entry_t tmp_entry;
10628
10629 /*
10630 * Entry was either not a valid hint, or the vaddr
10631 * was not contained in the entry, so do a full lookup.
10632 */
10633 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10634 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10635 vm_map_unlock(cow_sub_map_parent);
91447636 10636 if((*real_map != map)
2d21ac55 10637 && (*real_map != cow_sub_map_parent))
91447636 10638 vm_map_unlock(*real_map);
1c79356b
A
10639 return KERN_INVALID_ADDRESS;
10640 }
10641
10642 entry = tmp_entry;
10643 }
10644 if(map == old_map) {
10645 old_start = entry->vme_start;
10646 old_end = entry->vme_end;
10647 }
10648
10649 /*
10650 * Handle submaps. Drop lock on upper map, submap is
10651 * returned locked.
10652 */
10653
10654submap_recurse:
10655 if (entry->is_sub_map) {
91447636
A
10656 vm_map_offset_t local_vaddr;
10657 vm_map_offset_t end_delta;
10658 vm_map_offset_t start_delta;
1c79356b
A
10659 vm_map_entry_t submap_entry;
10660 boolean_t mapped_needs_copy=FALSE;
10661
10662 local_vaddr = vaddr;
10663
2d21ac55 10664 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
91447636
A
10665 /* if real_map equals map we unlock below */
10666 if ((*real_map != map) &&
2d21ac55 10667 (*real_map != cow_sub_map_parent))
91447636 10668 vm_map_unlock(*real_map);
3e170ce0 10669 *real_map = VME_SUBMAP(entry);
1c79356b
A
10670 }
10671
2d21ac55 10672 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
1c79356b
A
10673 if (!mapped_needs_copy) {
10674 if (vm_map_lock_read_to_write(map)) {
10675 vm_map_lock_read(map);
99c3a104 10676 *real_map = map;
1c79356b
A
10677 goto RetryLookup;
10678 }
3e170ce0
A
10679 vm_map_lock_read(VME_SUBMAP(entry));
10680 *var_map = VME_SUBMAP(entry);
1c79356b
A
10681 cow_sub_map_parent = map;
10682 /* reset base to map before cow object */
10683 /* this is the map which will accept */
10684 /* the new cow object */
10685 old_start = entry->vme_start;
10686 old_end = entry->vme_end;
10687 cow_parent_vaddr = vaddr;
10688 mapped_needs_copy = TRUE;
10689 } else {
3e170ce0
A
10690 vm_map_lock_read(VME_SUBMAP(entry));
10691 *var_map = VME_SUBMAP(entry);
1c79356b 10692 if((cow_sub_map_parent != map) &&
2d21ac55 10693 (*real_map != map))
1c79356b
A
10694 vm_map_unlock(map);
10695 }
10696 } else {
3e170ce0
A
10697 vm_map_lock_read(VME_SUBMAP(entry));
10698 *var_map = VME_SUBMAP(entry);
1c79356b
A
10699 /* leave map locked if it is a target */
10700 /* cow sub_map above otherwise, just */
10701 /* follow the maps down to the object */
10702 /* here we unlock knowing we are not */
10703 /* revisiting the map. */
91447636 10704 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
10705 vm_map_unlock_read(map);
10706 }
10707
99c3a104 10708 map = *var_map;
1c79356b
A
10709
10710 /* calculate the offset in the submap for vaddr */
3e170ce0 10711 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
1c79356b 10712
2d21ac55 10713 RetrySubMap:
1c79356b
A
10714 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10715 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10716 vm_map_unlock(cow_sub_map_parent);
10717 }
91447636 10718 if((*real_map != map)
2d21ac55 10719 && (*real_map != cow_sub_map_parent)) {
91447636 10720 vm_map_unlock(*real_map);
1c79356b 10721 }
91447636 10722 *real_map = map;
1c79356b
A
10723 return KERN_INVALID_ADDRESS;
10724 }
2d21ac55 10725
1c79356b
A
10726 /* find the attenuated shadow of the underlying object */
10727 /* on our target map */
10728
10729 /* in english the submap object may extend beyond the */
10730 /* region mapped by the entry or, may only fill a portion */
10731 /* of it. For our purposes, we only care if the object */
10732 /* doesn't fill. In this case the area which will */
10733 /* ultimately be clipped in the top map will only need */
10734 /* to be as big as the portion of the underlying entry */
10735 /* which is mapped */
3e170ce0
A
10736 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
10737 submap_entry->vme_start - VME_OFFSET(entry) : 0;
1c79356b
A
10738
10739 end_delta =
3e170ce0 10740 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
1c79356b 10741 submap_entry->vme_end ?
3e170ce0 10742 0 : (VME_OFFSET(entry) +
2d21ac55
A
10743 (old_end - old_start))
10744 - submap_entry->vme_end;
1c79356b
A
10745
10746 old_start += start_delta;
10747 old_end -= end_delta;
10748
10749 if(submap_entry->is_sub_map) {
10750 entry = submap_entry;
10751 vaddr = local_vaddr;
10752 goto submap_recurse;
10753 }
10754
10755 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10756
2d21ac55
A
10757 vm_object_t sub_object, copy_object;
10758 vm_object_offset_t copy_offset;
91447636
A
10759 vm_map_offset_t local_start;
10760 vm_map_offset_t local_end;
0b4e3aa0 10761 boolean_t copied_slowly = FALSE;
1c79356b
A
10762
10763 if (vm_map_lock_read_to_write(map)) {
10764 vm_map_lock_read(map);
10765 old_start -= start_delta;
10766 old_end += end_delta;
10767 goto RetrySubMap;
10768 }
0b4e3aa0
A
10769
10770
3e170ce0 10771 sub_object = VME_OBJECT(submap_entry);
2d21ac55
A
10772 if (sub_object == VM_OBJECT_NULL) {
10773 sub_object =
1c79356b 10774 vm_object_allocate(
91447636 10775 (vm_map_size_t)
2d21ac55
A
10776 (submap_entry->vme_end -
10777 submap_entry->vme_start));
3e170ce0
A
10778 VME_OBJECT_SET(submap_entry, sub_object);
10779 VME_OFFSET_SET(submap_entry, 0);
1c79356b
A
10780 }
10781 local_start = local_vaddr -
2d21ac55 10782 (cow_parent_vaddr - old_start);
1c79356b 10783 local_end = local_vaddr +
2d21ac55 10784 (old_end - cow_parent_vaddr);
1c79356b
A
10785 vm_map_clip_start(map, submap_entry, local_start);
10786 vm_map_clip_end(map, submap_entry, local_end);
fe8ab488
A
10787 if (submap_entry->is_sub_map) {
10788 /* unnesting was done when clipping */
10789 assert(!submap_entry->use_pmap);
10790 }
1c79356b
A
10791
10792 /* This is the COW case, lets connect */
10793 /* an entry in our space to the underlying */
10794 /* object in the submap, bypassing the */
10795 /* submap. */
0b4e3aa0
A
10796
10797
2d21ac55 10798 if(submap_entry->wired_count != 0 ||
4a3eedf9
A
10799 (sub_object->copy_strategy ==
10800 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
10801 vm_object_lock(sub_object);
10802 vm_object_copy_slowly(sub_object,
3e170ce0 10803 VME_OFFSET(submap_entry),
2d21ac55
A
10804 (submap_entry->vme_end -
10805 submap_entry->vme_start),
10806 FALSE,
10807 &copy_object);
10808 copied_slowly = TRUE;
0b4e3aa0 10809 } else {
2d21ac55 10810
0b4e3aa0 10811 /* set up shadow object */
2d21ac55 10812 copy_object = sub_object;
0b4e3aa0 10813 vm_object_reference(copy_object);
2d21ac55 10814 sub_object->shadowed = TRUE;
3e170ce0 10815 assert(submap_entry->wired_count == 0);
0b4e3aa0 10816 submap_entry->needs_copy = TRUE;
0c530ab8
A
10817
10818 prot = submap_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10819
3e170ce0
A
10820 if (override_nx(old_map,
10821 VME_ALIAS(submap_entry))
10822 && prot)
0c530ab8 10823 prot |= VM_PROT_EXECUTE;
2d21ac55 10824
0b4e3aa0 10825 vm_object_pmap_protect(
2d21ac55 10826 sub_object,
3e170ce0 10827 VME_OFFSET(submap_entry),
1c79356b 10828 submap_entry->vme_end -
2d21ac55 10829 submap_entry->vme_start,
9bccf70c 10830 (submap_entry->is_shared
316670eb 10831 || map->mapped_in_other_pmaps) ?
2d21ac55 10832 PMAP_NULL : map->pmap,
1c79356b 10833 submap_entry->vme_start,
0c530ab8 10834 prot);
0b4e3aa0 10835 }
1c79356b 10836
2d21ac55
A
10837 /*
10838 * Adjust the fault offset to the submap entry.
10839 */
10840 copy_offset = (local_vaddr -
10841 submap_entry->vme_start +
3e170ce0 10842 VME_OFFSET(submap_entry));
1c79356b
A
10843
10844 /* This works diffently than the */
10845 /* normal submap case. We go back */
10846 /* to the parent of the cow map and*/
10847 /* clip out the target portion of */
10848 /* the sub_map, substituting the */
10849 /* new copy object, */
10850
10851 vm_map_unlock(map);
10852 local_start = old_start;
10853 local_end = old_end;
10854 map = cow_sub_map_parent;
10855 *var_map = cow_sub_map_parent;
10856 vaddr = cow_parent_vaddr;
10857 cow_sub_map_parent = NULL;
10858
2d21ac55
A
10859 if(!vm_map_lookup_entry(map,
10860 vaddr, &entry)) {
10861 vm_object_deallocate(
10862 copy_object);
10863 vm_map_lock_write_to_read(map);
10864 return KERN_INVALID_ADDRESS;
10865 }
10866
10867 /* clip out the portion of space */
10868 /* mapped by the sub map which */
10869 /* corresponds to the underlying */
10870 /* object */
10871
10872 /*
10873 * Clip (and unnest) the smallest nested chunk
10874 * possible around the faulting address...
10875 */
10876 local_start = vaddr & ~(pmap_nesting_size_min - 1);
10877 local_end = local_start + pmap_nesting_size_min;
10878 /*
10879 * ... but don't go beyond the "old_start" to "old_end"
10880 * range, to avoid spanning over another VM region
10881 * with a possibly different VM object and/or offset.
10882 */
10883 if (local_start < old_start) {
10884 local_start = old_start;
10885 }
10886 if (local_end > old_end) {
10887 local_end = old_end;
10888 }
10889 /*
10890 * Adjust copy_offset to the start of the range.
10891 */
10892 copy_offset -= (vaddr - local_start);
10893
1c79356b
A
10894 vm_map_clip_start(map, entry, local_start);
10895 vm_map_clip_end(map, entry, local_end);
fe8ab488
A
10896 if (entry->is_sub_map) {
10897 /* unnesting was done when clipping */
10898 assert(!entry->use_pmap);
10899 }
1c79356b
A
10900
10901 /* substitute copy object for */
10902 /* shared map entry */
3e170ce0 10903 vm_map_deallocate(VME_SUBMAP(entry));
fe8ab488 10904 assert(!entry->iokit_acct);
1c79356b 10905 entry->is_sub_map = FALSE;
fe8ab488 10906 entry->use_pmap = TRUE;
3e170ce0 10907 VME_OBJECT_SET(entry, copy_object);
1c79356b 10908
2d21ac55
A
10909 /* propagate the submap entry's protections */
10910 entry->protection |= submap_entry->protection;
10911 entry->max_protection |= submap_entry->max_protection;
10912
0b4e3aa0 10913 if(copied_slowly) {
3e170ce0 10914 VME_OFFSET_SET(entry, local_start - old_start);
0b4e3aa0
A
10915 entry->needs_copy = FALSE;
10916 entry->is_shared = FALSE;
10917 } else {
3e170ce0
A
10918 VME_OFFSET_SET(entry, copy_offset);
10919 assert(entry->wired_count == 0);
0b4e3aa0
A
10920 entry->needs_copy = TRUE;
10921 if(entry->inheritance == VM_INHERIT_SHARE)
10922 entry->inheritance = VM_INHERIT_COPY;
10923 if (map != old_map)
10924 entry->is_shared = TRUE;
10925 }
1c79356b 10926 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 10927 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
10928
10929 vm_map_lock_write_to_read(map);
10930 } else {
10931 if((cow_sub_map_parent)
2d21ac55
A
10932 && (cow_sub_map_parent != *real_map)
10933 && (cow_sub_map_parent != map)) {
1c79356b
A
10934 vm_map_unlock(cow_sub_map_parent);
10935 }
10936 entry = submap_entry;
10937 vaddr = local_vaddr;
10938 }
10939 }
10940
10941 /*
10942 * Check whether this task is allowed to have
10943 * this page.
10944 */
2d21ac55 10945
6601e61a 10946 prot = entry->protection;
0c530ab8 10947
3e170ce0 10948 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
0c530ab8 10949 /*
2d21ac55 10950 * HACK -- if not a stack, then allow execution
0c530ab8
A
10951 */
10952 prot |= VM_PROT_EXECUTE;
2d21ac55
A
10953 }
10954
6d2010ae
A
10955 if (mask_protections) {
10956 fault_type &= prot;
10957 if (fault_type == VM_PROT_NONE) {
10958 goto protection_failure;
10959 }
10960 }
1c79356b 10961 if ((fault_type & (prot)) != fault_type) {
6d2010ae 10962 protection_failure:
2d21ac55
A
10963 if (*real_map != map) {
10964 vm_map_unlock(*real_map);
0c530ab8
A
10965 }
10966 *real_map = map;
10967
10968 if ((fault_type & VM_PROT_EXECUTE) && prot)
2d21ac55 10969 log_stack_execution_failure((addr64_t)vaddr, prot);
0c530ab8 10970
2d21ac55 10971 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 10972 return KERN_PROTECTION_FAILURE;
1c79356b
A
10973 }
10974
10975 /*
10976 * If this page is not pageable, we have to get
10977 * it for all possible accesses.
10978 */
10979
91447636
A
10980 *wired = (entry->wired_count != 0);
10981 if (*wired)
0c530ab8 10982 fault_type = prot;
1c79356b
A
10983
10984 /*
10985 * If the entry was copy-on-write, we either ...
10986 */
10987
10988 if (entry->needs_copy) {
10989 /*
10990 * If we want to write the page, we may as well
10991 * handle that now since we've got the map locked.
10992 *
10993 * If we don't need to write the page, we just
10994 * demote the permissions allowed.
10995 */
10996
fe8ab488 10997 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
1c79356b
A
10998 /*
10999 * Make a new object, and place it in the
11000 * object chain. Note that no new references
11001 * have appeared -- one just moved from the
11002 * map to the new object.
11003 */
11004
11005 if (vm_map_lock_read_to_write(map)) {
11006 vm_map_lock_read(map);
11007 goto RetryLookup;
11008 }
3e170ce0
A
11009 VME_OBJECT_SHADOW(entry,
11010 (vm_map_size_t) (entry->vme_end -
11011 entry->vme_start));
1c79356b 11012
3e170ce0 11013 VME_OBJECT(entry)->shadowed = TRUE;
1c79356b
A
11014 entry->needs_copy = FALSE;
11015 vm_map_lock_write_to_read(map);
11016 }
11017 else {
11018 /*
11019 * We're attempting to read a copy-on-write
11020 * page -- don't allow writes.
11021 */
11022
11023 prot &= (~VM_PROT_WRITE);
11024 }
11025 }
11026
11027 /*
11028 * Create an object if necessary.
11029 */
3e170ce0 11030 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
1c79356b
A
11031
11032 if (vm_map_lock_read_to_write(map)) {
11033 vm_map_lock_read(map);
11034 goto RetryLookup;
11035 }
11036
3e170ce0
A
11037 VME_OBJECT_SET(entry,
11038 vm_object_allocate(
11039 (vm_map_size_t)(entry->vme_end -
11040 entry->vme_start)));
11041 VME_OFFSET_SET(entry, 0);
1c79356b
A
11042 vm_map_lock_write_to_read(map);
11043 }
11044
11045 /*
11046 * Return the object/offset from this entry. If the entry
11047 * was copy-on-write or empty, it has been fixed up. Also
11048 * return the protection.
11049 */
11050
3e170ce0
A
11051 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11052 *object = VME_OBJECT(entry);
1c79356b 11053 *out_prot = prot;
2d21ac55
A
11054
11055 if (fault_info) {
11056 fault_info->interruptible = THREAD_UNINT; /* for now... */
11057 /* ... the caller will change "interruptible" if needed */
11058 fault_info->cluster_size = 0;
3e170ce0 11059 fault_info->user_tag = VME_ALIAS(entry);
fe8ab488
A
11060 fault_info->pmap_options = 0;
11061 if (entry->iokit_acct ||
11062 (!entry->is_sub_map && !entry->use_pmap)) {
11063 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11064 }
2d21ac55 11065 fault_info->behavior = entry->behavior;
3e170ce0
A
11066 fault_info->lo_offset = VME_OFFSET(entry);
11067 fault_info->hi_offset =
11068 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
2d21ac55 11069 fault_info->no_cache = entry->no_cache;
b0d623f7 11070 fault_info->stealth = FALSE;
6d2010ae 11071 fault_info->io_sync = FALSE;
3e170ce0
A
11072 if (entry->used_for_jit ||
11073 entry->vme_resilient_codesign) {
11074 fault_info->cs_bypass = TRUE;
11075 } else {
11076 fault_info->cs_bypass = FALSE;
11077 }
0b4c1975 11078 fault_info->mark_zf_absent = FALSE;
316670eb 11079 fault_info->batch_pmap_op = FALSE;
2d21ac55 11080 }
1c79356b
A
11081
11082 /*
11083 * Lock the object to prevent it from disappearing
11084 */
2d21ac55
A
11085 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11086 vm_object_lock(*object);
11087 else
11088 vm_object_lock_shared(*object);
11089
1c79356b
A
11090 /*
11091 * Save the version number
11092 */
11093
11094 out_version->main_timestamp = map->timestamp;
11095
11096 return KERN_SUCCESS;
11097}
11098
11099
11100/*
11101 * vm_map_verify:
11102 *
11103 * Verifies that the map in question has not changed
11104 * since the given version. If successful, the map
11105 * will not change until vm_map_verify_done() is called.
11106 */
11107boolean_t
11108vm_map_verify(
11109 register vm_map_t map,
11110 register vm_map_version_t *version) /* REF */
11111{
11112 boolean_t result;
11113
11114 vm_map_lock_read(map);
11115 result = (map->timestamp == version->main_timestamp);
11116
11117 if (!result)
11118 vm_map_unlock_read(map);
11119
11120 return(result);
11121}
11122
/*
 *	vm_map_verify_done:
 *
 *	Releases locks acquired by a vm_map_verify.
 *
 *	This is now a macro in vm/vm_map.h.  It does a
 *	vm_map_unlock_read on the map.
 */
11131
11132
91447636
A
11133/*
11134 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11135 * Goes away after regular vm_region_recurse function migrates to
11136 * 64 bits
11137 * vm_region_recurse: A form of vm_region which follows the
11138 * submaps in a target map
11139 *
11140 */
11141
11142kern_return_t
11143vm_map_region_recurse_64(
11144 vm_map_t map,
11145 vm_map_offset_t *address, /* IN/OUT */
11146 vm_map_size_t *size, /* OUT */
11147 natural_t *nesting_depth, /* IN/OUT */
11148 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11149 mach_msg_type_number_t *count) /* IN/OUT */
11150{
39236c6e 11151 mach_msg_type_number_t original_count;
91447636
A
11152 vm_region_extended_info_data_t extended;
11153 vm_map_entry_t tmp_entry;
11154 vm_map_offset_t user_address;
11155 unsigned int user_max_depth;
11156
11157 /*
11158 * "curr_entry" is the VM map entry preceding or including the
11159 * address we're looking for.
11160 * "curr_map" is the map or sub-map containing "curr_entry".
6d2010ae
A
11161 * "curr_address" is the equivalent of the top map's "user_address"
11162 * in the current map.
91447636
A
11163 * "curr_offset" is the cumulated offset of "curr_map" in the
11164 * target task's address space.
11165 * "curr_depth" is the depth of "curr_map" in the chain of
11166 * sub-maps.
6d2010ae
A
11167 *
11168 * "curr_max_below" and "curr_max_above" limit the range (around
11169 * "curr_address") we should take into account in the current (sub)map.
11170 * They limit the range to what's visible through the map entries
11171 * we've traversed from the top map to the current map.
11172
91447636
A
11173 */
11174 vm_map_entry_t curr_entry;
6d2010ae 11175 vm_map_address_t curr_address;
91447636
A
11176 vm_map_offset_t curr_offset;
11177 vm_map_t curr_map;
11178 unsigned int curr_depth;
6d2010ae
A
11179 vm_map_offset_t curr_max_below, curr_max_above;
11180 vm_map_offset_t curr_skip;
91447636
A
11181
11182 /*
11183 * "next_" is the same as "curr_" but for the VM region immediately
11184 * after the address we're looking for. We need to keep track of this
11185 * too because we want to return info about that region if the
11186 * address we're looking for is not mapped.
11187 */
11188 vm_map_entry_t next_entry;
11189 vm_map_offset_t next_offset;
6d2010ae 11190 vm_map_offset_t next_address;
91447636
A
11191 vm_map_t next_map;
11192 unsigned int next_depth;
6d2010ae
A
11193 vm_map_offset_t next_max_below, next_max_above;
11194 vm_map_offset_t next_skip;
91447636 11195
2d21ac55
A
11196 boolean_t look_for_pages;
11197 vm_region_submap_short_info_64_t short_info;
11198
91447636
A
11199 if (map == VM_MAP_NULL) {
11200 /* no address space to work on */
11201 return KERN_INVALID_ARGUMENT;
11202 }
11203
39236c6e
A
11204
11205 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11206 /*
11207 * "info" structure is not big enough and
11208 * would overflow
11209 */
11210 return KERN_INVALID_ARGUMENT;
11211 }
11212
11213 original_count = *count;
11214
11215 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11216 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11217 look_for_pages = FALSE;
11218 short_info = (vm_region_submap_short_info_64_t) submap_info;
11219 submap_info = NULL;
2d21ac55
A
11220 } else {
11221 look_for_pages = TRUE;
39236c6e 11222 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 11223 short_info = NULL;
39236c6e
A
11224
11225 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11226 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11227 }
91447636 11228 }
39236c6e 11229
91447636
A
11230 user_address = *address;
11231 user_max_depth = *nesting_depth;
11232
3e170ce0
A
11233 if (not_in_kdp) {
11234 vm_map_lock_read(map);
11235 }
11236
11237recurse_again:
91447636
A
11238 curr_entry = NULL;
11239 curr_map = map;
6d2010ae 11240 curr_address = user_address;
91447636 11241 curr_offset = 0;
6d2010ae 11242 curr_skip = 0;
91447636 11243 curr_depth = 0;
6d2010ae
A
11244 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11245 curr_max_below = curr_address;
91447636
A
11246
11247 next_entry = NULL;
11248 next_map = NULL;
6d2010ae 11249 next_address = 0;
91447636 11250 next_offset = 0;
6d2010ae 11251 next_skip = 0;
91447636 11252 next_depth = 0;
6d2010ae
A
11253 next_max_above = (vm_map_offset_t) -1;
11254 next_max_below = (vm_map_offset_t) -1;
91447636 11255
91447636
A
11256 for (;;) {
11257 if (vm_map_lookup_entry(curr_map,
6d2010ae 11258 curr_address,
91447636
A
11259 &tmp_entry)) {
11260 /* tmp_entry contains the address we're looking for */
11261 curr_entry = tmp_entry;
11262 } else {
6d2010ae 11263 vm_map_offset_t skip;
91447636
A
11264 /*
11265 * The address is not mapped. "tmp_entry" is the
11266 * map entry preceding the address. We want the next
11267 * one, if it exists.
11268 */
11269 curr_entry = tmp_entry->vme_next;
6d2010ae 11270
91447636 11271 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae
A
11272 (curr_entry->vme_start >=
11273 curr_address + curr_max_above)) {
91447636
A
11274 /* no next entry at this level: stop looking */
11275 if (not_in_kdp) {
11276 vm_map_unlock_read(curr_map);
11277 }
11278 curr_entry = NULL;
11279 curr_map = NULL;
3e170ce0 11280 curr_skip = 0;
91447636
A
11281 curr_offset = 0;
11282 curr_depth = 0;
6d2010ae
A
11283 curr_max_above = 0;
11284 curr_max_below = 0;
91447636
A
11285 break;
11286 }
6d2010ae
A
11287
11288 /* adjust current address and offset */
11289 skip = curr_entry->vme_start - curr_address;
11290 curr_address = curr_entry->vme_start;
3e170ce0 11291 curr_skip += skip;
6d2010ae
A
11292 curr_offset += skip;
11293 curr_max_above -= skip;
11294 curr_max_below = 0;
91447636
A
11295 }
11296
11297 /*
11298 * Is the next entry at this level closer to the address (or
11299 * deeper in the submap chain) than the one we had
11300 * so far ?
11301 */
11302 tmp_entry = curr_entry->vme_next;
11303 if (tmp_entry == vm_map_to_entry(curr_map)) {
11304 /* no next entry at this level */
6d2010ae
A
11305 } else if (tmp_entry->vme_start >=
11306 curr_address + curr_max_above) {
91447636
A
11307 /*
11308 * tmp_entry is beyond the scope of what we mapped of
11309 * this submap in the upper level: ignore it.
11310 */
11311 } else if ((next_entry == NULL) ||
11312 (tmp_entry->vme_start + curr_offset <=
11313 next_entry->vme_start + next_offset)) {
11314 /*
11315 * We didn't have a "next_entry" or this one is
11316 * closer to the address we're looking for:
11317 * use this "tmp_entry" as the new "next_entry".
11318 */
11319 if (next_entry != NULL) {
11320 /* unlock the last "next_map" */
11321 if (next_map != curr_map && not_in_kdp) {
11322 vm_map_unlock_read(next_map);
11323 }
11324 }
11325 next_entry = tmp_entry;
11326 next_map = curr_map;
91447636 11327 next_depth = curr_depth;
6d2010ae
A
11328 next_address = next_entry->vme_start;
11329 next_skip = curr_skip;
3e170ce0 11330 next_skip += (next_address - curr_address);
6d2010ae
A
11331 next_offset = curr_offset;
11332 next_offset += (next_address - curr_address);
11333 next_max_above = MIN(next_max_above, curr_max_above);
11334 next_max_above = MIN(next_max_above,
11335 next_entry->vme_end - next_address);
11336 next_max_below = MIN(next_max_below, curr_max_below);
11337 next_max_below = MIN(next_max_below,
11338 next_address - next_entry->vme_start);
91447636
A
11339 }
11340
6d2010ae
A
11341 /*
11342 * "curr_max_{above,below}" allow us to keep track of the
11343 * portion of the submap that is actually mapped at this level:
11344 * the rest of that submap is irrelevant to us, since it's not
11345 * mapped here.
11346 * The relevant portion of the map starts at
3e170ce0 11347 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
6d2010ae
A
11348 */
11349 curr_max_above = MIN(curr_max_above,
11350 curr_entry->vme_end - curr_address);
11351 curr_max_below = MIN(curr_max_below,
11352 curr_address - curr_entry->vme_start);
11353
91447636
A
11354 if (!curr_entry->is_sub_map ||
11355 curr_depth >= user_max_depth) {
11356 /*
11357 * We hit a leaf map or we reached the maximum depth
11358 * we could, so stop looking. Keep the current map
11359 * locked.
11360 */
11361 break;
11362 }
11363
11364 /*
11365 * Get down to the next submap level.
11366 */
11367
11368 /*
11369 * Lock the next level and unlock the current level,
11370 * unless we need to keep it locked to access the "next_entry"
11371 * later.
11372 */
11373 if (not_in_kdp) {
3e170ce0 11374 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
11375 }
11376 if (curr_map == next_map) {
11377 /* keep "next_map" locked in case we need it */
11378 } else {
11379 /* release this map */
b0d623f7
A
11380 if (not_in_kdp)
11381 vm_map_unlock_read(curr_map);
91447636
A
11382 }
11383
11384 /*
11385 * Adjust the offset. "curr_entry" maps the submap
11386 * at relative address "curr_entry->vme_start" in the
3e170ce0 11387 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
11388 * bytes of the submap.
11389 * "curr_offset" always represents the offset of a virtual
11390 * address in the curr_map relative to the absolute address
11391 * space (i.e. the top-level VM map).
11392 */
11393 curr_offset +=
3e170ce0 11394 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 11395 curr_address = user_address + curr_offset;
91447636 11396 /* switch to the submap */
3e170ce0 11397 curr_map = VME_SUBMAP(curr_entry);
91447636 11398 curr_depth++;
91447636
A
11399 curr_entry = NULL;
11400 }
11401
11402 if (curr_entry == NULL) {
11403 /* no VM region contains the address... */
11404 if (next_entry == NULL) {
11405 /* ... and no VM region follows it either */
11406 return KERN_INVALID_ADDRESS;
11407 }
11408 /* ... gather info about the next VM region */
11409 curr_entry = next_entry;
11410 curr_map = next_map; /* still locked ... */
6d2010ae
A
11411 curr_address = next_address;
11412 curr_skip = next_skip;
91447636
A
11413 curr_offset = next_offset;
11414 curr_depth = next_depth;
6d2010ae
A
11415 curr_max_above = next_max_above;
11416 curr_max_below = next_max_below;
91447636
A
11417 } else {
11418 /* we won't need "next_entry" after all */
11419 if (next_entry != NULL) {
11420 /* release "next_map" */
11421 if (next_map != curr_map && not_in_kdp) {
11422 vm_map_unlock_read(next_map);
11423 }
11424 }
11425 }
11426 next_entry = NULL;
11427 next_map = NULL;
11428 next_offset = 0;
6d2010ae 11429 next_skip = 0;
91447636 11430 next_depth = 0;
6d2010ae
A
11431 next_max_below = -1;
11432 next_max_above = -1;
91447636 11433
3e170ce0
A
11434 if (curr_entry->is_sub_map &&
11435 curr_depth < user_max_depth) {
11436 /*
11437 * We're not as deep as we could be: we must have
11438 * gone back up after not finding anything mapped
11439 * below the original top-level map entry's.
11440 * Let's move "curr_address" forward and recurse again.
11441 */
11442 user_address = curr_address;
11443 goto recurse_again;
11444 }
11445
91447636 11446 *nesting_depth = curr_depth;
6d2010ae
A
11447 *size = curr_max_above + curr_max_below;
11448 *address = user_address + curr_skip - curr_max_below;
91447636 11449
b0d623f7
A
11450// LP64todo: all the current tools are 32bit, obviously never worked for 64b
11451// so probably should be a real 32b ID vs. ptr.
11452// Current users just check for equality
39236c6e 11453#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 11454
2d21ac55 11455 if (look_for_pages) {
3e170ce0
A
11456 submap_info->user_tag = VME_ALIAS(curr_entry);
11457 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11458 submap_info->protection = curr_entry->protection;
11459 submap_info->inheritance = curr_entry->inheritance;
11460 submap_info->max_protection = curr_entry->max_protection;
11461 submap_info->behavior = curr_entry->behavior;
11462 submap_info->user_wired_count = curr_entry->user_wired_count;
11463 submap_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11464 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11465 } else {
3e170ce0
A
11466 short_info->user_tag = VME_ALIAS(curr_entry);
11467 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11468 short_info->protection = curr_entry->protection;
11469 short_info->inheritance = curr_entry->inheritance;
11470 short_info->max_protection = curr_entry->max_protection;
11471 short_info->behavior = curr_entry->behavior;
11472 short_info->user_wired_count = curr_entry->user_wired_count;
11473 short_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11474 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11475 }
91447636
A
11476
11477 extended.pages_resident = 0;
11478 extended.pages_swapped_out = 0;
11479 extended.pages_shared_now_private = 0;
11480 extended.pages_dirtied = 0;
39236c6e 11481 extended.pages_reusable = 0;
91447636
A
11482 extended.external_pager = 0;
11483 extended.shadow_depth = 0;
3e170ce0
A
11484 extended.share_mode = SM_EMPTY;
11485 extended.ref_count = 0;
91447636
A
11486
11487 if (not_in_kdp) {
11488 if (!curr_entry->is_sub_map) {
6d2010ae
A
11489 vm_map_offset_t range_start, range_end;
11490 range_start = MAX((curr_address - curr_max_below),
11491 curr_entry->vme_start);
11492 range_end = MIN((curr_address + curr_max_above),
11493 curr_entry->vme_end);
91447636 11494 vm_map_region_walk(curr_map,
6d2010ae 11495 range_start,
91447636 11496 curr_entry,
3e170ce0 11497 (VME_OFFSET(curr_entry) +
6d2010ae
A
11498 (range_start -
11499 curr_entry->vme_start)),
11500 range_end - range_start,
2d21ac55 11501 &extended,
39236c6e 11502 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
11503 if (extended.external_pager &&
11504 extended.ref_count == 2 &&
11505 extended.share_mode == SM_SHARED) {
2d21ac55 11506 extended.share_mode = SM_PRIVATE;
91447636 11507 }
91447636
A
11508 } else {
11509 if (curr_entry->use_pmap) {
2d21ac55 11510 extended.share_mode = SM_TRUESHARED;
91447636 11511 } else {
2d21ac55 11512 extended.share_mode = SM_PRIVATE;
91447636 11513 }
3e170ce0 11514 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
91447636
A
11515 }
11516 }
11517
2d21ac55
A
11518 if (look_for_pages) {
11519 submap_info->pages_resident = extended.pages_resident;
11520 submap_info->pages_swapped_out = extended.pages_swapped_out;
11521 submap_info->pages_shared_now_private =
11522 extended.pages_shared_now_private;
11523 submap_info->pages_dirtied = extended.pages_dirtied;
11524 submap_info->external_pager = extended.external_pager;
11525 submap_info->shadow_depth = extended.shadow_depth;
11526 submap_info->share_mode = extended.share_mode;
11527 submap_info->ref_count = extended.ref_count;
39236c6e
A
11528
11529 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11530 submap_info->pages_reusable = extended.pages_reusable;
11531 }
2d21ac55
A
11532 } else {
11533 short_info->external_pager = extended.external_pager;
11534 short_info->shadow_depth = extended.shadow_depth;
11535 short_info->share_mode = extended.share_mode;
11536 short_info->ref_count = extended.ref_count;
11537 }
91447636
A
11538
11539 if (not_in_kdp) {
11540 vm_map_unlock_read(curr_map);
11541 }
11542
11543 return KERN_SUCCESS;
11544}
11545
1c79356b
A
11546/*
11547 * vm_region:
11548 *
11549 * User call to obtain information about a region in
11550 * a task's address map. Currently, only one flavor is
11551 * supported.
11552 *
11553 * XXX The reserved and behavior fields cannot be filled
11554 * in until the vm merge from the IK is completed, and
11555 * vm_reserve is implemented.
1c79356b
A
11556 */
11557
11558kern_return_t
91447636 11559vm_map_region(
1c79356b 11560 vm_map_t map,
91447636
A
11561 vm_map_offset_t *address, /* IN/OUT */
11562 vm_map_size_t *size, /* OUT */
1c79356b
A
11563 vm_region_flavor_t flavor, /* IN */
11564 vm_region_info_t info, /* OUT */
91447636
A
11565 mach_msg_type_number_t *count, /* IN/OUT */
11566 mach_port_t *object_name) /* OUT */
1c79356b
A
11567{
11568 vm_map_entry_t tmp_entry;
1c79356b 11569 vm_map_entry_t entry;
91447636 11570 vm_map_offset_t start;
1c79356b
A
11571
11572 if (map == VM_MAP_NULL)
11573 return(KERN_INVALID_ARGUMENT);
11574
11575 switch (flavor) {
91447636 11576
1c79356b 11577 case VM_REGION_BASIC_INFO:
2d21ac55 11578 /* legacy for old 32-bit objects info */
1c79356b 11579 {
2d21ac55 11580 vm_region_basic_info_t basic;
91447636 11581
2d21ac55
A
11582 if (*count < VM_REGION_BASIC_INFO_COUNT)
11583 return(KERN_INVALID_ARGUMENT);
1c79356b 11584
2d21ac55
A
11585 basic = (vm_region_basic_info_t) info;
11586 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 11587
2d21ac55 11588 vm_map_lock_read(map);
1c79356b 11589
2d21ac55
A
11590 start = *address;
11591 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11592 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11593 vm_map_unlock_read(map);
11594 return(KERN_INVALID_ADDRESS);
11595 }
11596 } else {
11597 entry = tmp_entry;
1c79356b 11598 }
1c79356b 11599
2d21ac55 11600 start = entry->vme_start;
1c79356b 11601
3e170ce0 11602 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
11603 basic->protection = entry->protection;
11604 basic->inheritance = entry->inheritance;
11605 basic->max_protection = entry->max_protection;
11606 basic->behavior = entry->behavior;
11607 basic->user_wired_count = entry->user_wired_count;
11608 basic->reserved = entry->is_sub_map;
11609 *address = start;
11610 *size = (entry->vme_end - start);
91447636 11611
2d21ac55
A
11612 if (object_name) *object_name = IP_NULL;
11613 if (entry->is_sub_map) {
11614 basic->shared = FALSE;
11615 } else {
11616 basic->shared = entry->is_shared;
11617 }
91447636 11618
2d21ac55
A
11619 vm_map_unlock_read(map);
11620 return(KERN_SUCCESS);
91447636
A
11621 }
11622
11623 case VM_REGION_BASIC_INFO_64:
11624 {
2d21ac55 11625 vm_region_basic_info_64_t basic;
91447636 11626
2d21ac55
A
11627 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11628 return(KERN_INVALID_ARGUMENT);
11629
11630 basic = (vm_region_basic_info_64_t) info;
11631 *count = VM_REGION_BASIC_INFO_COUNT_64;
11632
11633 vm_map_lock_read(map);
11634
11635 start = *address;
11636 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11637 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11638 vm_map_unlock_read(map);
11639 return(KERN_INVALID_ADDRESS);
11640 }
11641 } else {
11642 entry = tmp_entry;
11643 }
91447636 11644
2d21ac55 11645 start = entry->vme_start;
91447636 11646
3e170ce0 11647 basic->offset = VME_OFFSET(entry);
2d21ac55
A
11648 basic->protection = entry->protection;
11649 basic->inheritance = entry->inheritance;
11650 basic->max_protection = entry->max_protection;
11651 basic->behavior = entry->behavior;
11652 basic->user_wired_count = entry->user_wired_count;
11653 basic->reserved = entry->is_sub_map;
11654 *address = start;
11655 *size = (entry->vme_end - start);
91447636 11656
2d21ac55
A
11657 if (object_name) *object_name = IP_NULL;
11658 if (entry->is_sub_map) {
11659 basic->shared = FALSE;
11660 } else {
11661 basic->shared = entry->is_shared;
91447636 11662 }
2d21ac55
A
11663
11664 vm_map_unlock_read(map);
11665 return(KERN_SUCCESS);
1c79356b
A
11666 }
11667 case VM_REGION_EXTENDED_INFO:
2d21ac55
A
11668 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11669 return(KERN_INVALID_ARGUMENT);
39236c6e
A
11670 /*fallthru*/
11671 case VM_REGION_EXTENDED_INFO__legacy:
11672 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11673 return KERN_INVALID_ARGUMENT;
11674
11675 {
11676 vm_region_extended_info_t extended;
11677 mach_msg_type_number_t original_count;
1c79356b 11678
2d21ac55 11679 extended = (vm_region_extended_info_t) info;
1c79356b 11680
2d21ac55 11681 vm_map_lock_read(map);
1c79356b 11682
2d21ac55
A
11683 start = *address;
11684 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11685 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11686 vm_map_unlock_read(map);
11687 return(KERN_INVALID_ADDRESS);
11688 }
11689 } else {
11690 entry = tmp_entry;
1c79356b 11691 }
2d21ac55 11692 start = entry->vme_start;
1c79356b 11693
2d21ac55 11694 extended->protection = entry->protection;
3e170ce0 11695 extended->user_tag = VME_ALIAS(entry);
2d21ac55
A
11696 extended->pages_resident = 0;
11697 extended->pages_swapped_out = 0;
11698 extended->pages_shared_now_private = 0;
11699 extended->pages_dirtied = 0;
11700 extended->external_pager = 0;
11701 extended->shadow_depth = 0;
1c79356b 11702
39236c6e
A
11703 original_count = *count;
11704 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11705 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11706 } else {
11707 extended->pages_reusable = 0;
11708 *count = VM_REGION_EXTENDED_INFO_COUNT;
11709 }
11710
3e170ce0 11711 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 11712
2d21ac55
A
11713 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11714 extended->share_mode = SM_PRIVATE;
1c79356b 11715
2d21ac55
A
11716 if (object_name)
11717 *object_name = IP_NULL;
11718 *address = start;
11719 *size = (entry->vme_end - start);
1c79356b 11720
2d21ac55
A
11721 vm_map_unlock_read(map);
11722 return(KERN_SUCCESS);
1c79356b
A
11723 }
11724 case VM_REGION_TOP_INFO:
11725 {
2d21ac55 11726 vm_region_top_info_t top;
1c79356b 11727
2d21ac55
A
11728 if (*count < VM_REGION_TOP_INFO_COUNT)
11729 return(KERN_INVALID_ARGUMENT);
1c79356b 11730
2d21ac55
A
11731 top = (vm_region_top_info_t) info;
11732 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 11733
2d21ac55 11734 vm_map_lock_read(map);
1c79356b 11735
2d21ac55
A
11736 start = *address;
11737 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11738 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11739 vm_map_unlock_read(map);
11740 return(KERN_INVALID_ADDRESS);
11741 }
11742 } else {
11743 entry = tmp_entry;
1c79356b 11744
2d21ac55
A
11745 }
11746 start = entry->vme_start;
1c79356b 11747
2d21ac55
A
11748 top->private_pages_resident = 0;
11749 top->shared_pages_resident = 0;
1c79356b 11750
2d21ac55 11751 vm_map_region_top_walk(entry, top);
1c79356b 11752
2d21ac55
A
11753 if (object_name)
11754 *object_name = IP_NULL;
11755 *address = start;
11756 *size = (entry->vme_end - start);
1c79356b 11757
2d21ac55
A
11758 vm_map_unlock_read(map);
11759 return(KERN_SUCCESS);
1c79356b
A
11760 }
11761 default:
2d21ac55 11762 return(KERN_INVALID_ARGUMENT);
1c79356b
A
11763 }
11764}
11765
b0d623f7
A
11766#define OBJ_RESIDENT_COUNT(obj, entry_size) \
11767 MIN((entry_size), \
11768 ((obj)->all_reusable ? \
11769 (obj)->wired_page_count : \
11770 (obj)->resident_page_count - (obj)->reusable_page_count))
2d21ac55 11771
0c530ab8 11772void
91447636
A
11773vm_map_region_top_walk(
11774 vm_map_entry_t entry,
11775 vm_region_top_info_t top)
1c79356b 11776{
1c79356b 11777
3e170ce0 11778 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
11779 top->share_mode = SM_EMPTY;
11780 top->ref_count = 0;
11781 top->obj_id = 0;
11782 return;
1c79356b 11783 }
2d21ac55 11784
91447636 11785 {
2d21ac55
A
11786 struct vm_object *obj, *tmp_obj;
11787 int ref_count;
11788 uint32_t entry_size;
1c79356b 11789
b0d623f7 11790 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 11791
3e170ce0 11792 obj = VME_OBJECT(entry);
1c79356b 11793
2d21ac55
A
11794 vm_object_lock(obj);
11795
11796 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11797 ref_count--;
11798
b0d623f7 11799 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55
A
11800 if (obj->shadow) {
11801 if (ref_count == 1)
b0d623f7
A
11802 top->private_pages_resident =
11803 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55 11804 else
b0d623f7
A
11805 top->shared_pages_resident =
11806 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
11807 top->ref_count = ref_count;
11808 top->share_mode = SM_COW;
91447636 11809
2d21ac55
A
11810 while ((tmp_obj = obj->shadow)) {
11811 vm_object_lock(tmp_obj);
11812 vm_object_unlock(obj);
11813 obj = tmp_obj;
1c79356b 11814
2d21ac55
A
11815 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11816 ref_count--;
1c79356b 11817
b0d623f7
A
11818 assert(obj->reusable_page_count <= obj->resident_page_count);
11819 top->shared_pages_resident +=
11820 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
11821 top->ref_count += ref_count - 1;
11822 }
1c79356b 11823 } else {
6d2010ae
A
11824 if (entry->superpage_size) {
11825 top->share_mode = SM_LARGE_PAGE;
11826 top->shared_pages_resident = 0;
11827 top->private_pages_resident = entry_size;
11828 } else if (entry->needs_copy) {
2d21ac55 11829 top->share_mode = SM_COW;
b0d623f7
A
11830 top->shared_pages_resident =
11831 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
11832 } else {
11833 if (ref_count == 1 ||
11834 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11835 top->share_mode = SM_PRIVATE;
39236c6e
A
11836 top->private_pages_resident =
11837 OBJ_RESIDENT_COUNT(obj,
11838 entry_size);
2d21ac55
A
11839 } else {
11840 top->share_mode = SM_SHARED;
b0d623f7
A
11841 top->shared_pages_resident =
11842 OBJ_RESIDENT_COUNT(obj,
11843 entry_size);
2d21ac55
A
11844 }
11845 }
11846 top->ref_count = ref_count;
1c79356b 11847 }
b0d623f7 11848 /* XXX K64: obj_id will be truncated */
39236c6e 11849 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 11850
2d21ac55 11851 vm_object_unlock(obj);
1c79356b 11852 }
91447636
A
11853}
11854
0c530ab8 11855void
91447636
A
11856vm_map_region_walk(
11857 vm_map_t map,
2d21ac55
A
11858 vm_map_offset_t va,
11859 vm_map_entry_t entry,
91447636
A
11860 vm_object_offset_t offset,
11861 vm_object_size_t range,
2d21ac55 11862 vm_region_extended_info_t extended,
39236c6e
A
11863 boolean_t look_for_pages,
11864 mach_msg_type_number_t count)
91447636
A
11865{
11866 register struct vm_object *obj, *tmp_obj;
11867 register vm_map_offset_t last_offset;
11868 register int i;
11869 register int ref_count;
11870 struct vm_object *shadow_object;
11871 int shadow_depth;
11872
3e170ce0 11873 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 11874 (entry->is_sub_map) ||
3e170ce0 11875 (VME_OBJECT(entry)->phys_contiguous &&
6d2010ae 11876 !entry->superpage_size)) {
2d21ac55
A
11877 extended->share_mode = SM_EMPTY;
11878 extended->ref_count = 0;
11879 return;
1c79356b 11880 }
6d2010ae
A
11881
11882 if (entry->superpage_size) {
11883 extended->shadow_depth = 0;
11884 extended->share_mode = SM_LARGE_PAGE;
11885 extended->ref_count = 1;
11886 extended->external_pager = 0;
11887 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11888 extended->shadow_depth = 0;
11889 return;
11890 }
11891
91447636 11892 {
3e170ce0 11893 obj = VME_OBJECT(entry);
2d21ac55
A
11894
11895 vm_object_lock(obj);
11896
11897 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11898 ref_count--;
11899
11900 if (look_for_pages) {
11901 for (last_offset = offset + range;
11902 offset < last_offset;
39236c6e
A
11903 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11904 vm_map_region_look_for_page(map, va, obj,
11905 offset, ref_count,
11906 0, extended, count);
11907 }
b0d623f7
A
11908 } else {
11909 shadow_object = obj->shadow;
11910 shadow_depth = 0;
11911
11912 if ( !(obj->pager_trusted) && !(obj->internal))
11913 extended->external_pager = 1;
11914
11915 if (shadow_object != VM_OBJECT_NULL) {
11916 vm_object_lock(shadow_object);
11917 for (;
11918 shadow_object != VM_OBJECT_NULL;
11919 shadow_depth++) {
11920 vm_object_t next_shadow;
11921
11922 if ( !(shadow_object->pager_trusted) &&
11923 !(shadow_object->internal))
11924 extended->external_pager = 1;
11925
11926 next_shadow = shadow_object->shadow;
11927 if (next_shadow) {
11928 vm_object_lock(next_shadow);
11929 }
11930 vm_object_unlock(shadow_object);
11931 shadow_object = next_shadow;
2d21ac55 11932 }
2d21ac55 11933 }
b0d623f7 11934 extended->shadow_depth = shadow_depth;
2d21ac55 11935 }
2d21ac55
A
11936
11937 if (extended->shadow_depth || entry->needs_copy)
11938 extended->share_mode = SM_COW;
91447636 11939 else {
2d21ac55
A
11940 if (ref_count == 1)
11941 extended->share_mode = SM_PRIVATE;
11942 else {
11943 if (obj->true_share)
11944 extended->share_mode = SM_TRUESHARED;
11945 else
11946 extended->share_mode = SM_SHARED;
11947 }
91447636 11948 }
2d21ac55 11949 extended->ref_count = ref_count - extended->shadow_depth;
91447636 11950
2d21ac55
A
11951 for (i = 0; i < extended->shadow_depth; i++) {
11952 if ((tmp_obj = obj->shadow) == 0)
11953 break;
11954 vm_object_lock(tmp_obj);
11955 vm_object_unlock(obj);
1c79356b 11956
2d21ac55
A
11957 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11958 ref_count--;
1c79356b 11959
2d21ac55
A
11960 extended->ref_count += ref_count;
11961 obj = tmp_obj;
11962 }
11963 vm_object_unlock(obj);
1c79356b 11964
2d21ac55
A
11965 if (extended->share_mode == SM_SHARED) {
11966 register vm_map_entry_t cur;
11967 register vm_map_entry_t last;
11968 int my_refs;
91447636 11969
3e170ce0 11970 obj = VME_OBJECT(entry);
2d21ac55
A
11971 last = vm_map_to_entry(map);
11972 my_refs = 0;
91447636 11973
2d21ac55
A
11974 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11975 ref_count--;
11976 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11977 my_refs += vm_map_region_count_obj_refs(cur, obj);
91447636 11978
2d21ac55
A
11979 if (my_refs == ref_count)
11980 extended->share_mode = SM_PRIVATE_ALIASED;
11981 else if (my_refs > 1)
11982 extended->share_mode = SM_SHARED_ALIASED;
11983 }
91447636 11984 }
1c79356b
A
11985}
11986
1c79356b 11987
91447636
A
11988/* object is locked on entry and locked on return */
11989
11990
11991static void
11992vm_map_region_look_for_page(
11993 __unused vm_map_t map,
2d21ac55
A
11994 __unused vm_map_offset_t va,
11995 vm_object_t object,
11996 vm_object_offset_t offset,
91447636
A
11997 int max_refcnt,
11998 int depth,
39236c6e
A
11999 vm_region_extended_info_t extended,
12000 mach_msg_type_number_t count)
1c79356b 12001{
2d21ac55
A
12002 register vm_page_t p;
12003 register vm_object_t shadow;
12004 register int ref_count;
12005 vm_object_t caller_object;
2d21ac55 12006 kern_return_t kr;
91447636
A
12007 shadow = object->shadow;
12008 caller_object = object;
1c79356b 12009
91447636
A
12010
12011 while (TRUE) {
1c79356b 12012
91447636 12013 if ( !(object->pager_trusted) && !(object->internal))
2d21ac55 12014 extended->external_pager = 1;
1c79356b 12015
91447636
A
12016 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12017 if (shadow && (max_refcnt == 1))
12018 extended->pages_shared_now_private++;
1c79356b 12019
39236c6e 12020 if (!p->fictitious &&
91447636
A
12021 (p->dirty || pmap_is_modified(p->phys_page)))
12022 extended->pages_dirtied++;
39236c6e
A
12023 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
12024 if (p->reusable || p->object->all_reusable) {
12025 extended->pages_reusable++;
12026 }
12027 }
1c79356b 12028
39236c6e 12029 extended->pages_resident++;
91447636
A
12030
12031 if(object != caller_object)
2d21ac55 12032 vm_object_unlock(object);
91447636
A
12033
12034 return;
1c79356b 12035 }
2d21ac55 12036#if MACH_PAGEMAP
91447636
A
12037 if (object->existence_map) {
12038 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
1c79356b 12039
91447636 12040 extended->pages_swapped_out++;
1c79356b 12041
91447636 12042 if(object != caller_object)
2d21ac55 12043 vm_object_unlock(object);
1c79356b 12044
91447636
A
12045 return;
12046 }
39236c6e
A
12047 } else
12048#endif /* MACH_PAGEMAP */
12049 if (object->internal &&
12050 object->alive &&
12051 !object->terminating &&
12052 object->pager_ready) {
12053
12054 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12055 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
12056 offset)
12057 == VM_EXTERNAL_STATE_EXISTS) {
12058 /* the pager has that page */
12059 extended->pages_swapped_out++;
12060 if (object != caller_object)
12061 vm_object_unlock(object);
12062 return;
12063 }
12064 } else {
12065 memory_object_t pager;
2d21ac55 12066
39236c6e
A
12067 vm_object_paging_begin(object);
12068 pager = object->pager;
12069 vm_object_unlock(object);
2d21ac55 12070
39236c6e
A
12071 kr = memory_object_data_request(
12072 pager,
12073 offset + object->paging_offset,
12074 0, /* just poke the pager */
12075 VM_PROT_READ,
12076 NULL);
2d21ac55 12077
39236c6e
A
12078 vm_object_lock(object);
12079 vm_object_paging_end(object);
12080
12081 if (kr == KERN_SUCCESS) {
12082 /* the pager has that page */
12083 extended->pages_swapped_out++;
12084 if (object != caller_object)
12085 vm_object_unlock(object);
12086 return;
12087 }
2d21ac55 12088 }
1c79356b 12089 }
2d21ac55 12090
91447636 12091 if (shadow) {
2d21ac55 12092 vm_object_lock(shadow);
1c79356b 12093
91447636
A
12094 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12095 ref_count--;
1c79356b 12096
91447636
A
12097 if (++depth > extended->shadow_depth)
12098 extended->shadow_depth = depth;
1c79356b 12099
91447636
A
12100 if (ref_count > max_refcnt)
12101 max_refcnt = ref_count;
12102
12103 if(object != caller_object)
2d21ac55 12104 vm_object_unlock(object);
91447636 12105
6d2010ae 12106 offset = offset + object->vo_shadow_offset;
91447636
A
12107 object = shadow;
12108 shadow = object->shadow;
12109 continue;
1c79356b 12110 }
91447636 12111 if(object != caller_object)
2d21ac55 12112 vm_object_unlock(object);
91447636
A
12113 break;
12114 }
12115}
1c79356b 12116
91447636
A
12117static int
12118vm_map_region_count_obj_refs(
12119 vm_map_entry_t entry,
12120 vm_object_t object)
12121{
12122 register int ref_count;
12123 register vm_object_t chk_obj;
12124 register vm_object_t tmp_obj;
1c79356b 12125
3e170ce0 12126 if (VME_OBJECT(entry) == 0)
2d21ac55 12127 return(0);
1c79356b 12128
91447636 12129 if (entry->is_sub_map)
2d21ac55 12130 return(0);
91447636 12131 else {
2d21ac55 12132 ref_count = 0;
1c79356b 12133
3e170ce0 12134 chk_obj = VME_OBJECT(entry);
2d21ac55 12135 vm_object_lock(chk_obj);
1c79356b 12136
2d21ac55
A
12137 while (chk_obj) {
12138 if (chk_obj == object)
12139 ref_count++;
12140 tmp_obj = chk_obj->shadow;
12141 if (tmp_obj)
12142 vm_object_lock(tmp_obj);
12143 vm_object_unlock(chk_obj);
1c79356b 12144
2d21ac55
A
12145 chk_obj = tmp_obj;
12146 }
1c79356b 12147 }
91447636 12148 return(ref_count);
1c79356b
A
12149}
12150
12151
12152/*
91447636
A
12153 * Routine: vm_map_simplify
12154 *
12155 * Description:
12156 * Attempt to simplify the map representation in
12157 * the vicinity of the given starting address.
12158 * Note:
12159 * This routine is intended primarily to keep the
12160 * kernel maps more compact -- they generally don't
12161 * benefit from the "expand a map entry" technology
12162 * at allocation time because the adjacent entry
12163 * is often wired down.
1c79356b 12164 */
91447636
A
12165void
12166vm_map_simplify_entry(
12167 vm_map_t map,
12168 vm_map_entry_t this_entry)
1c79356b 12169{
91447636 12170 vm_map_entry_t prev_entry;
1c79356b 12171
91447636 12172 counter(c_vm_map_simplify_entry_called++);
1c79356b 12173
91447636 12174 prev_entry = this_entry->vme_prev;
1c79356b 12175
91447636 12176 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 12177 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 12178
91447636 12179 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 12180
2d21ac55 12181 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
12182 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12183 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
91447636 12184 prev_entry->vme_start))
3e170ce0 12185 == VME_OFFSET(this_entry)) &&
1c79356b 12186
fe8ab488
A
12187 (prev_entry->behavior == this_entry->behavior) &&
12188 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
12189 (prev_entry->protection == this_entry->protection) &&
12190 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
12191 (prev_entry->inheritance == this_entry->inheritance) &&
12192 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 12193 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 12194 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
12195 (prev_entry->permanent == this_entry->permanent) &&
12196 (prev_entry->map_aligned == this_entry->map_aligned) &&
12197 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12198 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12199 /* from_reserved_zone: OK if that field doesn't match */
12200 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0
A
12201 (prev_entry->vme_resilient_codesign ==
12202 this_entry->vme_resilient_codesign) &&
12203 (prev_entry->vme_resilient_media ==
12204 this_entry->vme_resilient_media) &&
fe8ab488 12205
91447636
A
12206 (prev_entry->wired_count == this_entry->wired_count) &&
12207 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 12208
91447636
A
12209 (prev_entry->in_transition == FALSE) &&
12210 (this_entry->in_transition == FALSE) &&
12211 (prev_entry->needs_wakeup == FALSE) &&
12212 (this_entry->needs_wakeup == FALSE) &&
12213 (prev_entry->is_shared == FALSE) &&
fe8ab488
A
12214 (this_entry->is_shared == FALSE) &&
12215 (prev_entry->superpage_size == FALSE) &&
12216 (this_entry->superpage_size == FALSE)
2d21ac55 12217 ) {
316670eb 12218 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 12219 assert(prev_entry->vme_start < this_entry->vme_end);
39236c6e
A
12220 if (prev_entry->map_aligned)
12221 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12222 VM_MAP_PAGE_MASK(map)));
91447636 12223 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
12224 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12225
12226 if (map->holelistenabled) {
12227 vm_map_store_update_first_free(map, this_entry, TRUE);
12228 }
12229
2d21ac55 12230 if (prev_entry->is_sub_map) {
3e170ce0 12231 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 12232 } else {
3e170ce0 12233 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 12234 }
91447636 12235 vm_map_entry_dispose(map, prev_entry);
0c530ab8 12236 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 12237 counter(c_vm_map_simplified++);
1c79356b 12238 }
91447636 12239}
1c79356b 12240
91447636
A
12241void
12242vm_map_simplify(
12243 vm_map_t map,
12244 vm_map_offset_t start)
12245{
12246 vm_map_entry_t this_entry;
1c79356b 12247
91447636
A
12248 vm_map_lock(map);
12249 if (vm_map_lookup_entry(map, start, &this_entry)) {
12250 vm_map_simplify_entry(map, this_entry);
12251 vm_map_simplify_entry(map, this_entry->vme_next);
12252 }
12253 counter(c_vm_map_simplify_called++);
12254 vm_map_unlock(map);
12255}
1c79356b 12256
91447636
A
12257static void
12258vm_map_simplify_range(
12259 vm_map_t map,
12260 vm_map_offset_t start,
12261 vm_map_offset_t end)
12262{
12263 vm_map_entry_t entry;
1c79356b 12264
91447636
A
12265 /*
12266 * The map should be locked (for "write") by the caller.
12267 */
1c79356b 12268
91447636
A
12269 if (start >= end) {
12270 /* invalid address range */
12271 return;
12272 }
1c79356b 12273
39236c6e
A
12274 start = vm_map_trunc_page(start,
12275 VM_MAP_PAGE_MASK(map));
12276 end = vm_map_round_page(end,
12277 VM_MAP_PAGE_MASK(map));
2d21ac55 12278
91447636
A
12279 if (!vm_map_lookup_entry(map, start, &entry)) {
12280 /* "start" is not mapped and "entry" ends before "start" */
12281 if (entry == vm_map_to_entry(map)) {
12282 /* start with first entry in the map */
12283 entry = vm_map_first_entry(map);
12284 } else {
12285 /* start with next entry */
12286 entry = entry->vme_next;
12287 }
12288 }
12289
12290 while (entry != vm_map_to_entry(map) &&
12291 entry->vme_start <= end) {
12292 /* try and coalesce "entry" with its previous entry */
12293 vm_map_simplify_entry(map, entry);
12294 entry = entry->vme_next;
12295 }
12296}
1c79356b 12297
1c79356b 12298
91447636
A
12299/*
12300 * Routine: vm_map_machine_attribute
12301 * Purpose:
12302 * Provide machine-specific attributes to mappings,
12303 * such as cachability etc. for machines that provide
12304 * them. NUMA architectures and machines with big/strange
12305 * caches will use this.
12306 * Note:
12307 * Responsibilities for locking and checking are handled here,
12308 * everything else in the pmap module. If any non-volatile
12309 * information must be kept, the pmap module should handle
12310 * it itself. [This assumes that attributes do not
12311 * need to be inherited, which seems ok to me]
12312 */
12313kern_return_t
12314vm_map_machine_attribute(
12315 vm_map_t map,
12316 vm_map_offset_t start,
12317 vm_map_offset_t end,
12318 vm_machine_attribute_t attribute,
12319 vm_machine_attribute_val_t* value) /* IN/OUT */
12320{
12321 kern_return_t ret;
12322 vm_map_size_t sync_size;
12323 vm_map_entry_t entry;
12324
12325 if (start < vm_map_min(map) || end > vm_map_max(map))
12326 return KERN_INVALID_ADDRESS;
1c79356b 12327
91447636
A
12328 /* Figure how much memory we need to flush (in page increments) */
12329 sync_size = end - start;
1c79356b 12330
91447636
A
12331 vm_map_lock(map);
12332
12333 if (attribute != MATTR_CACHE) {
12334 /* If we don't have to find physical addresses, we */
12335 /* don't have to do an explicit traversal here. */
12336 ret = pmap_attribute(map->pmap, start, end-start,
12337 attribute, value);
12338 vm_map_unlock(map);
12339 return ret;
12340 }
1c79356b 12341
91447636 12342 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 12343
91447636
A
12344 while(sync_size) {
12345 if (vm_map_lookup_entry(map, start, &entry)) {
12346 vm_map_size_t sub_size;
12347 if((entry->vme_end - start) > sync_size) {
12348 sub_size = sync_size;
12349 sync_size = 0;
12350 } else {
12351 sub_size = entry->vme_end - start;
2d21ac55 12352 sync_size -= sub_size;
91447636
A
12353 }
12354 if(entry->is_sub_map) {
12355 vm_map_offset_t sub_start;
12356 vm_map_offset_t sub_end;
1c79356b 12357
91447636 12358 sub_start = (start - entry->vme_start)
3e170ce0 12359 + VME_OFFSET(entry);
91447636
A
12360 sub_end = sub_start + sub_size;
12361 vm_map_machine_attribute(
3e170ce0 12362 VME_SUBMAP(entry),
91447636
A
12363 sub_start,
12364 sub_end,
12365 attribute, value);
12366 } else {
3e170ce0 12367 if (VME_OBJECT(entry)) {
91447636
A
12368 vm_page_t m;
12369 vm_object_t object;
12370 vm_object_t base_object;
12371 vm_object_t last_object;
12372 vm_object_offset_t offset;
12373 vm_object_offset_t base_offset;
12374 vm_map_size_t range;
12375 range = sub_size;
12376 offset = (start - entry->vme_start)
3e170ce0 12377 + VME_OFFSET(entry);
91447636 12378 base_offset = offset;
3e170ce0 12379 object = VME_OBJECT(entry);
91447636
A
12380 base_object = object;
12381 last_object = NULL;
1c79356b 12382
91447636 12383 vm_object_lock(object);
1c79356b 12384
91447636
A
12385 while (range) {
12386 m = vm_page_lookup(
12387 object, offset);
1c79356b 12388
91447636
A
12389 if (m && !m->fictitious) {
12390 ret =
2d21ac55
A
12391 pmap_attribute_cache_sync(
12392 m->phys_page,
12393 PAGE_SIZE,
12394 attribute, value);
91447636
A
12395
12396 } else if (object->shadow) {
6d2010ae 12397 offset = offset + object->vo_shadow_offset;
91447636
A
12398 last_object = object;
12399 object = object->shadow;
12400 vm_object_lock(last_object->shadow);
12401 vm_object_unlock(last_object);
12402 continue;
12403 }
12404 range -= PAGE_SIZE;
1c79356b 12405
91447636
A
12406 if (base_object != object) {
12407 vm_object_unlock(object);
12408 vm_object_lock(base_object);
12409 object = base_object;
12410 }
12411 /* Bump to the next page */
12412 base_offset += PAGE_SIZE;
12413 offset = base_offset;
12414 }
12415 vm_object_unlock(object);
12416 }
12417 }
12418 start += sub_size;
12419 } else {
12420 vm_map_unlock(map);
12421 return KERN_FAILURE;
12422 }
12423
1c79356b 12424 }
e5568f75 12425
91447636 12426 vm_map_unlock(map);
e5568f75 12427
91447636
A
12428 return ret;
12429}
e5568f75 12430
91447636
A
12431/*
12432 * vm_map_behavior_set:
12433 *
12434 * Sets the paging reference behavior of the specified address
12435 * range in the target map. Paging reference behavior affects
12436 * how pagein operations resulting from faults on the map will be
12437 * clustered.
12438 */
12439kern_return_t
12440vm_map_behavior_set(
12441 vm_map_t map,
12442 vm_map_offset_t start,
12443 vm_map_offset_t end,
12444 vm_behavior_t new_behavior)
12445{
12446 register vm_map_entry_t entry;
12447 vm_map_entry_t temp_entry;
e5568f75 12448
91447636 12449 XPR(XPR_VM_MAP,
2d21ac55 12450 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 12451 map, start, end, new_behavior, 0);
e5568f75 12452
6d2010ae
A
12453 if (start > end ||
12454 start < vm_map_min(map) ||
12455 end > vm_map_max(map)) {
12456 return KERN_NO_SPACE;
12457 }
12458
91447636 12459 switch (new_behavior) {
b0d623f7
A
12460
12461 /*
12462 * This first block of behaviors all set a persistent state on the specified
12463 * memory range. All we have to do here is to record the desired behavior
12464 * in the vm_map_entry_t's.
12465 */
12466
91447636
A
12467 case VM_BEHAVIOR_DEFAULT:
12468 case VM_BEHAVIOR_RANDOM:
12469 case VM_BEHAVIOR_SEQUENTIAL:
12470 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
12471 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12472 vm_map_lock(map);
12473
12474 /*
12475 * The entire address range must be valid for the map.
12476 * Note that vm_map_range_check() does a
12477 * vm_map_lookup_entry() internally and returns the
12478 * entry containing the start of the address range if
12479 * the entire range is valid.
12480 */
12481 if (vm_map_range_check(map, start, end, &temp_entry)) {
12482 entry = temp_entry;
12483 vm_map_clip_start(map, entry, start);
12484 }
12485 else {
12486 vm_map_unlock(map);
12487 return(KERN_INVALID_ADDRESS);
12488 }
12489
12490 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12491 vm_map_clip_end(map, entry, end);
fe8ab488
A
12492 if (entry->is_sub_map) {
12493 assert(!entry->use_pmap);
12494 }
b0d623f7
A
12495
12496 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12497 entry->zero_wired_pages = TRUE;
12498 } else {
12499 entry->behavior = new_behavior;
12500 }
12501 entry = entry->vme_next;
12502 }
12503
12504 vm_map_unlock(map);
91447636 12505 break;
b0d623f7
A
12506
12507 /*
12508 * The rest of these are different from the above in that they cause
12509 * an immediate action to take place as opposed to setting a behavior that
12510 * affects future actions.
12511 */
12512
91447636 12513 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
12514 return vm_map_willneed(map, start, end);
12515
91447636 12516 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
12517 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12518
12519 case VM_BEHAVIOR_FREE:
12520 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12521
12522 case VM_BEHAVIOR_REUSABLE:
12523 return vm_map_reusable_pages(map, start, end);
12524
12525 case VM_BEHAVIOR_REUSE:
12526 return vm_map_reuse_pages(map, start, end);
12527
12528 case VM_BEHAVIOR_CAN_REUSE:
12529 return vm_map_can_reuse(map, start, end);
12530
3e170ce0
A
12531#if MACH_ASSERT
12532 case VM_BEHAVIOR_PAGEOUT:
12533 return vm_map_pageout(map, start, end);
12534#endif /* MACH_ASSERT */
12535
1c79356b 12536 default:
91447636 12537 return(KERN_INVALID_ARGUMENT);
1c79356b 12538 }
1c79356b 12539
b0d623f7
A
12540 return(KERN_SUCCESS);
12541}
12542
12543
12544/*
12545 * Internals for madvise(MADV_WILLNEED) system call.
12546 *
12547 * The present implementation is to do a read-ahead if the mapping corresponds
12548 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12549 * and basically ignore the "advice" (which we are always free to do).
12550 */
12551
12552
12553static kern_return_t
12554vm_map_willneed(
12555 vm_map_t map,
12556 vm_map_offset_t start,
12557 vm_map_offset_t end
12558)
12559{
12560 vm_map_entry_t entry;
12561 vm_object_t object;
12562 memory_object_t pager;
12563 struct vm_object_fault_info fault_info;
12564 kern_return_t kr;
12565 vm_object_size_t len;
12566 vm_object_offset_t offset;
1c79356b 12567
91447636 12568 /*
b0d623f7
A
12569 * Fill in static values in fault_info. Several fields get ignored by the code
12570 * we call, but we'll fill them in anyway since uninitialized fields are bad
12571 * when it comes to future backwards compatibility.
91447636 12572 */
b0d623f7
A
12573
12574 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12575 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12576 fault_info.no_cache = FALSE; /* ignored value */
12577 fault_info.stealth = TRUE;
6d2010ae
A
12578 fault_info.io_sync = FALSE;
12579 fault_info.cs_bypass = FALSE;
0b4c1975 12580 fault_info.mark_zf_absent = FALSE;
316670eb 12581 fault_info.batch_pmap_op = FALSE;
b0d623f7
A
12582
12583 /*
12584 * The MADV_WILLNEED operation doesn't require any changes to the
12585 * vm_map_entry_t's, so the read lock is sufficient.
12586 */
12587
12588 vm_map_lock_read(map);
12589
12590 /*
12591 * The madvise semantics require that the address range be fully
12592 * allocated with no holes. Otherwise, we're required to return
12593 * an error.
12594 */
12595
6d2010ae
A
12596 if (! vm_map_range_check(map, start, end, &entry)) {
12597 vm_map_unlock_read(map);
12598 return KERN_INVALID_ADDRESS;
12599 }
b0d623f7 12600
6d2010ae
A
12601 /*
12602 * Examine each vm_map_entry_t in the range.
12603 */
12604 for (; entry != vm_map_to_entry(map) && start < end; ) {
12605
b0d623f7 12606 /*
6d2010ae
A
12607 * The first time through, the start address could be anywhere
12608 * within the vm_map_entry we found. So adjust the offset to
12609 * correspond. After that, the offset will always be zero to
12610 * correspond to the beginning of the current vm_map_entry.
b0d623f7 12611 */
3e170ce0 12612 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 12613
6d2010ae
A
12614 /*
12615 * Set the length so we don't go beyond the end of the
12616 * map_entry or beyond the end of the range we were given.
12617 * This range could span also multiple map entries all of which
12618 * map different files, so make sure we only do the right amount
12619 * of I/O for each object. Note that it's possible for there
12620 * to be multiple map entries all referring to the same object
12621 * but with different page permissions, but it's not worth
12622 * trying to optimize that case.
12623 */
12624 len = MIN(entry->vme_end - start, end - start);
b0d623f7 12625
6d2010ae
A
12626 if ((vm_size_t) len != len) {
12627 /* 32-bit overflow */
12628 len = (vm_size_t) (0 - PAGE_SIZE);
12629 }
12630 fault_info.cluster_size = (vm_size_t) len;
12631 fault_info.lo_offset = offset;
12632 fault_info.hi_offset = offset + len;
3e170ce0 12633 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
12634 fault_info.pmap_options = 0;
12635 if (entry->iokit_acct ||
12636 (!entry->is_sub_map && !entry->use_pmap)) {
12637 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12638 }
b0d623f7 12639
6d2010ae
A
12640 /*
12641 * If there's no read permission to this mapping, then just
12642 * skip it.
12643 */
12644 if ((entry->protection & VM_PROT_READ) == 0) {
12645 entry = entry->vme_next;
12646 start = entry->vme_start;
12647 continue;
12648 }
b0d623f7 12649
6d2010ae
A
12650 /*
12651 * Find the file object backing this map entry. If there is
12652 * none, then we simply ignore the "will need" advice for this
12653 * entry and go on to the next one.
12654 */
12655 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12656 entry = entry->vme_next;
12657 start = entry->vme_start;
12658 continue;
12659 }
b0d623f7 12660
6d2010ae
A
12661 /*
12662 * The data_request() could take a long time, so let's
12663 * release the map lock to avoid blocking other threads.
12664 */
12665 vm_map_unlock_read(map);
b0d623f7 12666
6d2010ae
A
12667 vm_object_paging_begin(object);
12668 pager = object->pager;
12669 vm_object_unlock(object);
b0d623f7 12670
6d2010ae
A
12671 /*
12672 * Get the data from the object asynchronously.
12673 *
12674 * Note that memory_object_data_request() places limits on the
12675 * amount of I/O it will do. Regardless of the len we
fe8ab488 12676 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
6d2010ae
A
12677 * silently truncates the len to that size. This isn't
12678 * necessarily bad since madvise shouldn't really be used to
12679 * page in unlimited amounts of data. Other Unix variants
12680 * limit the willneed case as well. If this turns out to be an
12681 * issue for developers, then we can always adjust the policy
12682 * here and still be backwards compatible since this is all
12683 * just "advice".
12684 */
12685 kr = memory_object_data_request(
12686 pager,
12687 offset + object->paging_offset,
12688 0, /* ignored */
12689 VM_PROT_READ,
12690 (memory_object_fault_info_t)&fault_info);
b0d623f7 12691
6d2010ae
A
12692 vm_object_lock(object);
12693 vm_object_paging_end(object);
12694 vm_object_unlock(object);
b0d623f7 12695
6d2010ae
A
12696 /*
12697 * If we couldn't do the I/O for some reason, just give up on
12698 * the madvise. We still return success to the user since
12699 * madvise isn't supposed to fail when the advice can't be
12700 * taken.
12701 */
12702 if (kr != KERN_SUCCESS) {
12703 return KERN_SUCCESS;
12704 }
b0d623f7 12705
6d2010ae
A
12706 start += len;
12707 if (start >= end) {
12708 /* done */
12709 return KERN_SUCCESS;
12710 }
b0d623f7 12711
6d2010ae
A
12712 /* look up next entry */
12713 vm_map_lock_read(map);
12714 if (! vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 12715 /*
6d2010ae 12716 * There's a new hole in the address range.
b0d623f7 12717 */
6d2010ae
A
12718 vm_map_unlock_read(map);
12719 return KERN_INVALID_ADDRESS;
b0d623f7 12720 }
6d2010ae 12721 }
b0d623f7
A
12722
12723 vm_map_unlock_read(map);
6d2010ae 12724 return KERN_SUCCESS;
b0d623f7
A
12725}
12726
12727static boolean_t
12728vm_map_entry_is_reusable(
12729 vm_map_entry_t entry)
12730{
3e170ce0
A
12731 /* Only user map entries */
12732
b0d623f7
A
12733 vm_object_t object;
12734
2dced7af
A
12735 if (entry->is_sub_map) {
12736 return FALSE;
12737 }
12738
3e170ce0 12739 switch (VME_ALIAS(entry)) {
39236c6e
A
12740 case VM_MEMORY_MALLOC:
12741 case VM_MEMORY_MALLOC_SMALL:
12742 case VM_MEMORY_MALLOC_LARGE:
12743 case VM_MEMORY_REALLOC:
12744 case VM_MEMORY_MALLOC_TINY:
12745 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12746 case VM_MEMORY_MALLOC_LARGE_REUSED:
12747 /*
12748 * This is a malloc() memory region: check if it's still
12749 * in its original state and can be re-used for more
12750 * malloc() allocations.
12751 */
12752 break;
12753 default:
12754 /*
12755 * Not a malloc() memory region: let the caller decide if
12756 * it's re-usable.
12757 */
12758 return TRUE;
12759 }
12760
b0d623f7
A
12761 if (entry->is_shared ||
12762 entry->is_sub_map ||
12763 entry->in_transition ||
12764 entry->protection != VM_PROT_DEFAULT ||
12765 entry->max_protection != VM_PROT_ALL ||
12766 entry->inheritance != VM_INHERIT_DEFAULT ||
12767 entry->no_cache ||
12768 entry->permanent ||
39236c6e 12769 entry->superpage_size != FALSE ||
b0d623f7
A
12770 entry->zero_wired_pages ||
12771 entry->wired_count != 0 ||
12772 entry->user_wired_count != 0) {
12773 return FALSE;
91447636 12774 }
b0d623f7 12775
3e170ce0 12776 object = VME_OBJECT(entry);
b0d623f7
A
12777 if (object == VM_OBJECT_NULL) {
12778 return TRUE;
12779 }
316670eb
A
12780 if (
12781#if 0
12782 /*
12783 * Let's proceed even if the VM object is potentially
12784 * shared.
12785 * We check for this later when processing the actual
12786 * VM pages, so the contents will be safe if shared.
12787 *
12788 * But we can still mark this memory region as "reusable" to
12789 * acknowledge that the caller did let us know that the memory
12790 * could be re-used and should not be penalized for holding
12791 * on to it. This allows its "resident size" to not include
12792 * the reusable range.
12793 */
12794 object->ref_count == 1 &&
12795#endif
b0d623f7
A
12796 object->wired_page_count == 0 &&
12797 object->copy == VM_OBJECT_NULL &&
12798 object->shadow == VM_OBJECT_NULL &&
12799 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12800 object->internal &&
12801 !object->true_share &&
6d2010ae 12802 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
b0d623f7
A
12803 !object->code_signed) {
12804 return TRUE;
1c79356b 12805 }
b0d623f7
A
12806 return FALSE;
12807
12808
12809}
1c79356b 12810
b0d623f7
A
12811static kern_return_t
12812vm_map_reuse_pages(
12813 vm_map_t map,
12814 vm_map_offset_t start,
12815 vm_map_offset_t end)
12816{
12817 vm_map_entry_t entry;
12818 vm_object_t object;
12819 vm_object_offset_t start_offset, end_offset;
12820
12821 /*
12822 * The MADV_REUSE operation doesn't require any changes to the
12823 * vm_map_entry_t's, so the read lock is sufficient.
12824 */
0b4e3aa0 12825
b0d623f7 12826 vm_map_lock_read(map);
3e170ce0 12827 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 12828
b0d623f7
A
12829 /*
12830 * The madvise semantics require that the address range be fully
12831 * allocated with no holes. Otherwise, we're required to return
12832 * an error.
12833 */
12834
12835 if (!vm_map_range_check(map, start, end, &entry)) {
12836 vm_map_unlock_read(map);
12837 vm_page_stats_reusable.reuse_pages_failure++;
12838 return KERN_INVALID_ADDRESS;
1c79356b 12839 }
91447636 12840
b0d623f7
A
12841 /*
12842 * Examine each vm_map_entry_t in the range.
12843 */
12844 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12845 entry = entry->vme_next) {
12846 /*
12847 * Sanity check on the VM map entry.
12848 */
12849 if (! vm_map_entry_is_reusable(entry)) {
12850 vm_map_unlock_read(map);
12851 vm_page_stats_reusable.reuse_pages_failure++;
12852 return KERN_INVALID_ADDRESS;
12853 }
12854
12855 /*
12856 * The first time through, the start address could be anywhere
12857 * within the vm_map_entry we found. So adjust the offset to
12858 * correspond.
12859 */
12860 if (entry->vme_start < start) {
12861 start_offset = start - entry->vme_start;
12862 } else {
12863 start_offset = 0;
12864 }
12865 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
12866 start_offset += VME_OFFSET(entry);
12867 end_offset += VME_OFFSET(entry);
b0d623f7 12868
2dced7af 12869 assert(!entry->is_sub_map);
3e170ce0 12870 object = VME_OBJECT(entry);
b0d623f7
A
12871 if (object != VM_OBJECT_NULL) {
12872 vm_object_lock(object);
12873 vm_object_reuse_pages(object, start_offset, end_offset,
12874 TRUE);
12875 vm_object_unlock(object);
12876 }
12877
3e170ce0 12878 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
12879 /*
12880 * XXX
12881 * We do not hold the VM map exclusively here.
12882 * The "alias" field is not that critical, so it's
12883 * safe to update it here, as long as it is the only
12884 * one that can be modified while holding the VM map
12885 * "shared".
12886 */
3e170ce0 12887 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
12888 }
12889 }
12890
12891 vm_map_unlock_read(map);
12892 vm_page_stats_reusable.reuse_pages_success++;
12893 return KERN_SUCCESS;
1c79356b
A
12894}
12895
1c79356b 12896
b0d623f7
A
12897static kern_return_t
12898vm_map_reusable_pages(
12899 vm_map_t map,
12900 vm_map_offset_t start,
12901 vm_map_offset_t end)
12902{
12903 vm_map_entry_t entry;
12904 vm_object_t object;
12905 vm_object_offset_t start_offset, end_offset;
3e170ce0 12906 vm_map_offset_t pmap_offset;
b0d623f7
A
12907
12908 /*
12909 * The MADV_REUSABLE operation doesn't require any changes to the
12910 * vm_map_entry_t's, so the read lock is sufficient.
12911 */
12912
12913 vm_map_lock_read(map);
3e170ce0 12914 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
12915
12916 /*
12917 * The madvise semantics require that the address range be fully
12918 * allocated with no holes. Otherwise, we're required to return
12919 * an error.
12920 */
12921
12922 if (!vm_map_range_check(map, start, end, &entry)) {
12923 vm_map_unlock_read(map);
12924 vm_page_stats_reusable.reusable_pages_failure++;
12925 return KERN_INVALID_ADDRESS;
12926 }
12927
12928 /*
12929 * Examine each vm_map_entry_t in the range.
12930 */
12931 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12932 entry = entry->vme_next) {
12933 int kill_pages = 0;
12934
12935 /*
12936 * Sanity check on the VM map entry.
12937 */
12938 if (! vm_map_entry_is_reusable(entry)) {
12939 vm_map_unlock_read(map);
12940 vm_page_stats_reusable.reusable_pages_failure++;
12941 return KERN_INVALID_ADDRESS;
12942 }
12943
12944 /*
12945 * The first time through, the start address could be anywhere
12946 * within the vm_map_entry we found. So adjust the offset to
12947 * correspond.
12948 */
12949 if (entry->vme_start < start) {
12950 start_offset = start - entry->vme_start;
3e170ce0 12951 pmap_offset = start;
b0d623f7
A
12952 } else {
12953 start_offset = 0;
3e170ce0 12954 pmap_offset = entry->vme_start;
b0d623f7
A
12955 }
12956 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
12957 start_offset += VME_OFFSET(entry);
12958 end_offset += VME_OFFSET(entry);
b0d623f7 12959
2dced7af 12960 assert(!entry->is_sub_map);
3e170ce0 12961 object = VME_OBJECT(entry);
b0d623f7
A
12962 if (object == VM_OBJECT_NULL)
12963 continue;
12964
12965
12966 vm_object_lock(object);
fe8ab488
A
12967 if (object->ref_count == 1 &&
12968 !object->shadow &&
12969 /*
12970 * "iokit_acct" entries are billed for their virtual size
12971 * (rather than for their resident pages only), so they
12972 * wouldn't benefit from making pages reusable, and it
12973 * would be hard to keep track of pages that are both
12974 * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12975 */
12976 !(entry->iokit_acct ||
12977 (!entry->is_sub_map && !entry->use_pmap)))
b0d623f7
A
12978 kill_pages = 1;
12979 else
12980 kill_pages = -1;
12981 if (kill_pages != -1) {
12982 vm_object_deactivate_pages(object,
12983 start_offset,
12984 end_offset - start_offset,
12985 kill_pages,
3e170ce0
A
12986 TRUE /*reusable_pages*/,
12987 map->pmap,
12988 pmap_offset);
b0d623f7
A
12989 } else {
12990 vm_page_stats_reusable.reusable_pages_shared++;
12991 }
12992 vm_object_unlock(object);
12993
3e170ce0
A
12994 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
12995 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
12996 /*
12997 * XXX
12998 * We do not hold the VM map exclusively here.
12999 * The "alias" field is not that critical, so it's
13000 * safe to update it here, as long as it is the only
13001 * one that can be modified while holding the VM map
13002 * "shared".
13003 */
3e170ce0 13004 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
13005 }
13006 }
13007
13008 vm_map_unlock_read(map);
13009 vm_page_stats_reusable.reusable_pages_success++;
13010 return KERN_SUCCESS;
13011}
13012
13013
13014static kern_return_t
13015vm_map_can_reuse(
13016 vm_map_t map,
13017 vm_map_offset_t start,
13018 vm_map_offset_t end)
13019{
13020 vm_map_entry_t entry;
13021
13022 /*
13023 * The MADV_REUSABLE operation doesn't require any changes to the
13024 * vm_map_entry_t's, so the read lock is sufficient.
13025 */
13026
13027 vm_map_lock_read(map);
3e170ce0 13028 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
13029
13030 /*
13031 * The madvise semantics require that the address range be fully
13032 * allocated with no holes. Otherwise, we're required to return
13033 * an error.
13034 */
13035
13036 if (!vm_map_range_check(map, start, end, &entry)) {
13037 vm_map_unlock_read(map);
13038 vm_page_stats_reusable.can_reuse_failure++;
13039 return KERN_INVALID_ADDRESS;
13040 }
13041
13042 /*
13043 * Examine each vm_map_entry_t in the range.
13044 */
13045 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13046 entry = entry->vme_next) {
13047 /*
13048 * Sanity check on the VM map entry.
13049 */
13050 if (! vm_map_entry_is_reusable(entry)) {
13051 vm_map_unlock_read(map);
13052 vm_page_stats_reusable.can_reuse_failure++;
13053 return KERN_INVALID_ADDRESS;
13054 }
13055 }
13056
13057 vm_map_unlock_read(map);
13058 vm_page_stats_reusable.can_reuse_success++;
13059 return KERN_SUCCESS;
13060}
13061
13062
3e170ce0
A
#if MACH_ASSERT
/*
 * vm_map_pageout:
 *
 * Backend of the VM_BEHAVIOR_PAGEOUT request (MACH_ASSERT kernels only):
 * call vm_object_pageout() on the internal VM object of every entry
 * overlapping [start, end).
 *
 * For a submap entry, the submap range it maps is validated with
 * vm_map_range_check() and only the entry that check returns (the one
 * containing the start of that range) is considered.
 *
 * Returns KERN_INVALID_ADDRESS if the range, or the range of a submap it
 * goes through, fails vm_map_range_check(); KERN_SUCCESS otherwise.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;

	/*
	 * No vm_map_entry_t is modified here, so the read lock is
	 * sufficient.
	 */
	vm_map_lock_read(map);

	/*
	 * madvise requires a fully allocated range, without holes.
	 */
	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Visit every entry overlapping the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	     entry = entry->vme_next) {
		vm_object_t	object;

		if (entry->is_sub_map) {
			vm_map_t	submap;
			vm_map_offset_t	submap_start;
			vm_map_offset_t	submap_end;
			vm_map_entry_t	submap_entry;

			/* range of the submap mapped by this entry */
			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
						     entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
						submap_start,
						submap_end,
						&submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			/* page out only a non-nested, internal object */
			object = VME_OBJECT(submap_entry);
			if (!submap_entry->is_sub_map &&
			    object != VM_OBJECT_NULL &&
			    object->internal) {
				vm_object_pageout(object);
			}

			vm_map_unlock_read(submap);
			continue;
		}

		/* regular entry: page out its object if it is internal */
		object = VME_OBJECT(entry);
		if (!entry->is_sub_map &&
		    object != VM_OBJECT_NULL &&
		    object->internal) {
			vm_object_pageout(object);
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
13152
13153
1c79356b 13154/*
91447636
A
13155 * Routine: vm_map_entry_insert
13156 *
13157 * Descritpion: This routine inserts a new vm_entry in a locked map.
1c79356b 13158 */
91447636
A
13159vm_map_entry_t
13160vm_map_entry_insert(
13161 vm_map_t map,
13162 vm_map_entry_t insp_entry,
13163 vm_map_offset_t start,
13164 vm_map_offset_t end,
13165 vm_object_t object,
13166 vm_object_offset_t offset,
13167 boolean_t needs_copy,
13168 boolean_t is_shared,
13169 boolean_t in_transition,
13170 vm_prot_t cur_protection,
13171 vm_prot_t max_protection,
13172 vm_behavior_t behavior,
13173 vm_inherit_t inheritance,
2d21ac55 13174 unsigned wired_count,
b0d623f7
A
13175 boolean_t no_cache,
13176 boolean_t permanent,
39236c6e 13177 unsigned int superpage_size,
fe8ab488
A
13178 boolean_t clear_map_aligned,
13179 boolean_t is_submap)
1c79356b 13180{
91447636 13181 vm_map_entry_t new_entry;
1c79356b 13182
91447636 13183 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 13184
7ddcb079 13185 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 13186
39236c6e
A
13187 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13188 new_entry->map_aligned = TRUE;
13189 } else {
13190 new_entry->map_aligned = FALSE;
13191 }
13192 if (clear_map_aligned &&
fe8ab488
A
13193 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13194 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
13195 new_entry->map_aligned = FALSE;
13196 }
13197
91447636
A
13198 new_entry->vme_start = start;
13199 new_entry->vme_end = end;
13200 assert(page_aligned(new_entry->vme_start));
13201 assert(page_aligned(new_entry->vme_end));
39236c6e 13202 if (new_entry->map_aligned) {
fe8ab488
A
13203 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13204 VM_MAP_PAGE_MASK(map)));
39236c6e
A
13205 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13206 VM_MAP_PAGE_MASK(map)));
13207 }
e2d2fc5c 13208 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 13209
3e170ce0
A
13210 VME_OBJECT_SET(new_entry, object);
13211 VME_OFFSET_SET(new_entry, offset);
91447636 13212 new_entry->is_shared = is_shared;
fe8ab488 13213 new_entry->is_sub_map = is_submap;
91447636
A
13214 new_entry->needs_copy = needs_copy;
13215 new_entry->in_transition = in_transition;
13216 new_entry->needs_wakeup = FALSE;
13217 new_entry->inheritance = inheritance;
13218 new_entry->protection = cur_protection;
13219 new_entry->max_protection = max_protection;
13220 new_entry->behavior = behavior;
13221 new_entry->wired_count = wired_count;
13222 new_entry->user_wired_count = 0;
fe8ab488
A
13223 if (is_submap) {
13224 /*
13225 * submap: "use_pmap" means "nested".
13226 * default: false.
13227 */
13228 new_entry->use_pmap = FALSE;
13229 } else {
13230 /*
13231 * object: "use_pmap" means "use pmap accounting" for footprint.
13232 * default: true.
13233 */
13234 new_entry->use_pmap = TRUE;
13235 }
3e170ce0 13236 VME_ALIAS_SET(new_entry, 0);
b0d623f7 13237 new_entry->zero_wired_pages = FALSE;
2d21ac55 13238 new_entry->no_cache = no_cache;
b0d623f7 13239 new_entry->permanent = permanent;
39236c6e
A
13240 if (superpage_size)
13241 new_entry->superpage_size = TRUE;
13242 else
13243 new_entry->superpage_size = FALSE;
6d2010ae 13244 new_entry->used_for_jit = FALSE;
fe8ab488 13245 new_entry->iokit_acct = FALSE;
3e170ce0
A
13246 new_entry->vme_resilient_codesign = FALSE;
13247 new_entry->vme_resilient_media = FALSE;
1c79356b 13248
91447636
A
13249 /*
13250 * Insert the new entry into the list.
13251 */
1c79356b 13252
6d2010ae 13253 vm_map_store_entry_link(map, insp_entry, new_entry);
91447636
A
13254 map->size += end - start;
13255
13256 /*
13257 * Update the free space hint and the lookup hint.
13258 */
13259
0c530ab8 13260 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 13261 return new_entry;
1c79356b
A
13262}
13263
13264/*
91447636
A
13265 * Routine: vm_map_remap_extract
13266 *
13267 * Description: This routine returns a vm_entry list from a map.
1c79356b 13268 */
91447636
A
13269static kern_return_t
13270vm_map_remap_extract(
13271 vm_map_t map,
13272 vm_map_offset_t addr,
13273 vm_map_size_t size,
13274 boolean_t copy,
13275 struct vm_map_header *map_header,
13276 vm_prot_t *cur_protection,
13277 vm_prot_t *max_protection,
13278 /* What, no behavior? */
13279 vm_inherit_t inheritance,
13280 boolean_t pageable)
1c79356b 13281{
91447636
A
13282 kern_return_t result;
13283 vm_map_size_t mapped_size;
13284 vm_map_size_t tmp_size;
13285 vm_map_entry_t src_entry; /* result of last map lookup */
13286 vm_map_entry_t new_entry;
13287 vm_object_offset_t offset;
13288 vm_map_offset_t map_address;
13289 vm_map_offset_t src_start; /* start of entry to map */
13290 vm_map_offset_t src_end; /* end of region to be mapped */
13291 vm_object_t object;
13292 vm_map_version_t version;
13293 boolean_t src_needs_copy;
13294 boolean_t new_entry_needs_copy;
1c79356b 13295
91447636 13296 assert(map != VM_MAP_NULL);
39236c6e
A
13297 assert(size != 0);
13298 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636
A
13299 assert(inheritance == VM_INHERIT_NONE ||
13300 inheritance == VM_INHERIT_COPY ||
13301 inheritance == VM_INHERIT_SHARE);
1c79356b 13302
91447636
A
13303 /*
13304 * Compute start and end of region.
13305 */
39236c6e
A
13306 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13307 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13308
1c79356b 13309
91447636
A
13310 /*
13311 * Initialize map_header.
13312 */
13313 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13314 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13315 map_header->nentries = 0;
13316 map_header->entries_pageable = pageable;
39236c6e 13317 map_header->page_shift = PAGE_SHIFT;
1c79356b 13318
6d2010ae
A
13319 vm_map_store_init( map_header );
13320
91447636
A
13321 *cur_protection = VM_PROT_ALL;
13322 *max_protection = VM_PROT_ALL;
1c79356b 13323
91447636
A
13324 map_address = 0;
13325 mapped_size = 0;
13326 result = KERN_SUCCESS;
1c79356b 13327
91447636
A
13328 /*
13329 * The specified source virtual space might correspond to
13330 * multiple map entries, need to loop on them.
13331 */
13332 vm_map_lock(map);
13333 while (mapped_size != size) {
13334 vm_map_size_t entry_size;
1c79356b 13335
91447636
A
13336 /*
13337 * Find the beginning of the region.
13338 */
13339 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13340 result = KERN_INVALID_ADDRESS;
13341 break;
13342 }
1c79356b 13343
91447636
A
13344 if (src_start < src_entry->vme_start ||
13345 (mapped_size && src_start != src_entry->vme_start)) {
13346 result = KERN_INVALID_ADDRESS;
13347 break;
13348 }
1c79356b 13349
91447636
A
13350 tmp_size = size - mapped_size;
13351 if (src_end > src_entry->vme_end)
13352 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 13353
91447636 13354 entry_size = (vm_map_size_t)(src_entry->vme_end -
2d21ac55 13355 src_entry->vme_start);
1c79356b 13356
91447636 13357 if(src_entry->is_sub_map) {
3e170ce0 13358 vm_map_reference(VME_SUBMAP(src_entry));
91447636
A
13359 object = VM_OBJECT_NULL;
13360 } else {
3e170ce0 13361 object = VME_OBJECT(src_entry);
fe8ab488
A
13362 if (src_entry->iokit_acct) {
13363 /*
13364 * This entry uses "IOKit accounting".
13365 */
13366 } else if (object != VM_OBJECT_NULL &&
13367 object->purgable != VM_PURGABLE_DENY) {
13368 /*
13369 * Purgeable objects have their own accounting:
13370 * no pmap accounting for them.
13371 */
13372 assert(!src_entry->use_pmap);
13373 } else {
13374 /*
13375 * Not IOKit or purgeable:
13376 * must be accounted by pmap stats.
13377 */
13378 assert(src_entry->use_pmap);
13379 }
55e303ae 13380
91447636
A
13381 if (object == VM_OBJECT_NULL) {
13382 object = vm_object_allocate(entry_size);
3e170ce0
A
13383 VME_OFFSET_SET(src_entry, 0);
13384 VME_OBJECT_SET(src_entry, object);
91447636
A
13385 } else if (object->copy_strategy !=
13386 MEMORY_OBJECT_COPY_SYMMETRIC) {
13387 /*
13388 * We are already using an asymmetric
13389 * copy, and therefore we already have
13390 * the right object.
13391 */
13392 assert(!src_entry->needs_copy);
13393 } else if (src_entry->needs_copy || object->shadowed ||
13394 (object->internal && !object->true_share &&
2d21ac55 13395 !src_entry->is_shared &&
6d2010ae 13396 object->vo_size > entry_size)) {
1c79356b 13397
3e170ce0 13398 VME_OBJECT_SHADOW(src_entry, entry_size);
1c79356b 13399
91447636
A
13400 if (!src_entry->needs_copy &&
13401 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
13402 vm_prot_t prot;
13403
13404 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13405
3e170ce0
A
13406 if (override_nx(map,
13407 VME_ALIAS(src_entry))
13408 && prot)
0c530ab8 13409 prot |= VM_PROT_EXECUTE;
2d21ac55 13410
316670eb 13411 if(map->mapped_in_other_pmaps) {
2d21ac55 13412 vm_object_pmap_protect(
3e170ce0
A
13413 VME_OBJECT(src_entry),
13414 VME_OFFSET(src_entry),
2d21ac55
A
13415 entry_size,
13416 PMAP_NULL,
0c530ab8 13417 src_entry->vme_start,
0c530ab8 13418 prot);
2d21ac55
A
13419 } else {
13420 pmap_protect(vm_map_pmap(map),
13421 src_entry->vme_start,
13422 src_entry->vme_end,
13423 prot);
91447636
A
13424 }
13425 }
1c79356b 13426
3e170ce0 13427 object = VME_OBJECT(src_entry);
91447636
A
13428 src_entry->needs_copy = FALSE;
13429 }
1c79356b 13430
1c79356b 13431
91447636 13432 vm_object_lock(object);
2d21ac55 13433 vm_object_reference_locked(object); /* object ref. for new entry */
91447636 13434 if (object->copy_strategy ==
2d21ac55 13435 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
13436 object->copy_strategy =
13437 MEMORY_OBJECT_COPY_DELAY;
13438 }
13439 vm_object_unlock(object);
13440 }
1c79356b 13441
3e170ce0
A
13442 offset = (VME_OFFSET(src_entry) +
13443 (src_start - src_entry->vme_start));
1c79356b 13444
7ddcb079 13445 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636 13446 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
13447 if (new_entry->is_sub_map) {
13448 /* clr address space specifics */
13449 new_entry->use_pmap = FALSE;
13450 }
1c79356b 13451
39236c6e
A
13452 new_entry->map_aligned = FALSE;
13453
91447636
A
13454 new_entry->vme_start = map_address;
13455 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 13456 assert(new_entry->vme_start < new_entry->vme_end);
91447636 13457 new_entry->inheritance = inheritance;
3e170ce0 13458 VME_OFFSET_SET(new_entry, offset);
1c79356b 13459
91447636
A
13460 /*
13461 * The new region has to be copied now if required.
13462 */
13463 RestartCopy:
13464 if (!copy) {
316670eb
A
13465 /*
13466 * Cannot allow an entry describing a JIT
13467 * region to be shared across address spaces.
13468 */
13469 if (src_entry->used_for_jit == TRUE) {
13470 result = KERN_INVALID_ARGUMENT;
13471 break;
13472 }
91447636
A
13473 src_entry->is_shared = TRUE;
13474 new_entry->is_shared = TRUE;
13475 if (!(new_entry->is_sub_map))
13476 new_entry->needs_copy = FALSE;
1c79356b 13477
91447636
A
13478 } else if (src_entry->is_sub_map) {
13479 /* make this a COW sub_map if not already */
3e170ce0 13480 assert(new_entry->wired_count == 0);
91447636
A
13481 new_entry->needs_copy = TRUE;
13482 object = VM_OBJECT_NULL;
13483 } else if (src_entry->wired_count == 0 &&
3e170ce0
A
13484 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13485 VME_OFFSET(new_entry),
2d21ac55
A
13486 (new_entry->vme_end -
13487 new_entry->vme_start),
13488 &src_needs_copy,
13489 &new_entry_needs_copy)) {
55e303ae 13490
91447636
A
13491 new_entry->needs_copy = new_entry_needs_copy;
13492 new_entry->is_shared = FALSE;
1c79356b 13493
91447636
A
13494 /*
13495 * Handle copy_on_write semantics.
13496 */
13497 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
13498 vm_prot_t prot;
13499
13500 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13501
3e170ce0
A
13502 if (override_nx(map,
13503 VME_ALIAS(src_entry))
13504 && prot)
0c530ab8 13505 prot |= VM_PROT_EXECUTE;
2d21ac55 13506
91447636
A
13507 vm_object_pmap_protect(object,
13508 offset,
13509 entry_size,
13510 ((src_entry->is_shared
316670eb 13511 || map->mapped_in_other_pmaps) ?
91447636
A
13512 PMAP_NULL : map->pmap),
13513 src_entry->vme_start,
0c530ab8 13514 prot);
1c79356b 13515
3e170ce0 13516 assert(src_entry->wired_count == 0);
91447636
A
13517 src_entry->needs_copy = TRUE;
13518 }
13519 /*
13520 * Throw away the old object reference of the new entry.
13521 */
13522 vm_object_deallocate(object);
1c79356b 13523
91447636
A
13524 } else {
13525 new_entry->is_shared = FALSE;
1c79356b 13526
91447636
A
13527 /*
13528 * The map can be safely unlocked since we
13529 * already hold a reference on the object.
13530 *
13531 * Record the timestamp of the map for later
13532 * verification, and unlock the map.
13533 */
13534 version.main_timestamp = map->timestamp;
13535 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 13536
91447636
A
13537 /*
13538 * Perform the copy.
13539 */
13540 if (src_entry->wired_count > 0) {
13541 vm_object_lock(object);
13542 result = vm_object_copy_slowly(
2d21ac55
A
13543 object,
13544 offset,
13545 entry_size,
13546 THREAD_UNINT,
3e170ce0 13547 &VME_OBJECT(new_entry));
1c79356b 13548
3e170ce0 13549 VME_OFFSET_SET(new_entry, 0);
91447636
A
13550 new_entry->needs_copy = FALSE;
13551 } else {
3e170ce0
A
13552 vm_object_offset_t new_offset;
13553
13554 new_offset = VME_OFFSET(new_entry);
91447636 13555 result = vm_object_copy_strategically(
2d21ac55
A
13556 object,
13557 offset,
13558 entry_size,
3e170ce0
A
13559 &VME_OBJECT(new_entry),
13560 &new_offset,
2d21ac55 13561 &new_entry_needs_copy);
3e170ce0
A
13562 if (new_offset != VME_OFFSET(new_entry)) {
13563 VME_OFFSET_SET(new_entry, new_offset);
13564 }
1c79356b 13565
91447636
A
13566 new_entry->needs_copy = new_entry_needs_copy;
13567 }
1c79356b 13568
91447636
A
13569 /*
13570 * Throw away the old object reference of the new entry.
13571 */
13572 vm_object_deallocate(object);
1c79356b 13573
91447636
A
13574 if (result != KERN_SUCCESS &&
13575 result != KERN_MEMORY_RESTART_COPY) {
13576 _vm_map_entry_dispose(map_header, new_entry);
13577 break;
13578 }
1c79356b 13579
91447636
A
13580 /*
13581 * Verify that the map has not substantially
13582 * changed while the copy was being made.
13583 */
1c79356b 13584
91447636
A
13585 vm_map_lock(map);
13586 if (version.main_timestamp + 1 != map->timestamp) {
13587 /*
13588 * Simple version comparison failed.
13589 *
13590 * Retry the lookup and verify that the
13591 * same object/offset are still present.
13592 */
3e170ce0 13593 vm_object_deallocate(VME_OBJECT(new_entry));
91447636
A
13594 _vm_map_entry_dispose(map_header, new_entry);
13595 if (result == KERN_MEMORY_RESTART_COPY)
13596 result = KERN_SUCCESS;
13597 continue;
13598 }
1c79356b 13599
91447636
A
13600 if (result == KERN_MEMORY_RESTART_COPY) {
13601 vm_object_reference(object);
13602 goto RestartCopy;
13603 }
13604 }
1c79356b 13605
6d2010ae 13606 _vm_map_store_entry_link(map_header,
91447636 13607 map_header->links.prev, new_entry);
1c79356b 13608
6d2010ae
A
13609 /*Protections for submap mapping are irrelevant here*/
13610 if( !src_entry->is_sub_map ) {
13611 *cur_protection &= src_entry->protection;
13612 *max_protection &= src_entry->max_protection;
13613 }
91447636
A
13614 map_address += tmp_size;
13615 mapped_size += tmp_size;
13616 src_start += tmp_size;
1c79356b 13617
91447636 13618 } /* end while */
1c79356b 13619
91447636
A
13620 vm_map_unlock(map);
13621 if (result != KERN_SUCCESS) {
13622 /*
13623 * Free all allocated elements.
13624 */
13625 for (src_entry = map_header->links.next;
13626 src_entry != (struct vm_map_entry *)&map_header->links;
13627 src_entry = new_entry) {
13628 new_entry = src_entry->vme_next;
6d2010ae 13629 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 13630 if (src_entry->is_sub_map) {
3e170ce0 13631 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 13632 } else {
3e170ce0 13633 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 13634 }
91447636
A
13635 _vm_map_entry_dispose(map_header, src_entry);
13636 }
13637 }
13638 return result;
1c79356b
A
13639}
13640
13641/*
91447636 13642 * Routine: vm_remap
1c79356b 13643 *
91447636
A
13644 * Map portion of a task's address space.
13645 * Mapped region must not overlap more than
13646 * one vm memory object. Protections and
13647 * inheritance attributes remain the same
13648 * as in the original task and are out parameters.
13649 * Source and Target task can be identical
13650 * Other attributes are identical as for vm_map()
1c79356b
A
13651 */
13652kern_return_t
91447636
A
13653vm_map_remap(
13654 vm_map_t target_map,
13655 vm_map_address_t *address,
13656 vm_map_size_t size,
13657 vm_map_offset_t mask,
060df5ea 13658 int flags,
91447636
A
13659 vm_map_t src_map,
13660 vm_map_offset_t memory_address,
1c79356b 13661 boolean_t copy,
1c79356b
A
13662 vm_prot_t *cur_protection,
13663 vm_prot_t *max_protection,
91447636 13664 vm_inherit_t inheritance)
1c79356b
A
13665{
13666 kern_return_t result;
91447636 13667 vm_map_entry_t entry;
0c530ab8 13668 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 13669 vm_map_entry_t new_entry;
91447636 13670 struct vm_map_header map_header;
39236c6e 13671 vm_map_offset_t offset_in_mapping;
1c79356b 13672
91447636
A
13673 if (target_map == VM_MAP_NULL)
13674 return KERN_INVALID_ARGUMENT;
1c79356b 13675
91447636 13676 switch (inheritance) {
2d21ac55
A
13677 case VM_INHERIT_NONE:
13678 case VM_INHERIT_COPY:
13679 case VM_INHERIT_SHARE:
91447636
A
13680 if (size != 0 && src_map != VM_MAP_NULL)
13681 break;
13682 /*FALL THRU*/
2d21ac55 13683 default:
91447636
A
13684 return KERN_INVALID_ARGUMENT;
13685 }
1c79356b 13686
39236c6e
A
13687 /*
13688 * If the user is requesting that we return the address of the
13689 * first byte of the data (rather than the base of the page),
13690 * then we use different rounding semantics: specifically,
13691 * we assume that (memory_address, size) describes a region
13692 * all of whose pages we must cover, rather than a base to be truncated
13693 * down and a size to be added to that base. So we figure out
13694 * the highest page that the requested region includes and make
13695 * sure that the size will cover it.
13696 *
13697 * The key example we're worried about it is of the form:
13698 *
13699 * memory_address = 0x1ff0, size = 0x20
13700 *
13701 * With the old semantics, we round down the memory_address to 0x1000
13702 * and round up the size to 0x1000, resulting in our covering *only*
13703 * page 0x1000. With the new semantics, we'd realize that the region covers
13704 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
13705 * 0x1000 and page 0x2000 in the region we remap.
13706 */
13707 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13708 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
13709 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
13710 } else {
13711 size = vm_map_round_page(size, PAGE_MASK);
13712 }
1c79356b 13713
91447636 13714 result = vm_map_remap_extract(src_map, memory_address,
2d21ac55
A
13715 size, copy, &map_header,
13716 cur_protection,
13717 max_protection,
13718 inheritance,
39236c6e 13719 target_map->hdr.entries_pageable);
1c79356b 13720
91447636
A
13721 if (result != KERN_SUCCESS) {
13722 return result;
13723 }
1c79356b 13724
91447636
A
13725 /*
13726 * Allocate/check a range of free virtual address
13727 * space for the target
1c79356b 13728 */
39236c6e
A
13729 *address = vm_map_trunc_page(*address,
13730 VM_MAP_PAGE_MASK(target_map));
91447636
A
13731 vm_map_lock(target_map);
13732 result = vm_map_remap_range_allocate(target_map, address, size,
060df5ea 13733 mask, flags, &insp_entry);
1c79356b 13734
91447636
A
13735 for (entry = map_header.links.next;
13736 entry != (struct vm_map_entry *)&map_header.links;
13737 entry = new_entry) {
13738 new_entry = entry->vme_next;
6d2010ae 13739 _vm_map_store_entry_unlink(&map_header, entry);
91447636 13740 if (result == KERN_SUCCESS) {
3e170ce0
A
13741 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13742 /* no codesigning -> read-only access */
13743 assert(!entry->used_for_jit);
13744 entry->max_protection = VM_PROT_READ;
13745 entry->protection = VM_PROT_READ;
13746 entry->vme_resilient_codesign = TRUE;
13747 }
91447636
A
13748 entry->vme_start += *address;
13749 entry->vme_end += *address;
39236c6e 13750 assert(!entry->map_aligned);
6d2010ae 13751 vm_map_store_entry_link(target_map, insp_entry, entry);
91447636
A
13752 insp_entry = entry;
13753 } else {
13754 if (!entry->is_sub_map) {
3e170ce0 13755 vm_object_deallocate(VME_OBJECT(entry));
91447636 13756 } else {
3e170ce0 13757 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 13758 }
91447636 13759 _vm_map_entry_dispose(&map_header, entry);
1c79356b 13760 }
91447636 13761 }
1c79356b 13762
3e170ce0
A
13763 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13764 *cur_protection = VM_PROT_READ;
13765 *max_protection = VM_PROT_READ;
13766 }
13767
6d2010ae
A
13768 if( target_map->disable_vmentry_reuse == TRUE) {
13769 if( target_map->highest_entry_end < insp_entry->vme_end ){
13770 target_map->highest_entry_end = insp_entry->vme_end;
13771 }
13772 }
13773
91447636
A
13774 if (result == KERN_SUCCESS) {
13775 target_map->size += size;
0c530ab8 13776 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
91447636
A
13777 }
13778 vm_map_unlock(target_map);
1c79356b 13779
91447636
A
13780 if (result == KERN_SUCCESS && target_map->wiring_required)
13781 result = vm_map_wire(target_map, *address,
3e170ce0
A
13782 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
13783 TRUE);
39236c6e
A
13784
13785 /*
13786 * If requested, return the address of the data pointed to by the
13787 * request, rather than the base of the resulting page.
13788 */
13789 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13790 *address += offset_in_mapping;
13791 }
13792
91447636
A
13793 return result;
13794}
1c79356b 13795
91447636
A
13796/*
13797 * Routine: vm_map_remap_range_allocate
13798 *
13799 * Description:
13800 * Allocate a range in the specified virtual address map.
13801 * returns the address and the map entry just before the allocated
13802 * range
13803 *
13804 * Map must be locked.
13805 */
1c79356b 13806
91447636
A
13807static kern_return_t
13808vm_map_remap_range_allocate(
13809 vm_map_t map,
13810 vm_map_address_t *address, /* IN/OUT */
13811 vm_map_size_t size,
13812 vm_map_offset_t mask,
060df5ea 13813 int flags,
91447636
A
13814 vm_map_entry_t *map_entry) /* OUT */
13815{
060df5ea
A
13816 vm_map_entry_t entry;
13817 vm_map_offset_t start;
13818 vm_map_offset_t end;
13819 kern_return_t kr;
3e170ce0 13820 vm_map_entry_t hole_entry;
1c79356b 13821
2d21ac55 13822StartAgain: ;
1c79356b 13823
2d21ac55 13824 start = *address;
1c79356b 13825
060df5ea 13826 if (flags & VM_FLAGS_ANYWHERE)
2d21ac55
A
13827 {
13828 /*
13829 * Calculate the first possible address.
13830 */
1c79356b 13831
2d21ac55
A
13832 if (start < map->min_offset)
13833 start = map->min_offset;
13834 if (start > map->max_offset)
13835 return(KERN_NO_SPACE);
91447636 13836
2d21ac55
A
13837 /*
13838 * Look for the first possible address;
13839 * if there's already something at this
13840 * address, we have to start after it.
13841 */
1c79356b 13842
6d2010ae
A
13843 if( map->disable_vmentry_reuse == TRUE) {
13844 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 13845 } else {
3e170ce0
A
13846
13847 if (map->holelistenabled) {
13848 hole_entry = (vm_map_entry_t)map->holes_list;
13849
13850 if (hole_entry == NULL) {
13851 /*
13852 * No more space in the map?
13853 */
13854 return(KERN_NO_SPACE);
13855 } else {
13856
13857 boolean_t found_hole = FALSE;
13858
13859 do {
13860 if (hole_entry->vme_start >= start) {
13861 start = hole_entry->vme_start;
13862 found_hole = TRUE;
13863 break;
13864 }
13865
13866 if (hole_entry->vme_end > start) {
13867 found_hole = TRUE;
13868 break;
13869 }
13870 hole_entry = hole_entry->vme_next;
13871
13872 } while (hole_entry != (vm_map_entry_t) map->holes_list);
13873
13874 if (found_hole == FALSE) {
13875 return (KERN_NO_SPACE);
13876 }
13877
13878 entry = hole_entry;
13879 }
6d2010ae 13880 } else {
3e170ce0
A
13881 assert(first_free_is_valid(map));
13882 if (start == map->min_offset) {
13883 if ((entry = map->first_free) != vm_map_to_entry(map))
13884 start = entry->vme_end;
13885 } else {
13886 vm_map_entry_t tmp_entry;
13887 if (vm_map_lookup_entry(map, start, &tmp_entry))
13888 start = tmp_entry->vme_end;
13889 entry = tmp_entry;
13890 }
6d2010ae 13891 }
39236c6e
A
13892 start = vm_map_round_page(start,
13893 VM_MAP_PAGE_MASK(map));
2d21ac55 13894 }
91447636 13895
2d21ac55
A
13896 /*
13897 * In any case, the "entry" always precedes
13898 * the proposed new region throughout the
13899 * loop:
13900 */
1c79356b 13901
2d21ac55
A
13902 while (TRUE) {
13903 register vm_map_entry_t next;
13904
13905 /*
13906 * Find the end of the proposed new region.
13907 * Be sure we didn't go beyond the end, or
13908 * wrap around the address.
13909 */
13910
13911 end = ((start + mask) & ~mask);
39236c6e
A
13912 end = vm_map_round_page(end,
13913 VM_MAP_PAGE_MASK(map));
2d21ac55
A
13914 if (end < start)
13915 return(KERN_NO_SPACE);
13916 start = end;
13917 end += size;
13918
13919 if ((end > map->max_offset) || (end < start)) {
13920 if (map->wait_for_space) {
13921 if (size <= (map->max_offset -
13922 map->min_offset)) {
13923 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13924 vm_map_unlock(map);
13925 thread_block(THREAD_CONTINUE_NULL);
13926 vm_map_lock(map);
13927 goto StartAgain;
13928 }
13929 }
91447636 13930
2d21ac55
A
13931 return(KERN_NO_SPACE);
13932 }
1c79356b 13933
2d21ac55 13934 next = entry->vme_next;
1c79356b 13935
3e170ce0
A
13936 if (map->holelistenabled) {
13937 if (entry->vme_end >= end)
13938 break;
13939 } else {
13940 /*
13941 * If there are no more entries, we must win.
13942 *
13943 * OR
13944 *
13945 * If there is another entry, it must be
13946 * after the end of the potential new region.
13947 */
1c79356b 13948
3e170ce0
A
13949 if (next == vm_map_to_entry(map))
13950 break;
13951
13952 if (next->vme_start >= end)
13953 break;
13954 }
1c79356b 13955
2d21ac55
A
13956 /*
13957 * Didn't fit -- move to the next entry.
13958 */
1c79356b 13959
2d21ac55 13960 entry = next;
3e170ce0
A
13961
13962 if (map->holelistenabled) {
13963 if (entry == (vm_map_entry_t) map->holes_list) {
13964 /*
13965 * Wrapped around
13966 */
13967 return(KERN_NO_SPACE);
13968 }
13969 start = entry->vme_start;
13970 } else {
13971 start = entry->vme_end;
13972 }
13973 }
13974
13975 if (map->holelistenabled) {
13976
13977 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
13978 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
13979 }
2d21ac55 13980 }
3e170ce0 13981
2d21ac55 13982 *address = start;
3e170ce0 13983
2d21ac55
A
13984 } else {
13985 vm_map_entry_t temp_entry;
91447636 13986
2d21ac55
A
13987 /*
13988 * Verify that:
13989 * the address doesn't itself violate
13990 * the mask requirement.
13991 */
1c79356b 13992
2d21ac55
A
13993 if ((start & mask) != 0)
13994 return(KERN_NO_SPACE);
1c79356b 13995
1c79356b 13996
2d21ac55
A
13997 /*
13998 * ... the address is within bounds
13999 */
1c79356b 14000
2d21ac55 14001 end = start + size;
1c79356b 14002
2d21ac55
A
14003 if ((start < map->min_offset) ||
14004 (end > map->max_offset) ||
14005 (start >= end)) {
14006 return(KERN_INVALID_ADDRESS);
14007 }
1c79356b 14008
060df5ea
A
14009 /*
14010 * If we're asked to overwrite whatever was mapped in that
14011 * range, first deallocate that range.
14012 */
14013 if (flags & VM_FLAGS_OVERWRITE) {
14014 vm_map_t zap_map;
14015
14016 /*
14017 * We use a "zap_map" to avoid having to unlock
14018 * the "map" in vm_map_delete(), which would compromise
14019 * the atomicity of the "deallocate" and then "remap"
14020 * combination.
14021 */
14022 zap_map = vm_map_create(PMAP_NULL,
14023 start,
316670eb 14024 end,
060df5ea
A
14025 map->hdr.entries_pageable);
14026 if (zap_map == VM_MAP_NULL) {
14027 return KERN_RESOURCE_SHORTAGE;
14028 }
39236c6e 14029 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 14030 vm_map_disable_hole_optimization(zap_map);
060df5ea
A
14031
14032 kr = vm_map_delete(map, start, end,
fe8ab488
A
14033 (VM_MAP_REMOVE_SAVE_ENTRIES |
14034 VM_MAP_REMOVE_NO_MAP_ALIGN),
060df5ea
A
14035 zap_map);
14036 if (kr == KERN_SUCCESS) {
14037 vm_map_destroy(zap_map,
14038 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14039 zap_map = VM_MAP_NULL;
14040 }
14041 }
14042
2d21ac55
A
14043 /*
14044 * ... the starting address isn't allocated
14045 */
91447636 14046
2d21ac55
A
14047 if (vm_map_lookup_entry(map, start, &temp_entry))
14048 return(KERN_NO_SPACE);
91447636 14049
2d21ac55 14050 entry = temp_entry;
91447636 14051
2d21ac55
A
14052 /*
14053 * ... the next region doesn't overlap the
14054 * end point.
14055 */
1c79356b 14056
2d21ac55
A
14057 if ((entry->vme_next != vm_map_to_entry(map)) &&
14058 (entry->vme_next->vme_start < end))
14059 return(KERN_NO_SPACE);
14060 }
14061 *map_entry = entry;
14062 return(KERN_SUCCESS);
91447636 14063}
1c79356b 14064
91447636
A
14065/*
14066 * vm_map_switch:
14067 *
14068 * Set the address map for the current thread to the specified map
14069 */
1c79356b 14070
91447636
A
14071vm_map_t
14072vm_map_switch(
14073 vm_map_t map)
14074{
14075 int mycpu;
14076 thread_t thread = current_thread();
14077 vm_map_t oldmap = thread->map;
1c79356b 14078
91447636
A
14079 mp_disable_preemption();
14080 mycpu = cpu_number();
1c79356b 14081
91447636
A
14082 /*
14083 * Deactivate the current map and activate the requested map
14084 */
14085 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 14086
91447636
A
14087 mp_enable_preemption();
14088 return(oldmap);
14089}
1c79356b 14090
1c79356b 14091
91447636
A
14092/*
14093 * Routine: vm_map_write_user
14094 *
14095 * Description:
14096 * Copy out data from a kernel space into space in the
14097 * destination map. The space must already exist in the
14098 * destination map.
14099 * NOTE: This routine should only be called by threads
14100 * which can block on a page fault. i.e. kernel mode user
14101 * threads.
14102 *
14103 */
14104kern_return_t
14105vm_map_write_user(
14106 vm_map_t map,
14107 void *src_p,
14108 vm_map_address_t dst_addr,
14109 vm_size_t size)
14110{
14111 kern_return_t kr = KERN_SUCCESS;
1c79356b 14112
91447636
A
14113 if(current_map() == map) {
14114 if (copyout(src_p, dst_addr, size)) {
14115 kr = KERN_INVALID_ADDRESS;
14116 }
14117 } else {
14118 vm_map_t oldmap;
1c79356b 14119
91447636
A
14120 /* take on the identity of the target map while doing */
14121 /* the transfer */
1c79356b 14122
91447636
A
14123 vm_map_reference(map);
14124 oldmap = vm_map_switch(map);
14125 if (copyout(src_p, dst_addr, size)) {
14126 kr = KERN_INVALID_ADDRESS;
1c79356b 14127 }
91447636
A
14128 vm_map_switch(oldmap);
14129 vm_map_deallocate(map);
1c79356b 14130 }
91447636 14131 return kr;
1c79356b
A
14132}
14133
14134/*
91447636
A
14135 * Routine: vm_map_read_user
14136 *
14137 * Description:
14138 * Copy in data from a user space source map into the
14139 * kernel map. The space must already exist in the
14140 * kernel map.
14141 * NOTE: This routine should only be called by threads
14142 * which can block on a page fault. i.e. kernel mode user
14143 * threads.
1c79356b 14144 *
1c79356b
A
14145 */
14146kern_return_t
91447636
A
14147vm_map_read_user(
14148 vm_map_t map,
14149 vm_map_address_t src_addr,
14150 void *dst_p,
14151 vm_size_t size)
1c79356b 14152{
91447636 14153 kern_return_t kr = KERN_SUCCESS;
1c79356b 14154
91447636
A
14155 if(current_map() == map) {
14156 if (copyin(src_addr, dst_p, size)) {
14157 kr = KERN_INVALID_ADDRESS;
14158 }
14159 } else {
14160 vm_map_t oldmap;
1c79356b 14161
91447636
A
14162 /* take on the identity of the target map while doing */
14163 /* the transfer */
14164
14165 vm_map_reference(map);
14166 oldmap = vm_map_switch(map);
14167 if (copyin(src_addr, dst_p, size)) {
14168 kr = KERN_INVALID_ADDRESS;
14169 }
14170 vm_map_switch(oldmap);
14171 vm_map_deallocate(map);
1c79356b 14172 }
91447636
A
14173 return kr;
14174}
14175
1c79356b 14176
91447636
A
14177/*
14178 * vm_map_check_protection:
14179 *
14180 * Assert that the target map allows the specified
14181 * privilege on the entire address region given.
14182 * The entire region must be allocated.
14183 */
2d21ac55
A
14184boolean_t
14185vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14186 vm_map_offset_t end, vm_prot_t protection)
91447636 14187{
2d21ac55
A
14188 vm_map_entry_t entry;
14189 vm_map_entry_t tmp_entry;
1c79356b 14190
91447636 14191 vm_map_lock(map);
1c79356b 14192
2d21ac55 14193 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
91447636 14194 {
2d21ac55
A
14195 vm_map_unlock(map);
14196 return (FALSE);
1c79356b
A
14197 }
14198
91447636
A
14199 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14200 vm_map_unlock(map);
14201 return(FALSE);
14202 }
1c79356b 14203
91447636
A
14204 entry = tmp_entry;
14205
14206 while (start < end) {
14207 if (entry == vm_map_to_entry(map)) {
14208 vm_map_unlock(map);
14209 return(FALSE);
1c79356b 14210 }
1c79356b 14211
91447636
A
14212 /*
14213 * No holes allowed!
14214 */
1c79356b 14215
91447636
A
14216 if (start < entry->vme_start) {
14217 vm_map_unlock(map);
14218 return(FALSE);
14219 }
14220
14221 /*
14222 * Check protection associated with entry.
14223 */
14224
14225 if ((entry->protection & protection) != protection) {
14226 vm_map_unlock(map);
14227 return(FALSE);
14228 }
14229
14230 /* go to next entry */
14231
14232 start = entry->vme_end;
14233 entry = entry->vme_next;
14234 }
14235 vm_map_unlock(map);
14236 return(TRUE);
1c79356b
A
14237}
14238
1c79356b 14239kern_return_t
91447636
A
14240vm_map_purgable_control(
14241 vm_map_t map,
14242 vm_map_offset_t address,
14243 vm_purgable_t control,
14244 int *state)
1c79356b 14245{
91447636
A
14246 vm_map_entry_t entry;
14247 vm_object_t object;
14248 kern_return_t kr;
fe8ab488 14249 boolean_t was_nonvolatile;
1c79356b 14250
1c79356b 14251 /*
91447636
A
14252 * Vet all the input parameters and current type and state of the
14253 * underlaying object. Return with an error if anything is amiss.
1c79356b 14254 */
91447636
A
14255 if (map == VM_MAP_NULL)
14256 return(KERN_INVALID_ARGUMENT);
1c79356b 14257
91447636 14258 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7
A
14259 control != VM_PURGABLE_GET_STATE &&
14260 control != VM_PURGABLE_PURGE_ALL)
91447636 14261 return(KERN_INVALID_ARGUMENT);
1c79356b 14262
b0d623f7
A
14263 if (control == VM_PURGABLE_PURGE_ALL) {
14264 vm_purgeable_object_purge_all();
14265 return KERN_SUCCESS;
14266 }
14267
91447636 14268 if (control == VM_PURGABLE_SET_STATE &&
b0d623f7 14269 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2d21ac55 14270 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
91447636
A
14271 return(KERN_INVALID_ARGUMENT);
14272
b0d623f7 14273 vm_map_lock_read(map);
91447636
A
14274
14275 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14276
14277 /*
14278 * Must pass a valid non-submap address.
14279 */
b0d623f7 14280 vm_map_unlock_read(map);
91447636
A
14281 return(KERN_INVALID_ADDRESS);
14282 }
14283
14284 if ((entry->protection & VM_PROT_WRITE) == 0) {
14285 /*
14286 * Can't apply purgable controls to something you can't write.
14287 */
b0d623f7 14288 vm_map_unlock_read(map);
91447636
A
14289 return(KERN_PROTECTION_FAILURE);
14290 }
14291
3e170ce0 14292 object = VME_OBJECT(entry);
fe8ab488
A
14293 if (object == VM_OBJECT_NULL ||
14294 object->purgable == VM_PURGABLE_DENY) {
91447636 14295 /*
fe8ab488 14296 * Object must already be present and be purgeable.
91447636 14297 */
b0d623f7 14298 vm_map_unlock_read(map);
91447636
A
14299 return KERN_INVALID_ARGUMENT;
14300 }
14301
14302 vm_object_lock(object);
14303
39236c6e 14304#if 00
3e170ce0 14305 if (VME_OFFSET(entry) != 0 ||
6d2010ae 14306 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
14307 /*
14308 * Can only apply purgable controls to the whole (existing)
14309 * object at once.
14310 */
b0d623f7 14311 vm_map_unlock_read(map);
91447636
A
14312 vm_object_unlock(object);
14313 return KERN_INVALID_ARGUMENT;
1c79356b 14314 }
39236c6e 14315#endif
fe8ab488
A
14316
14317 assert(!entry->is_sub_map);
14318 assert(!entry->use_pmap); /* purgeable has its own accounting */
14319
b0d623f7 14320 vm_map_unlock_read(map);
1c79356b 14321
fe8ab488
A
14322 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14323
91447636 14324 kr = vm_object_purgable_control(object, control, state);
1c79356b 14325
fe8ab488
A
14326 if (was_nonvolatile &&
14327 object->purgable != VM_PURGABLE_NONVOLATILE &&
14328 map->pmap == kernel_pmap) {
14329#if DEBUG
14330 object->vo_purgeable_volatilizer = kernel_task;
14331#endif /* DEBUG */
14332 }
14333
91447636 14334 vm_object_unlock(object);
1c79356b 14335
91447636
A
14336 return kr;
14337}
1c79356b 14338
91447636 14339kern_return_t
b0d623f7 14340vm_map_page_query_internal(
2d21ac55 14341 vm_map_t target_map,
91447636 14342 vm_map_offset_t offset,
2d21ac55
A
14343 int *disposition,
14344 int *ref_count)
91447636 14345{
b0d623f7
A
14346 kern_return_t kr;
14347 vm_page_info_basic_data_t info;
14348 mach_msg_type_number_t count;
14349
14350 count = VM_PAGE_INFO_BASIC_COUNT;
14351 kr = vm_map_page_info(target_map,
14352 offset,
14353 VM_PAGE_INFO_BASIC,
14354 (vm_page_info_t) &info,
14355 &count);
14356 if (kr == KERN_SUCCESS) {
14357 *disposition = info.disposition;
14358 *ref_count = info.ref_count;
14359 } else {
14360 *disposition = 0;
14361 *ref_count = 0;
14362 }
2d21ac55 14363
b0d623f7
A
14364 return kr;
14365}
14366
14367kern_return_t
14368vm_map_page_info(
14369 vm_map_t map,
14370 vm_map_offset_t offset,
14371 vm_page_info_flavor_t flavor,
14372 vm_page_info_t info,
14373 mach_msg_type_number_t *count)
14374{
14375 vm_map_entry_t map_entry;
14376 vm_object_t object;
14377 vm_page_t m;
14378 kern_return_t kr;
14379 kern_return_t retval = KERN_SUCCESS;
14380 boolean_t top_object;
14381 int disposition;
14382 int ref_count;
b0d623f7
A
14383 vm_page_info_basic_t basic_info;
14384 int depth;
6d2010ae 14385 vm_map_offset_t offset_in_page;
2d21ac55 14386
b0d623f7
A
14387 switch (flavor) {
14388 case VM_PAGE_INFO_BASIC:
14389 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
14390 /*
14391 * The "vm_page_info_basic_data" structure was not
14392 * properly padded, so allow the size to be off by
14393 * one to maintain backwards binary compatibility...
14394 */
14395 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14396 return KERN_INVALID_ARGUMENT;
b0d623f7
A
14397 }
14398 break;
14399 default:
14400 return KERN_INVALID_ARGUMENT;
91447636 14401 }
2d21ac55 14402
b0d623f7
A
14403 disposition = 0;
14404 ref_count = 0;
b0d623f7
A
14405 top_object = TRUE;
14406 depth = 0;
14407
14408 retval = KERN_SUCCESS;
6d2010ae 14409 offset_in_page = offset & PAGE_MASK;
39236c6e 14410 offset = vm_map_trunc_page(offset, PAGE_MASK);
b0d623f7
A
14411
14412 vm_map_lock_read(map);
14413
14414 /*
14415 * First, find the map entry covering "offset", going down
14416 * submaps if necessary.
14417 */
14418 for (;;) {
14419 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14420 vm_map_unlock_read(map);
14421 return KERN_INVALID_ADDRESS;
14422 }
14423 /* compute offset from this map entry's start */
14424 offset -= map_entry->vme_start;
14425 /* compute offset into this map entry's object (or submap) */
3e170ce0 14426 offset += VME_OFFSET(map_entry);
b0d623f7
A
14427
14428 if (map_entry->is_sub_map) {
14429 vm_map_t sub_map;
2d21ac55 14430
3e170ce0 14431 sub_map = VME_SUBMAP(map_entry);
2d21ac55 14432 vm_map_lock_read(sub_map);
b0d623f7 14433 vm_map_unlock_read(map);
2d21ac55 14434
b0d623f7
A
14435 map = sub_map;
14436
14437 ref_count = MAX(ref_count, map->ref_count);
14438 continue;
1c79356b 14439 }
b0d623f7 14440 break;
91447636 14441 }
b0d623f7 14442
3e170ce0 14443 object = VME_OBJECT(map_entry);
b0d623f7
A
14444 if (object == VM_OBJECT_NULL) {
14445 /* no object -> no page */
14446 vm_map_unlock_read(map);
14447 goto done;
14448 }
14449
91447636 14450 vm_object_lock(object);
b0d623f7
A
14451 vm_map_unlock_read(map);
14452
14453 /*
14454 * Go down the VM object shadow chain until we find the page
14455 * we're looking for.
14456 */
14457 for (;;) {
14458 ref_count = MAX(ref_count, object->ref_count);
2d21ac55 14459
91447636 14460 m = vm_page_lookup(object, offset);
2d21ac55 14461
91447636 14462 if (m != VM_PAGE_NULL) {
b0d623f7 14463 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
91447636
A
14464 break;
14465 } else {
2d21ac55
A
14466#if MACH_PAGEMAP
14467 if (object->existence_map) {
b0d623f7
A
14468 if (vm_external_state_get(object->existence_map,
14469 offset) ==
14470 VM_EXTERNAL_STATE_EXISTS) {
2d21ac55
A
14471 /*
14472 * this page has been paged out
14473 */
b0d623f7 14474 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
2d21ac55
A
14475 break;
14476 }
14477 } else
14478#endif
39236c6e
A
14479 if (object->internal &&
14480 object->alive &&
14481 !object->terminating &&
14482 object->pager_ready) {
14483
14484 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14485 if (VM_COMPRESSOR_PAGER_STATE_GET(
14486 object,
14487 offset)
14488 == VM_EXTERNAL_STATE_EXISTS) {
14489 /* the pager has that page */
14490 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14491 break;
14492 }
14493 } else {
b0d623f7 14494 memory_object_t pager;
2d21ac55 14495
b0d623f7
A
14496 vm_object_paging_begin(object);
14497 pager = object->pager;
14498 vm_object_unlock(object);
2d21ac55 14499
2d21ac55 14500 /*
b0d623f7
A
14501 * Ask the default pager if
14502 * it has this page.
2d21ac55 14503 */
b0d623f7
A
14504 kr = memory_object_data_request(
14505 pager,
14506 offset + object->paging_offset,
14507 0, /* just poke the pager */
14508 VM_PROT_READ,
14509 NULL);
14510
14511 vm_object_lock(object);
14512 vm_object_paging_end(object);
14513
14514 if (kr == KERN_SUCCESS) {
14515 /* the default pager has it */
14516 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14517 break;
14518 }
2d21ac55
A
14519 }
14520 }
b0d623f7 14521
2d21ac55
A
14522 if (object->shadow != VM_OBJECT_NULL) {
14523 vm_object_t shadow;
14524
6d2010ae 14525 offset += object->vo_shadow_offset;
2d21ac55
A
14526 shadow = object->shadow;
14527
14528 vm_object_lock(shadow);
14529 vm_object_unlock(object);
14530
14531 object = shadow;
14532 top_object = FALSE;
b0d623f7 14533 depth++;
2d21ac55 14534 } else {
b0d623f7
A
14535// if (!object->internal)
14536// break;
14537// retval = KERN_FAILURE;
14538// goto done_with_object;
14539 break;
91447636 14540 }
91447636
A
14541 }
14542 }
91447636
A
14543 /* The ref_count is not strictly accurate, it measures the number */
14544 /* of entities holding a ref on the object, they may not be mapping */
14545 /* the object or may not be mapping the section holding the */
14546 /* target page but its still a ball park number and though an over- */
14547 /* count, it picks up the copy-on-write cases */
1c79356b 14548
91447636
A
14549 /* We could also get a picture of page sharing from pmap_attributes */
14550 /* but this would under count as only faulted-in mappings would */
14551 /* show up. */
1c79356b 14552
2d21ac55 14553 if (top_object == TRUE && object->shadow)
b0d623f7
A
14554 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14555
14556 if (! object->internal)
14557 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
2d21ac55
A
14558
14559 if (m == VM_PAGE_NULL)
b0d623f7 14560 goto done_with_object;
2d21ac55 14561
91447636 14562 if (m->fictitious) {
b0d623f7
A
14563 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14564 goto done_with_object;
91447636 14565 }
2d21ac55 14566 if (m->dirty || pmap_is_modified(m->phys_page))
b0d623f7 14567 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
1c79356b 14568
2d21ac55 14569 if (m->reference || pmap_is_referenced(m->phys_page))
b0d623f7 14570 disposition |= VM_PAGE_QUERY_PAGE_REF;
1c79356b 14571
2d21ac55 14572 if (m->speculative)
b0d623f7 14573 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
1c79356b 14574
593a1d5f 14575 if (m->cs_validated)
b0d623f7 14576 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
593a1d5f 14577 if (m->cs_tainted)
b0d623f7 14578 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
c18c124e
A
14579 if (m->cs_nx)
14580 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
593a1d5f 14581
b0d623f7 14582done_with_object:
2d21ac55 14583 vm_object_unlock(object);
b0d623f7
A
14584done:
14585
14586 switch (flavor) {
14587 case VM_PAGE_INFO_BASIC:
14588 basic_info = (vm_page_info_basic_t) info;
14589 basic_info->disposition = disposition;
14590 basic_info->ref_count = ref_count;
39236c6e
A
14591 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14592 VM_KERNEL_ADDRPERM(object);
6d2010ae
A
14593 basic_info->offset =
14594 (memory_object_offset_t) offset + offset_in_page;
b0d623f7
A
14595 basic_info->depth = depth;
14596 break;
14597 }
0c530ab8 14598
2d21ac55 14599 return retval;
91447636
A
14600}
14601
14602/*
14603 * vm_map_msync
14604 *
14605 * Synchronises the memory range specified with its backing store
14606 * image by either flushing or cleaning the contents to the appropriate
14607 * memory manager engaging in a memory object synchronize dialog with
14608 * the manager. The client doesn't return until the manager issues
14609 * m_o_s_completed message. MIG Magically converts user task parameter
14610 * to the task's address map.
14611 *
14612 * interpretation of sync_flags
14613 * VM_SYNC_INVALIDATE - discard pages, only return precious
14614 * pages to manager.
14615 *
14616 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14617 * - discard pages, write dirty or precious
14618 * pages back to memory manager.
14619 *
14620 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14621 * - write dirty or precious pages back to
14622 * the memory manager.
14623 *
14624 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
14625 * is a hole in the region, and we would
14626 * have returned KERN_SUCCESS, return
14627 * KERN_INVALID_ADDRESS instead.
14628 *
14629 * NOTE
14630 * The memory object attributes have not yet been implemented, this
14631 * function will have to deal with the invalidate attribute
14632 *
14633 * RETURNS
14634 * KERN_INVALID_TASK Bad task parameter
14635 * KERN_INVALID_ARGUMENT both sync and async were specified.
14636 * KERN_SUCCESS The usual.
14637 * KERN_INVALID_ADDRESS There was a hole in the region.
14638 */
14639
14640kern_return_t
14641vm_map_msync(
14642 vm_map_t map,
14643 vm_map_address_t address,
14644 vm_map_size_t size,
14645 vm_sync_t sync_flags)
14646{
14647 msync_req_t msr;
14648 msync_req_t new_msr;
14649 queue_chain_t req_q; /* queue of requests for this msync */
14650 vm_map_entry_t entry;
14651 vm_map_size_t amount_left;
14652 vm_object_offset_t offset;
14653 boolean_t do_sync_req;
91447636 14654 boolean_t had_hole = FALSE;
2d21ac55 14655 memory_object_t pager;
3e170ce0 14656 vm_map_offset_t pmap_offset;
91447636
A
14657
14658 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
14659 (sync_flags & VM_SYNC_SYNCHRONOUS))
14660 return(KERN_INVALID_ARGUMENT);
1c79356b
A
14661
14662 /*
91447636 14663 * align address and size on page boundaries
1c79356b 14664 */
39236c6e
A
14665 size = (vm_map_round_page(address + size,
14666 VM_MAP_PAGE_MASK(map)) -
14667 vm_map_trunc_page(address,
14668 VM_MAP_PAGE_MASK(map)));
14669 address = vm_map_trunc_page(address,
14670 VM_MAP_PAGE_MASK(map));
1c79356b 14671
91447636
A
14672 if (map == VM_MAP_NULL)
14673 return(KERN_INVALID_TASK);
1c79356b 14674
91447636
A
14675 if (size == 0)
14676 return(KERN_SUCCESS);
1c79356b 14677
91447636
A
14678 queue_init(&req_q);
14679 amount_left = size;
1c79356b 14680
91447636
A
14681 while (amount_left > 0) {
14682 vm_object_size_t flush_size;
14683 vm_object_t object;
1c79356b 14684
91447636
A
14685 vm_map_lock(map);
14686 if (!vm_map_lookup_entry(map,
3e170ce0 14687 address,
39236c6e 14688 &entry)) {
91447636 14689
2d21ac55 14690 vm_map_size_t skip;
91447636
A
14691
14692 /*
14693 * hole in the address map.
14694 */
14695 had_hole = TRUE;
14696
14697 /*
14698 * Check for empty map.
14699 */
14700 if (entry == vm_map_to_entry(map) &&
14701 entry->vme_next == entry) {
14702 vm_map_unlock(map);
14703 break;
14704 }
14705 /*
14706 * Check that we don't wrap and that
14707 * we have at least one real map entry.
14708 */
14709 if ((map->hdr.nentries == 0) ||
14710 (entry->vme_next->vme_start < address)) {
14711 vm_map_unlock(map);
14712 break;
14713 }
14714 /*
14715 * Move up to the next entry if needed
14716 */
14717 skip = (entry->vme_next->vme_start - address);
14718 if (skip >= amount_left)
14719 amount_left = 0;
14720 else
14721 amount_left -= skip;
14722 address = entry->vme_next->vme_start;
14723 vm_map_unlock(map);
14724 continue;
14725 }
1c79356b 14726
91447636 14727 offset = address - entry->vme_start;
3e170ce0 14728 pmap_offset = address;
1c79356b 14729
91447636
A
14730 /*
14731 * do we have more to flush than is contained in this
14732 * entry ?
14733 */
14734 if (amount_left + entry->vme_start + offset > entry->vme_end) {
14735 flush_size = entry->vme_end -
2d21ac55 14736 (entry->vme_start + offset);
91447636
A
14737 } else {
14738 flush_size = amount_left;
14739 }
14740 amount_left -= flush_size;
14741 address += flush_size;
1c79356b 14742
91447636
A
14743 if (entry->is_sub_map == TRUE) {
14744 vm_map_t local_map;
14745 vm_map_offset_t local_offset;
1c79356b 14746
3e170ce0
A
14747 local_map = VME_SUBMAP(entry);
14748 local_offset = VME_OFFSET(entry);
91447636
A
14749 vm_map_unlock(map);
14750 if (vm_map_msync(
2d21ac55
A
14751 local_map,
14752 local_offset,
14753 flush_size,
14754 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
14755 had_hole = TRUE;
14756 }
14757 continue;
14758 }
3e170ce0 14759 object = VME_OBJECT(entry);
1c79356b 14760
91447636
A
14761 /*
14762 * We can't sync this object if the object has not been
14763 * created yet
14764 */
14765 if (object == VM_OBJECT_NULL) {
14766 vm_map_unlock(map);
14767 continue;
14768 }
3e170ce0 14769 offset += VME_OFFSET(entry);
1c79356b 14770
91447636 14771 vm_object_lock(object);
1c79356b 14772
91447636 14773 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
b0d623f7
A
14774 int kill_pages = 0;
14775 boolean_t reusable_pages = FALSE;
91447636
A
14776
14777 if (sync_flags & VM_SYNC_KILLPAGES) {
b0d623f7 14778 if (object->ref_count == 1 && !object->shadow)
91447636
A
14779 kill_pages = 1;
14780 else
14781 kill_pages = -1;
14782 }
14783 if (kill_pages != -1)
3e170ce0
A
14784 vm_object_deactivate_pages(
14785 object,
14786 offset,
14787 (vm_object_size_t) flush_size,
14788 kill_pages,
14789 reusable_pages,
14790 map->pmap,
14791 pmap_offset);
91447636
A
14792 vm_object_unlock(object);
14793 vm_map_unlock(map);
14794 continue;
1c79356b 14795 }
91447636
A
14796 /*
14797 * We can't sync this object if there isn't a pager.
14798 * Don't bother to sync internal objects, since there can't
14799 * be any "permanent" storage for these objects anyway.
14800 */
14801 if ((object->pager == MEMORY_OBJECT_NULL) ||
14802 (object->internal) || (object->private)) {
14803 vm_object_unlock(object);
14804 vm_map_unlock(map);
14805 continue;
14806 }
14807 /*
14808 * keep reference on the object until syncing is done
14809 */
2d21ac55 14810 vm_object_reference_locked(object);
91447636 14811 vm_object_unlock(object);
1c79356b 14812
91447636 14813 vm_map_unlock(map);
1c79356b 14814
91447636 14815 do_sync_req = vm_object_sync(object,
2d21ac55
A
14816 offset,
14817 flush_size,
14818 sync_flags & VM_SYNC_INVALIDATE,
b0d623f7
A
14819 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
14820 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
2d21ac55 14821 sync_flags & VM_SYNC_SYNCHRONOUS);
91447636
A
14822 /*
14823 * only send a m_o_s if we returned pages or if the entry
14824 * is writable (ie dirty pages may have already been sent back)
14825 */
b0d623f7 14826 if (!do_sync_req) {
2d21ac55
A
14827 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
14828 /*
14829 * clear out the clustering and read-ahead hints
14830 */
14831 vm_object_lock(object);
14832
14833 object->pages_created = 0;
14834 object->pages_used = 0;
14835 object->sequential = 0;
14836 object->last_alloc = 0;
14837
14838 vm_object_unlock(object);
14839 }
91447636
A
14840 vm_object_deallocate(object);
14841 continue;
1c79356b 14842 }
91447636 14843 msync_req_alloc(new_msr);
1c79356b 14844
91447636
A
14845 vm_object_lock(object);
14846 offset += object->paging_offset;
1c79356b 14847
91447636
A
14848 new_msr->offset = offset;
14849 new_msr->length = flush_size;
14850 new_msr->object = object;
14851 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
2d21ac55
A
14852 re_iterate:
14853
14854 /*
14855 * We can't sync this object if there isn't a pager. The
14856 * pager can disappear anytime we're not holding the object
14857 * lock. So this has to be checked anytime we goto re_iterate.
14858 */
14859
14860 pager = object->pager;
14861
14862 if (pager == MEMORY_OBJECT_NULL) {
14863 vm_object_unlock(object);
14864 vm_object_deallocate(object);
39236c6e
A
14865 msync_req_free(new_msr);
14866 new_msr = NULL;
2d21ac55
A
14867 continue;
14868 }
14869
91447636
A
14870 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14871 /*
14872 * need to check for overlapping entry, if found, wait
14873 * on overlapping msr to be done, then reiterate
14874 */
14875 msr_lock(msr);
14876 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14877 ((offset >= msr->offset &&
14878 offset < (msr->offset + msr->length)) ||
14879 (msr->offset >= offset &&
14880 msr->offset < (offset + flush_size))))
14881 {
14882 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14883 msr_unlock(msr);
14884 vm_object_unlock(object);
14885 thread_block(THREAD_CONTINUE_NULL);
14886 vm_object_lock(object);
14887 goto re_iterate;
14888 }
14889 msr_unlock(msr);
14890 }/* queue_iterate */
1c79356b 14891
91447636 14892 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
2d21ac55
A
14893
14894 vm_object_paging_begin(object);
91447636 14895 vm_object_unlock(object);
1c79356b 14896
91447636
A
14897 queue_enter(&req_q, new_msr, msync_req_t, req_q);
14898
14899 (void) memory_object_synchronize(
2d21ac55
A
14900 pager,
14901 offset,
14902 flush_size,
14903 sync_flags & ~VM_SYNC_CONTIGUOUS);
14904
14905 vm_object_lock(object);
14906 vm_object_paging_end(object);
14907 vm_object_unlock(object);
91447636
A
14908 }/* while */
14909
14910 /*
14911 * wait for memory_object_sychronize_completed messages from pager(s)
14912 */
14913
14914 while (!queue_empty(&req_q)) {
14915 msr = (msync_req_t)queue_first(&req_q);
14916 msr_lock(msr);
14917 while(msr->flag != VM_MSYNC_DONE) {
14918 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14919 msr_unlock(msr);
14920 thread_block(THREAD_CONTINUE_NULL);
14921 msr_lock(msr);
14922 }/* while */
14923 queue_remove(&req_q, msr, msync_req_t, req_q);
14924 msr_unlock(msr);
14925 vm_object_deallocate(msr->object);
14926 msync_req_free(msr);
14927 }/* queue_iterate */
14928
14929 /* for proper msync() behaviour */
14930 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14931 return(KERN_INVALID_ADDRESS);
14932
14933 return(KERN_SUCCESS);
14934}/* vm_msync */
1c79356b 14935
1c79356b 14936/*
91447636
A
14937 * Routine: convert_port_entry_to_map
14938 * Purpose:
14939 * Convert from a port specifying an entry or a task
14940 * to a map. Doesn't consume the port ref; produces a map ref,
14941 * which may be null. Unlike convert_port_to_map, the
14942 * port may be task or a named entry backed.
14943 * Conditions:
14944 * Nothing locked.
1c79356b 14945 */
1c79356b 14946
1c79356b 14947
91447636
A
14948vm_map_t
14949convert_port_entry_to_map(
14950 ipc_port_t port)
14951{
14952 vm_map_t map;
14953 vm_named_entry_t named_entry;
2d21ac55 14954 uint32_t try_failed_count = 0;
1c79356b 14955
91447636
A
14956 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14957 while(TRUE) {
14958 ip_lock(port);
14959 if(ip_active(port) && (ip_kotype(port)
2d21ac55 14960 == IKOT_NAMED_ENTRY)) {
91447636 14961 named_entry =
2d21ac55 14962 (vm_named_entry_t)port->ip_kobject;
b0d623f7 14963 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 14964 ip_unlock(port);
2d21ac55
A
14965
14966 try_failed_count++;
14967 mutex_pause(try_failed_count);
91447636
A
14968 continue;
14969 }
14970 named_entry->ref_count++;
b0d623f7 14971 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
14972 ip_unlock(port);
14973 if ((named_entry->is_sub_map) &&
2d21ac55
A
14974 (named_entry->protection
14975 & VM_PROT_WRITE)) {
91447636
A
14976 map = named_entry->backing.map;
14977 } else {
14978 mach_destroy_memory_entry(port);
14979 return VM_MAP_NULL;
14980 }
14981 vm_map_reference_swap(map);
14982 mach_destroy_memory_entry(port);
14983 break;
14984 }
14985 else
14986 return VM_MAP_NULL;
14987 }
1c79356b 14988 }
91447636
A
14989 else
14990 map = convert_port_to_map(port);
1c79356b 14991
91447636
A
14992 return map;
14993}
1c79356b 14994
91447636
A
14995/*
14996 * Routine: convert_port_entry_to_object
14997 * Purpose:
14998 * Convert from a port specifying a named entry to an
14999 * object. Doesn't consume the port ref; produces a map ref,
15000 * which may be null.
15001 * Conditions:
15002 * Nothing locked.
15003 */
1c79356b 15004
1c79356b 15005
91447636
A
15006vm_object_t
15007convert_port_entry_to_object(
15008 ipc_port_t port)
15009{
39236c6e 15010 vm_object_t object = VM_OBJECT_NULL;
91447636 15011 vm_named_entry_t named_entry;
39236c6e
A
15012 uint32_t try_failed_count = 0;
15013
15014 if (IP_VALID(port) &&
15015 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15016 try_again:
15017 ip_lock(port);
15018 if (ip_active(port) &&
15019 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15020 named_entry = (vm_named_entry_t)port->ip_kobject;
15021 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 15022 ip_unlock(port);
39236c6e
A
15023 try_failed_count++;
15024 mutex_pause(try_failed_count);
15025 goto try_again;
15026 }
15027 named_entry->ref_count++;
15028 lck_mtx_unlock(&(named_entry)->Lock);
15029 ip_unlock(port);
15030 if (!(named_entry->is_sub_map) &&
15031 !(named_entry->is_pager) &&
15032 !(named_entry->is_copy) &&
15033 (named_entry->protection & VM_PROT_WRITE)) {
15034 object = named_entry->backing.object;
15035 vm_object_reference(object);
91447636 15036 }
39236c6e 15037 mach_destroy_memory_entry(port);
1c79356b 15038 }
1c79356b 15039 }
91447636
A
15040
15041 return object;
1c79356b 15042}
9bccf70c
A
15043
15044/*
91447636
A
15045 * Export routines to other components for the things we access locally through
15046 * macros.
9bccf70c 15047 */
91447636
A
15048#undef current_map
15049vm_map_t
15050current_map(void)
9bccf70c 15051{
91447636 15052 return (current_map_fast());
9bccf70c
A
15053}
15054
15055/*
15056 * vm_map_reference:
15057 *
15058 * Most code internal to the osfmk will go through a
15059 * macro defining this. This is always here for the
15060 * use of other kernel components.
15061 */
15062#undef vm_map_reference
15063void
15064vm_map_reference(
15065 register vm_map_t map)
15066{
15067 if (map == VM_MAP_NULL)
15068 return;
15069
b0d623f7 15070 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15071#if TASK_SWAPPER
15072 assert(map->res_count > 0);
15073 assert(map->ref_count >= map->res_count);
15074 map->res_count++;
15075#endif
15076 map->ref_count++;
b0d623f7 15077 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15078}
15079
15080/*
15081 * vm_map_deallocate:
15082 *
15083 * Removes a reference from the specified map,
15084 * destroying it if no references remain.
15085 * The map should not be locked.
15086 */
15087void
15088vm_map_deallocate(
15089 register vm_map_t map)
15090{
15091 unsigned int ref;
15092
15093 if (map == VM_MAP_NULL)
15094 return;
15095
b0d623f7 15096 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15097 ref = --map->ref_count;
15098 if (ref > 0) {
15099 vm_map_res_deallocate(map);
b0d623f7 15100 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15101 return;
15102 }
15103 assert(map->ref_count == 0);
b0d623f7 15104 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15105
15106#if TASK_SWAPPER
15107 /*
15108 * The map residence count isn't decremented here because
15109 * the vm_map_delete below will traverse the entire map,
15110 * deleting entries, and the residence counts on objects
15111 * and sharing maps will go away then.
15112 */
15113#endif
15114
2d21ac55 15115 vm_map_destroy(map, VM_MAP_NO_FLAGS);
0c530ab8 15116}
91447636 15117
91447636 15118
0c530ab8
A
15119void
15120vm_map_disable_NX(vm_map_t map)
15121{
15122 if (map == NULL)
15123 return;
15124 if (map->pmap == NULL)
15125 return;
15126
15127 pmap_disable_NX(map->pmap);
15128}
15129
6d2010ae
A
15130void
15131vm_map_disallow_data_exec(vm_map_t map)
15132{
15133 if (map == NULL)
15134 return;
15135
15136 map->map_disallow_data_exec = TRUE;
15137}
15138
0c530ab8
A
15139/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15140 * more descriptive.
15141 */
15142void
15143vm_map_set_32bit(vm_map_t map)
15144{
15145 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15146}
15147
15148
15149void
15150vm_map_set_64bit(vm_map_t map)
15151{
15152 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15153}
15154
15155vm_map_offset_t
3e170ce0 15156vm_compute_max_offset(boolean_t is64)
0c530ab8
A
15157{
15158 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15159}
15160
39236c6e
A
15161uint64_t
15162vm_map_get_max_aslr_slide_pages(vm_map_t map)
15163{
15164 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15165}
15166
0c530ab8 15167boolean_t
2d21ac55
A
15168vm_map_is_64bit(
15169 vm_map_t map)
15170{
15171 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15172}
15173
15174boolean_t
316670eb
A
15175vm_map_has_hard_pagezero(
15176 vm_map_t map,
15177 vm_map_offset_t pagezero_size)
0c530ab8
A
15178{
15179 /*
15180 * XXX FBDP
15181 * We should lock the VM map (for read) here but we can get away
15182 * with it for now because there can't really be any race condition:
15183 * the VM map's min_offset is changed only when the VM map is created
15184 * and when the zero page is established (when the binary gets loaded),
15185 * and this routine gets called only when the task terminates and the
15186 * VM map is being torn down, and when a new map is created via
15187 * load_machfile()/execve().
15188 */
316670eb 15189 return (map->min_offset >= pagezero_size);
0c530ab8
A
15190}
15191
316670eb
A
15192/*
15193 * Raise a VM map's maximun offset.
15194 */
15195kern_return_t
15196vm_map_raise_max_offset(
15197 vm_map_t map,
15198 vm_map_offset_t new_max_offset)
15199{
15200 kern_return_t ret;
15201
15202 vm_map_lock(map);
15203 ret = KERN_INVALID_ADDRESS;
15204
15205 if (new_max_offset >= map->max_offset) {
15206 if (!vm_map_is_64bit(map)) {
15207 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15208 map->max_offset = new_max_offset;
15209 ret = KERN_SUCCESS;
15210 }
15211 } else {
15212 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15213 map->max_offset = new_max_offset;
15214 ret = KERN_SUCCESS;
15215 }
15216 }
15217 }
15218
15219 vm_map_unlock(map);
15220 return ret;
15221}
15222
15223
0c530ab8
A
15224/*
15225 * Raise a VM map's minimum offset.
15226 * To strictly enforce "page zero" reservation.
15227 */
15228kern_return_t
15229vm_map_raise_min_offset(
15230 vm_map_t map,
15231 vm_map_offset_t new_min_offset)
15232{
15233 vm_map_entry_t first_entry;
15234
39236c6e
A
15235 new_min_offset = vm_map_round_page(new_min_offset,
15236 VM_MAP_PAGE_MASK(map));
0c530ab8
A
15237
15238 vm_map_lock(map);
15239
15240 if (new_min_offset < map->min_offset) {
15241 /*
15242 * Can't move min_offset backwards, as that would expose
15243 * a part of the address space that was previously, and for
15244 * possibly good reasons, inaccessible.
15245 */
15246 vm_map_unlock(map);
15247 return KERN_INVALID_ADDRESS;
15248 }
3e170ce0
A
15249 if (new_min_offset >= map->max_offset) {
15250 /* can't go beyond the end of the address space */
15251 vm_map_unlock(map);
15252 return KERN_INVALID_ADDRESS;
15253 }
0c530ab8
A
15254
15255 first_entry = vm_map_first_entry(map);
15256 if (first_entry != vm_map_to_entry(map) &&
15257 first_entry->vme_start < new_min_offset) {
15258 /*
15259 * Some memory was already allocated below the new
15260 * minimun offset. It's too late to change it now...
15261 */
15262 vm_map_unlock(map);
15263 return KERN_NO_SPACE;
15264 }
15265
15266 map->min_offset = new_min_offset;
15267
3e170ce0
A
15268 assert(map->holes_list);
15269 map->holes_list->start = new_min_offset;
15270 assert(new_min_offset < map->holes_list->end);
15271
0c530ab8
A
15272 vm_map_unlock(map);
15273
15274 return KERN_SUCCESS;
15275}
2d21ac55
A
15276
15277/*
15278 * Set the limit on the maximum amount of user wired memory allowed for this map.
15279 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15280 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
15281 * don't have to reach over to the BSD data structures.
15282 */
15283
15284void
15285vm_map_set_user_wire_limit(vm_map_t map,
15286 vm_size_t limit)
15287{
15288 map->user_wire_limit = limit;
15289}
593a1d5f 15290
b0d623f7
A
15291
15292void vm_map_switch_protect(vm_map_t map,
15293 boolean_t val)
593a1d5f
A
15294{
15295 vm_map_lock(map);
b0d623f7 15296 map->switch_protect=val;
593a1d5f 15297 vm_map_unlock(map);
b0d623f7 15298}
b7266188 15299
39236c6e
A
15300/*
15301 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15302 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15303 * bump both counters.
15304 */
15305void
15306vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15307{
15308 pmap_t pmap = vm_map_pmap(map);
15309
fe8ab488 15310 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15311 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15312}
15313
15314void
15315vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15316{
15317 pmap_t pmap = vm_map_pmap(map);
15318
fe8ab488 15319 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15320 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15321}
15322
b7266188
A
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
/*
 * Mark every resident page backing [start, end) as code-signing
 * validated.
 *
 * The whole range must be covered by a single, non-submap entry that
 * already has a VM object.
 *
 * Returns:
 *	KERN_INVALID_ARGUMENT	null map, range not covered by one entry,
 *				or entry without an object
 *	KERN_INVALID_ADDRESS	"start" is unmapped or maps a submap
 *	KERN_FAILURE		a page is not resident, or is busy or in an
 *				unusual state (pages processed before it
 *				stay validated)
 *	KERN_SUCCESS		all pages were marked
 */
kern_return_t vm_map_sign(vm_map_t map,
		 vm_map_offset_t start,
		 vm_map_offset_t end)
{
	vm_map_entry_t		entry;
	vm_page_t		m;
	vm_object_t		object;
	vm_object_offset_t	object_offset;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ARGUMENT);
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Translate "start" into an object offset while the map is still
	 * locked: once the map lock is dropped the entry may be clipped
	 * or freed, so it must not be consulted inside the page loop.
	 */
	object_offset = start - entry->vme_start + VME_OFFSET(entry);

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while(start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, object_offset);
		if (m==VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->busy ||
		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->wpmapped = FALSE;
		refmod = pmap_disconnect(m->phys_page);

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
		object_offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
6d2010ae 15415
fe8ab488
A
15416kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15417{
15418 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15419 vm_map_entry_t next_entry;
15420 kern_return_t kr = KERN_SUCCESS;
15421 vm_map_t zap_map;
15422
15423 vm_map_lock(map);
15424
15425 /*
15426 * We use a "zap_map" to avoid having to unlock
15427 * the "map" in vm_map_delete().
15428 */
15429 zap_map = vm_map_create(PMAP_NULL,
15430 map->min_offset,
15431 map->max_offset,
15432 map->hdr.entries_pageable);
15433
15434 if (zap_map == VM_MAP_NULL) {
15435 return KERN_RESOURCE_SHORTAGE;
15436 }
15437
15438 vm_map_set_page_shift(zap_map,
15439 VM_MAP_PAGE_SHIFT(map));
3e170ce0 15440 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
15441
15442 for (entry = vm_map_first_entry(map);
15443 entry != vm_map_to_entry(map);
15444 entry = next_entry) {
15445 next_entry = entry->vme_next;
15446
3e170ce0
A
15447 if (VME_OBJECT(entry) &&
15448 !entry->is_sub_map &&
15449 (VME_OBJECT(entry)->internal == TRUE) &&
15450 (VME_OBJECT(entry)->ref_count == 1)) {
fe8ab488 15451
3e170ce0
A
15452 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15453 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488
A
15454
15455 (void)vm_map_delete(map,
15456 entry->vme_start,
15457 entry->vme_end,
15458 VM_MAP_REMOVE_SAVE_ENTRIES,
15459 zap_map);
15460 }
15461 }
15462
15463 vm_map_unlock(map);
15464
15465 /*
15466 * Get rid of the "zap_maps" and all the map entries that
15467 * they may still contain.
15468 */
15469 if (zap_map != VM_MAP_NULL) {
15470 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15471 zap_map = VM_MAP_NULL;
15472 }
15473
15474 return kr;
15475}
15476
6d2010ae
A
15477#if CONFIG_FREEZE
15478
15479kern_return_t vm_map_freeze_walk(
15480 vm_map_t map,
15481 unsigned int *purgeable_count,
15482 unsigned int *wired_count,
15483 unsigned int *clean_count,
15484 unsigned int *dirty_count,
316670eb 15485 unsigned int dirty_budget,
6d2010ae
A
15486 boolean_t *has_shared)
15487{
15488 vm_map_entry_t entry;
15489
15490 vm_map_lock_read(map);
15491
15492 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15493 *has_shared = FALSE;
15494
15495 for (entry = vm_map_first_entry(map);
15496 entry != vm_map_to_entry(map);
15497 entry = entry->vme_next) {
15498 unsigned int purgeable, clean, dirty, wired;
15499 boolean_t shared;
15500
3e170ce0 15501 if ((VME_OBJECT(entry) == 0) ||
6d2010ae 15502 (entry->is_sub_map) ||
3e170ce0 15503 (VME_OBJECT(entry)->phys_contiguous)) {
6d2010ae
A
15504 continue;
15505 }
15506
3e170ce0 15507 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, VME_OBJECT(entry), NULL);
6d2010ae
A
15508
15509 *purgeable_count += purgeable;
15510 *wired_count += wired;
15511 *clean_count += clean;
15512 *dirty_count += dirty;
15513
15514 if (shared) {
15515 *has_shared = TRUE;
15516 }
316670eb
A
15517
15518 /* Adjust pageout budget and finish up if reached */
15519 if (dirty_budget) {
15520 dirty_budget -= dirty;
15521 if (dirty_budget == 0) {
15522 break;
15523 }
15524 }
6d2010ae
A
15525 }
15526
15527 vm_map_unlock_read(map);
15528
15529 return KERN_SUCCESS;
15530}
15531
3e170ce0
A
/*
 * Freezer bookkeeping globals referenced by vm_map_freeze().
 */
/*
 * Reset to 0 at the end of vm_map_freeze() when
 * DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED is true; the comment at
 * that reset describes it as tracking the number of swapped c_segs.
 */
15532int c_freezer_swapout_count;
/* Reset to 0 by vm_map_freeze() just before it walks the map entries. */
15533int c_freezer_compression_count = 0;
/* Set with clock_get_uptime() by vm_map_freeze() just before it walks the map entries. */
15534AbsoluteTime c_freezer_last_yield_ts = 0;
15535
6d2010ae
A
15536kern_return_t vm_map_freeze(
15537 vm_map_t map,
15538 unsigned int *purgeable_count,
15539 unsigned int *wired_count,
15540 unsigned int *clean_count,
15541 unsigned int *dirty_count,
316670eb 15542 unsigned int dirty_budget,
6d2010ae
A
15543 boolean_t *has_shared)
15544{
39236c6e
A
15545 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15546 kern_return_t kr = KERN_SUCCESS;
15547 boolean_t default_freezer_active = TRUE;
6d2010ae
A
15548
15549 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15550 *has_shared = FALSE;
15551
6d2010ae
A
15552 /*
15553 * We need the exclusive lock here so that we can
15554 * block any page faults or lookups while we are
15555 * in the middle of freezing this vm map.
15556 */
15557 vm_map_lock(map);
15558
39236c6e
A
15559 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15560 default_freezer_active = FALSE;
3e170ce0
A
15561
15562 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15563 kr = KERN_NO_SPACE;
15564 goto done;
15565 }
316670eb 15566 }
3e170ce0 15567 assert(default_freezer_active == FALSE);
316670eb 15568
39236c6e
A
15569 if (default_freezer_active) {
15570 if (map->default_freezer_handle == NULL) {
15571 map->default_freezer_handle = default_freezer_handle_allocate();
15572 }
15573
15574 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
15575 /*
15576 * Can happen if default_freezer_handle passed in is NULL
15577 * Or, a table has already been allocated and associated
15578 * with this handle, i.e. the map is already frozen.
15579 */
15580 goto done;
15581 }
6d2010ae 15582 }
3e170ce0
A
15583 c_freezer_compression_count = 0;
15584 clock_get_uptime(&c_freezer_last_yield_ts);
15585
6d2010ae
A
15586 for (entry2 = vm_map_first_entry(map);
15587 entry2 != vm_map_to_entry(map);
15588 entry2 = entry2->vme_next) {
15589
3e170ce0 15590 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 15591
3e170ce0
A
15592 if (VME_OBJECT(entry2) &&
15593 !entry2->is_sub_map &&
15594 !VME_OBJECT(entry2)->phys_contiguous) {
39236c6e
A
15595 /* If eligible, scan the entry, moving eligible pages over to our parent object */
15596 if (default_freezer_active) {
15597 unsigned int purgeable, clean, dirty, wired;
15598 boolean_t shared;
316670eb 15599
39236c6e
A
15600 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
15601 src_object, map->default_freezer_handle);
15602
15603 *purgeable_count += purgeable;
15604 *wired_count += wired;
15605 *clean_count += clean;
15606 *dirty_count += dirty;
15607
15608 /* Adjust pageout budget and finish up if reached */
15609 if (dirty_budget) {
15610 dirty_budget -= dirty;
15611 if (dirty_budget == 0) {
15612 break;
15613 }
316670eb 15614 }
6d2010ae 15615
39236c6e
A
15616 if (shared) {
15617 *has_shared = TRUE;
15618 }
15619 } else {
3e170ce0
A
15620 if (VME_OBJECT(entry2)->internal == TRUE) {
15621
15622 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15623 /*
15624 * Pages belonging to this object could be swapped to disk.
15625 * Make sure it's not a shared object because we could end
15626 * up just bringing it back in again.
15627 */
15628 if (VME_OBJECT(entry2)->ref_count > 1) {
15629 continue;
15630 }
15631 }
15632 vm_object_compressed_freezer_pageout(VME_OBJECT(entry2));
15633 }
15634
15635 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15636 kr = KERN_NO_SPACE;
15637 break;
39236c6e 15638 }
6d2010ae
A
15639 }
15640 }
15641 }
15642
39236c6e
A
15643 if (default_freezer_active) {
15644 /* Finally, throw out the pages to swap */
15645 default_freezer_pageout(map->default_freezer_handle);
15646 }
6d2010ae
A
15647
15648done:
15649 vm_map_unlock(map);
6d2010ae 15650
3e170ce0
A
15651 if (!default_freezer_active) {
15652 vm_object_compressed_freezer_done();
15653 }
15654 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15655 /*
15656 * reset the counter tracking the # of swapped c_segs
15657 * because we are now done with this freeze session and task.
15658 */
15659 c_freezer_swapout_count = 0;
15660 }
6d2010ae
A
15661 return kr;
15662}
15663
316670eb 15664kern_return_t
6d2010ae
A
15665vm_map_thaw(
15666 vm_map_t map)
15667{
316670eb 15668 kern_return_t kr = KERN_SUCCESS;
6d2010ae 15669
39236c6e
A
15670 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15671 /*
15672 * We will on-demand thaw in the presence of the compressed pager.
15673 */
15674 return kr;
15675 }
15676
6d2010ae
A
15677 vm_map_lock(map);
15678
316670eb 15679 if (map->default_freezer_handle == NULL) {
6d2010ae
A
15680 /*
15681 * This map is not in a frozen state.
15682 */
316670eb 15683 kr = KERN_FAILURE;
6d2010ae
A
15684 goto out;
15685 }
6d2010ae 15686
39236c6e 15687 kr = default_freezer_unpack(map->default_freezer_handle);
6d2010ae
A
15688out:
15689 vm_map_unlock(map);
316670eb
A
15690
15691 return kr;
6d2010ae
A
15692}
15693#endif
e2d2fc5c 15694
e2d2fc5c
A
15695/*
15696 * vm_map_entry_should_cow_for_true_share:
15697 *
15698 * Determines if the map entry should be clipped and setup for copy-on-write
15699 * to avoid applying "true_share" to a large VM object when only a subset is
15700 * targeted.
15701 *
15702 * For now, we target only the map entries created for the Objective C
15703 * Garbage Collector, which initially have the following properties:
15704 * - alias == VM_MEMORY_MALLOC
15705 * - wired_count == 0
15706 * - !needs_copy
15707 * and a VM object with:
15708 * - internal
15709 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
15710 * - !true_share
15711 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
15712 *
15713 * Only non-kernel map entries.
e2d2fc5c
A
15714 */
15715boolean_t
15716vm_map_entry_should_cow_for_true_share(
15717 vm_map_entry_t entry)
15718{
15719 vm_object_t object;
15720
15721 if (entry->is_sub_map) {
15722 /* entry does not point at a VM object */
15723 return FALSE;
15724 }
15725
15726 if (entry->needs_copy) {
15727 /* already set for copy_on_write: done! */
15728 return FALSE;
15729 }
15730
3e170ce0
A
15731 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
15732 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 15733 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
15734 return FALSE;
15735 }
15736
15737 if (entry->wired_count) {
15738 /* wired: can't change the map entry... */
fe8ab488 15739 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
15740 return FALSE;
15741 }
15742
3e170ce0 15743 object = VME_OBJECT(entry);
e2d2fc5c
A
15744
15745 if (object == VM_OBJECT_NULL) {
15746 /* no object yet... */
15747 return FALSE;
15748 }
15749
15750 if (!object->internal) {
15751 /* not an internal object */
15752 return FALSE;
15753 }
15754
15755 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
15756 /* not the default copy strategy */
15757 return FALSE;
15758 }
15759
15760 if (object->true_share) {
15761 /* already true_share: too late to avoid it */
15762 return FALSE;
15763 }
15764
3e170ce0 15765 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
15766 object->vo_size != ANON_CHUNK_SIZE) {
15767 /* ... not an object created for the ObjC Garbage Collector */
15768 return FALSE;
15769 }
15770
3e170ce0 15771 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
15772 object->vo_size != 2048 * 4096) {
15773 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
15774 return FALSE;
15775 }
15776
15777 /*
15778 * All the criteria match: we have a large object being targeted for "true_share".
15779 * To limit the adverse side-effects linked with "true_share", tell the caller to
15780 * try and avoid setting up the entire object for "true_share" by clipping the
15781 * targeted range and setting it up for copy-on-write.
15782 */
15783 return TRUE;
15784}
39236c6e
A
15785
15786vm_map_offset_t
15787vm_map_round_page_mask(
15788 vm_map_offset_t offset,
15789 vm_map_offset_t mask)
15790{
15791 return VM_MAP_ROUND_PAGE(offset, mask);
15792}
15793
15794vm_map_offset_t
15795vm_map_trunc_page_mask(
15796 vm_map_offset_t offset,
15797 vm_map_offset_t mask)
15798{
15799 return VM_MAP_TRUNC_PAGE(offset, mask);
15800}
15801
3e170ce0
A
15802boolean_t
15803vm_map_page_aligned(
15804 vm_map_offset_t offset,
15805 vm_map_offset_t mask)
15806{
15807 return ((offset) & mask) == 0;
15808}
15809
39236c6e
A
15810int
15811vm_map_page_shift(
15812 vm_map_t map)
15813{
15814 return VM_MAP_PAGE_SHIFT(map);
15815}
15816
15817int
15818vm_map_page_size(
15819 vm_map_t map)
15820{
15821 return VM_MAP_PAGE_SIZE(map);
15822}
15823
3e170ce0 15824vm_map_offset_t
39236c6e
A
15825vm_map_page_mask(
15826 vm_map_t map)
15827{
15828 return VM_MAP_PAGE_MASK(map);
15829}
15830
15831kern_return_t
15832vm_map_set_page_shift(
15833 vm_map_t map,
15834 int pageshift)
15835{
15836 if (map->hdr.nentries != 0) {
15837 /* too late to change page size */
15838 return KERN_FAILURE;
15839 }
15840
15841 map->hdr.page_shift = pageshift;
15842
15843 return KERN_SUCCESS;
15844}
15845
fe8ab488
A
15846int
15847vm_map_purge(
15848 vm_map_t map)
15849{
15850 int num_object_purged;
15851 vm_map_entry_t entry;
15852 vm_map_offset_t next_address;
15853 vm_object_t object;
15854 int state;
15855 kern_return_t kr;
15856
15857 num_object_purged = 0;
15858
15859 vm_map_lock_read(map);
15860 entry = vm_map_first_entry(map);
15861 while (entry != vm_map_to_entry(map)) {
15862 if (entry->is_sub_map) {
15863 goto next;
15864 }
15865 if (! (entry->protection & VM_PROT_WRITE)) {
15866 goto next;
15867 }
3e170ce0 15868 object = VME_OBJECT(entry);
fe8ab488
A
15869 if (object == VM_OBJECT_NULL) {
15870 goto next;
15871 }
15872 if (object->purgable != VM_PURGABLE_VOLATILE) {
15873 goto next;
15874 }
15875
15876 vm_object_lock(object);
15877#if 00
3e170ce0 15878 if (VME_OFFSET(entry) != 0 ||
fe8ab488
A
15879 (entry->vme_end - entry->vme_start) != object->vo_size) {
15880 vm_object_unlock(object);
15881 goto next;
15882 }
15883#endif
15884 next_address = entry->vme_end;
15885 vm_map_unlock_read(map);
15886 state = VM_PURGABLE_EMPTY;
15887 kr = vm_object_purgable_control(object,
15888 VM_PURGABLE_SET_STATE,
15889 &state);
15890 if (kr == KERN_SUCCESS) {
15891 num_object_purged++;
15892 }
15893 vm_object_unlock(object);
15894
15895 vm_map_lock_read(map);
15896 if (vm_map_lookup_entry(map, next_address, &entry)) {
15897 continue;
15898 }
15899 next:
15900 entry = entry->vme_next;
15901 }
15902 vm_map_unlock_read(map);
15903
15904 return num_object_purged;
15905}
15906
39236c6e
A
15907kern_return_t
15908vm_map_query_volatile(
15909 vm_map_t map,
15910 mach_vm_size_t *volatile_virtual_size_p,
15911 mach_vm_size_t *volatile_resident_size_p,
3e170ce0
A
15912 mach_vm_size_t *volatile_compressed_size_p,
15913 mach_vm_size_t *volatile_pmap_size_p,
15914 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e
A
15915{
15916 mach_vm_size_t volatile_virtual_size;
15917 mach_vm_size_t volatile_resident_count;
3e170ce0 15918 mach_vm_size_t volatile_compressed_count;
39236c6e 15919 mach_vm_size_t volatile_pmap_count;
3e170ce0 15920 mach_vm_size_t volatile_compressed_pmap_count;
39236c6e
A
15921 mach_vm_size_t resident_count;
15922 vm_map_entry_t entry;
15923 vm_object_t object;
15924
15925 /* map should be locked by caller */
15926
15927 volatile_virtual_size = 0;
15928 volatile_resident_count = 0;
3e170ce0 15929 volatile_compressed_count = 0;
39236c6e 15930 volatile_pmap_count = 0;
3e170ce0 15931 volatile_compressed_pmap_count = 0;
39236c6e
A
15932
15933 for (entry = vm_map_first_entry(map);
15934 entry != vm_map_to_entry(map);
15935 entry = entry->vme_next) {
4bd07ac2
A
15936 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
15937
39236c6e
A
15938 if (entry->is_sub_map) {
15939 continue;
15940 }
15941 if (! (entry->protection & VM_PROT_WRITE)) {
15942 continue;
15943 }
3e170ce0 15944 object = VME_OBJECT(entry);
39236c6e
A
15945 if (object == VM_OBJECT_NULL) {
15946 continue;
15947 }
3e170ce0
A
15948 if (object->purgable != VM_PURGABLE_VOLATILE &&
15949 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
15950 continue;
15951 }
3e170ce0 15952 if (VME_OFFSET(entry)) {
39236c6e
A
15953 /*
15954 * If the map entry has been split and the object now
15955 * appears several times in the VM map, we don't want
15956 * to count the object's resident_page_count more than
15957 * once. We count it only for the first one, starting
15958 * at offset 0 and ignore the other VM map entries.
15959 */
15960 continue;
15961 }
15962 resident_count = object->resident_page_count;
3e170ce0 15963 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
15964 resident_count = 0;
15965 } else {
3e170ce0 15966 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
15967 }
15968
15969 volatile_virtual_size += entry->vme_end - entry->vme_start;
15970 volatile_resident_count += resident_count;
3e170ce0
A
15971 if (object->pager) {
15972 volatile_compressed_count +=
15973 vm_compressor_pager_get_count(object->pager);
15974 }
4bd07ac2
A
15975 pmap_compressed_bytes = 0;
15976 pmap_resident_bytes =
15977 pmap_query_resident(map->pmap,
15978 entry->vme_start,
15979 entry->vme_end,
15980 &pmap_compressed_bytes);
15981 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
15982 volatile_compressed_pmap_count += (pmap_compressed_bytes
15983 / PAGE_SIZE);
39236c6e
A
15984 }
15985
15986 /* map is still locked on return */
15987
15988 *volatile_virtual_size_p = volatile_virtual_size;
15989 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 15990 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 15991 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 15992 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
15993
15994 return KERN_SUCCESS;
15995}
fe8ab488 15996
3e170ce0
A
15997void
15998vm_map_sizes(vm_map_t map,
15999 vm_map_size_t * psize,
16000 vm_map_size_t * pfree,
16001 vm_map_size_t * plargest_free)
16002{
16003 vm_map_entry_t entry;
16004 vm_map_offset_t prev;
16005 vm_map_size_t free, total_free, largest_free;
16006 boolean_t end;
16007
16008 total_free = largest_free = 0;
16009
16010 vm_map_lock_read(map);
16011 if (psize) *psize = map->max_offset - map->min_offset;
16012
16013 prev = map->min_offset;
16014 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16015 {
16016 end = (entry == vm_map_to_entry(map));
16017
16018 if (end) free = entry->vme_end - prev;
16019 else free = entry->vme_start - prev;
16020
16021 total_free += free;
16022 if (free > largest_free) largest_free = free;
16023
16024 if (end) break;
16025 prev = entry->vme_end;
16026 }
16027 vm_map_unlock_read(map);
16028 if (pfree) *pfree = total_free;
16029 if (plargest_free) *plargest_free = largest_free;
16030}
16031
fe8ab488
A
16032#if VM_SCAN_FOR_SHADOW_CHAIN
16033int vm_map_shadow_max(vm_map_t map);
16034int vm_map_shadow_max(
16035 vm_map_t map)
16036{
16037 int shadows, shadows_max;
16038 vm_map_entry_t entry;
16039 vm_object_t object, next_object;
16040
16041 if (map == NULL)
16042 return 0;
16043
16044 shadows_max = 0;
16045
16046 vm_map_lock_read(map);
16047
16048 for (entry = vm_map_first_entry(map);
16049 entry != vm_map_to_entry(map);
16050 entry = entry->vme_next) {
16051 if (entry->is_sub_map) {
16052 continue;
16053 }
3e170ce0 16054 object = VME_OBJECT(entry);
fe8ab488
A
16055 if (object == NULL) {
16056 continue;
16057 }
16058 vm_object_lock_shared(object);
16059 for (shadows = 0;
16060 object->shadow != NULL;
16061 shadows++, object = next_object) {
16062 next_object = object->shadow;
16063 vm_object_lock_shared(next_object);
16064 vm_object_unlock(object);
16065 }
16066 vm_object_unlock(object);
16067 if (shadows > shadows_max) {
16068 shadows_max = shadows;
16069 }
16070 }
16071
16072 vm_map_unlock_read(map);
16073
16074 return shadows_max;
16075}
16076#endif /* VM_SCAN_FOR_SHADOW_CHAIN */