/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_map.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Virtual memory mapping module.
 */

#include <task_swapper.h>
#include <mach_assert.h>
#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>
#include <mach/sdt.h>

#include <kern/assert.h>
#include <kern/counters.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/cpm.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <machine/db_machdep.h>
#include <kern/xpr.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_protos.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>

/* Internal prototypes
 */

static void vm_map_simplify_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);	/* forward */

static boolean_t	vm_map_range_check(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_entry_t	*entry);

static vm_map_entry_t	_vm_map_entry_create(
	struct vm_map_header	*map_header);

static void		_vm_map_entry_dispose(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry);

static void		vm_map_pmap_enter(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_offset_t		end_addr,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_prot_t		protection);

static void		_vm_map_clip_end(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		end);

static void		_vm_map_clip_start(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		start);

static void		vm_map_entry_delete(
	vm_map_t	map,
	vm_map_entry_t	entry);

static kern_return_t	vm_map_delete(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	int		flags,
	vm_map_t	zap_map);

static kern_return_t	vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_address_t start);

static kern_return_t	vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	pmap_t		pmap);

static kern_return_t	vm_map_copyin_kernel_buffer(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result);	/* OUT */

static kern_return_t	vm_map_copyout_kernel_buffer(
	vm_map_t	map,
	vm_map_address_t *addr,	/* IN/OUT */
	vm_map_copy_t	copy,
	boolean_t	overwrite);

static void		vm_map_fork_share(
	vm_map_t	old_map,
	vm_map_entry_t	old_entry,
	vm_map_t	new_map);

static boolean_t	vm_map_fork_copy(
	vm_map_t	old_map,
	vm_map_entry_t	*old_entry_p,
	vm_map_t	new_map);

void		vm_map_region_top_walk(
	vm_map_entry_t		entry,
	vm_region_top_info_t	top);

void		vm_map_region_walk(
	vm_map_t		map,
	vm_map_offset_t		va,
	vm_map_entry_t		entry,
	vm_object_offset_t	offset,
	vm_object_size_t	range,
	vm_region_extended_info_t	extended,
	boolean_t		look_for_pages);

static kern_return_t	vm_map_wire_nested(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_prot_t		access_type,
	boolean_t		user_wire,
	pmap_t			map_pmap,
	vm_map_offset_t		pmap_addr);

static kern_return_t	vm_map_unwire_nested(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	boolean_t		user_wire,
	pmap_t			map_pmap,
	vm_map_offset_t		pmap_addr);

static kern_return_t	vm_map_overwrite_submap_recurse(
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_size_t		dst_size);

static kern_return_t	vm_map_copy_overwrite_nested(
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_copy_t		copy,
	boolean_t		interruptible,
	pmap_t			pmap,
	boolean_t		discard_on_success);

static kern_return_t	vm_map_remap_extract(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		size,
	boolean_t		copy,
	struct vm_map_header	*map_header,
	vm_prot_t		*cur_protection,
	vm_prot_t		*max_protection,
	vm_inherit_t		inheritance,
	boolean_t		pageable);

static kern_return_t	vm_map_remap_range_allocate(
	vm_map_t		map,
	vm_map_address_t	*address,
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_entry_t		*map_entry);

static void		vm_map_region_look_for_page(
	vm_map_t		map,
	vm_map_offset_t		va,
	vm_object_t		object,
	vm_object_offset_t	offset,
	int			max_refcnt,
	int			depth,
	vm_region_extended_info_t	extended);

static int		vm_map_region_count_obj_refs(
	vm_map_entry_t		entry,
	vm_object_t		object);


static kern_return_t	vm_map_willneed(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);

static kern_return_t	vm_map_reuse_pages(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);

static kern_return_t	vm_map_reusable_pages(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);

static kern_return_t	vm_map_can_reuse(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);

#if CONFIG_FREEZE
struct default_freezer_table;
__private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
__private_extern__ void default_freezer_mapping_free(void**, boolean_t all);
#endif

/*
 *	Macros to copy a vm_map_entry. We must be careful to correctly
 *	manage the wired page count. vm_map_entry_copy() creates a new
 *	map entry to the same memory - the wired count in the new entry
 *	must be set to zero. vm_map_entry_copy_full() creates a new
 *	entry that is identical to the old entry.  This preserves the
 *	wire count; it's used for map splitting and zone changing in
 *	vm_map_copyout.
 */
#define vm_map_entry_copy(NEW,OLD) \
MACRO_BEGIN				\
	*(NEW) = *(OLD);		\
	(NEW)->is_shared = FALSE;	\
	(NEW)->needs_wakeup = FALSE;	\
	(NEW)->in_transition = FALSE;	\
	(NEW)->wired_count = 0;		\
	(NEW)->user_wired_count = 0;	\
	(NEW)->permanent = FALSE;	\
MACRO_END

#define vm_map_entry_copy_full(NEW,OLD)	(*(NEW) = *(OLD))

/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 *	32-bit PPC apps may execute from both stack and data areas
 *	32-bit Intel apps may execute from data areas but not stack
 *	64-bit PPC/Intel apps may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis.  Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it.  As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */

extern int allow_data_exec, allow_stack_exec;

int
override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
{
	int current_abi;

	/*
	 * Determine if the app is running in 32 or 64 bit mode.
	 */

	if (vm_map_is_64bit(map))
		current_abi = VM_ABI_64;
	else
		current_abi = VM_ABI_32;

	/*
	 * Determine if we should allow the execution based on whether it's a
	 * stack or data area and the current architecture.
	 */

	if (user_tag == VM_MEMORY_STACK)
		return allow_stack_exec & current_abi;

	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
}

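/*
 * Illustrative example (editor's note, not part of the original source):
 * with allow_data_exec == VM_ABI_32 and allow_stack_exec == 0, a 32-bit
 * process (current_abi == VM_ABI_32) faulting on a non-executable data
 * page gets a non-zero result from override_nx() (execution allowed),
 * while the same fault on a VM_MEMORY_STACK region returns 0 (denied).
 * For a 64-bit process (current_abi == VM_ABI_64) both cases return 0.
 */
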
/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a single hint is used to speed up lookups.
 *
 *	Sharing maps have been deleted from this version of Mach.
 *	All shared objects are now mapped directly into the respective
 *	maps.  This requires a change in the copy on write strategy;
 *	the asymmetric (delayed) strategy is used for shared temporary
 *	objects instead of the symmetric (shadow) strategy.  All maps
 *	are now "top level" maps (either task map, kernel map or submap
 *	of the kernel map).
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *	No attempt is currently made to "glue back together" two
 *	abutting entries.
 *
 *	The symmetric (shadow) copy strategy implements virtual copy
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 *	It is important to note that only one writeable reference
 *	to a VM object region exists in any map when this strategy
 *	is used -- this means that shadow object creation can be
 *	delayed until a write operation occurs.  The asymmetric (delayed)
 *	strategy allows multiple maps to have writeable references to
 *	the same region of a vm object, and hence cannot delay creating
 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 *	Copying of permanent objects is completely different; see
 *	vm_object_copy_strategically() in vm_object.c.
 */

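/*
 * Clipping example (editor's illustration, not from the original source):
 * changing the protection of [0x5000, 0x7000) inside a single entry that
 * covers [0x4000, 0x8000) first clips that entry at 0x5000 and then at
 * 0x7000, leaving three entries [0x4000,0x5000) [0x5000,0x7000)
 * [0x7000,0x8000); only the middle entry is modified, and the pieces are
 * not glued back together afterwards.
 */
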
static zone_t	vm_map_zone;		/* zone for vm_map structures */
static zone_t	vm_map_entry_zone;	/* zone for vm_map_entry structures */
static zone_t	vm_map_kentry_zone;	/* zone for kernel entry structures */
static zone_t	vm_map_copy_zone;	/* zone for vm_map_copy structures */


/*
 *	Placeholder object for submap operations.  This object is dropped
 *	into the range by a call to vm_map_find, and removed when
 *	vm_map_submap creates the submap.
 */

vm_object_t	vm_submap_object;

static void		*map_data;
static vm_size_t	map_data_size;
static void		*kentry_data;
static vm_size_t	kentry_data_size;
static int		kentry_count = 2048;	/* to init kentry_data_size */

#if CONFIG_EMBEDDED
#define	NO_COALESCE_LIMIT  0
#else
#define	NO_COALESCE_LIMIT  ((1024 * 128) - 1)
#endif

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

unsigned int vm_map_set_cache_attr_count = 0;

kern_return_t
vm_map_set_cache_attr(
	vm_map_t	map,
	vm_map_offset_t	va)
{
	vm_map_entry_t	map_entry;
	vm_object_t	object;
	kern_return_t	kr = KERN_SUCCESS;

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, va, &map_entry) ||
	    map_entry->is_sub_map) {
		/*
		 * that memory is not properly mapped
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	object = map_entry->object.vm_object;

	if (object == VM_OBJECT_NULL) {
		/*
		 * there should be a VM object here at this point
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	vm_object_lock(object);
	object->set_cache_attr = TRUE;
	vm_object_unlock(object);

	vm_map_set_cache_attr_count++;
done:
	vm_map_unlock_read(map);

	return kr;
}


#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	struct pager_crypt_info *crypt_info)
{
	boolean_t	map_locked;
	kern_return_t	kr;
	vm_map_entry_t	map_entry;
	memory_object_t	protected_mem_obj;
	vm_object_t	protected_object;
	vm_map_offset_t	map_addr;

	vm_map_lock_read(map);
	map_locked = TRUE;

	/* lookup the protected VM object */
	if (!vm_map_lookup_entry(map,
				 start,
				 &map_entry) ||
	    map_entry->vme_end < end ||
	    map_entry->is_sub_map) {
		/* that memory is not properly mapped */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	protected_object = map_entry->object.vm_object;
	if (protected_object == VM_OBJECT_NULL) {
		/* there should be a VM object here at this point */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* make sure protected object stays alive while map is unlocked */
	vm_object_reference(protected_object);

	vm_map_unlock_read(map);
	map_locked = FALSE;

	/*
	 * Lookup (and create if necessary) the protected memory object
	 * matching that VM object.
	 * If successful, this also grabs a reference on the memory object,
	 * to guarantee that it doesn't go away before we get a chance to map
	 * it.
	 */
	protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);

	/* release extra ref on protected object */
	vm_object_deallocate(protected_object);

	if (protected_mem_obj == NULL) {
		kr = KERN_FAILURE;
		goto done;
	}

	/* map this memory object in place of the current one */
	map_addr = start;
	kr = vm_map_enter_mem_object(map,
				     &map_addr,
				     end - start,
				     (mach_vm_offset_t) 0,
				     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
				     (ipc_port_t) protected_mem_obj,
				     (map_entry->offset +
				      (start - map_entry->vme_start)),
				     TRUE,
				     map_entry->protection,
				     map_entry->max_protection,
				     map_entry->inheritance);
	assert(map_addr == start);
	/*
	 * Release the reference obtained by apple_protect_pager_setup().
	 * The mapping (if it succeeded) is now holding a reference on the
	 * memory object.
	 */
	memory_object_deallocate(protected_mem_obj);

done:
	if (map_locked) {
		vm_map_unlock_read(map);
	}
	return kr;
}
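
/*
 * (Editor's informal summary, not part of the original source: on success
 * the range [start, end) of "map" now faults through the apple-protect
 * pager set up above, which decrypts pages of the original backing object
 * on demand using the hooks supplied in crypt_info.)
 */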
#endif	/* CONFIG_CODE_DECRYPTION */


lck_grp_t		vm_map_lck_grp;
lck_grp_attr_t		vm_map_lck_grp_attr;
lck_attr_t		vm_map_lck_attr;


/*
 *	vm_map_init:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from zones -- we must
 *	initialize those zones.
 *
 *	There are three zones of interest:
 *
 *	vm_map_zone:		used to allocate maps.
 *	vm_map_entry_zone:	used to allocate map entries.
 *	vm_map_kentry_zone:	used to allocate map entries for the kernel.
 *
 *	The kernel allocates map entries from a special zone that is initially
 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty since the very act of allocating memory implies the creation
 *	of a new entry.
 */
void
vm_map_init(
	void)
{
	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
			    PAGE_SIZE, "maps");
	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);

	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
				  1024*1024, PAGE_SIZE*5,
				  "non-kernel map entries");
	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);

	vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
				   kentry_data_size, kentry_data_size,
				   "kernel map entries");
	zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE);

	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
				 16*1024, PAGE_SIZE, "map copies");
	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);

	/*
	 *	Cram the map and kentry zones with initial data.
	 *	Set kentry_zone non-collectible to aid zone_gc().
	 */
	zone_change(vm_map_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
	zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */

	zcram(vm_map_zone, map_data, map_data_size);
	zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);

	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
	lck_attr_setdefault(&vm_map_lck_attr);
}

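/*
 * (Editor's note, not part of the original source: the zinit() arguments
 * above are element size, maximum zone size, allocation chunk size and a
 * human-readable zone name -- e.g. the "map copies" zone is capped at
 * 16KB and grows one page at a time, while the kernel-entry zone is sized
 * once from kentry_data_size and, with Z_EXPAND disabled, never grown.)
 */
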
void
vm_map_steal_memory(
	void)
{
	map_data_size = round_page(10 * sizeof(struct _vm_map));
	map_data = pmap_steal_memory(map_data_size);

#if 0
	/*
	 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
	 * physical page (i.e. that beyond the kernel image and page tables)
	 * individually; we guess at most one entry per eight pages in the
	 * real world. This works out to roughly .1 of 1% of physical memory,
	 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
	 */
#endif
	kentry_count = pmap_free_pages() / 8;


	kentry_data_size =
		round_page(kentry_count * sizeof(struct vm_map_entry));
	kentry_data = pmap_steal_memory(kentry_data_size);
}

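/*
 * (Editor's note, describing the bootstrap ordering rather than anything
 * stated here: vm_map_steal_memory() runs early in VM bootstrap, before
 * the zone package is usable, and grabs the backing pages for map_data
 * and kentry_data straight from pmap_steal_memory(); vm_map_init() later
 * hands those same buffers to zcram() so that maps and kernel map entries
 * can be allocated before general zone allocation works.)
 */
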
/*
 *	vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(
	pmap_t		pmap,
	vm_map_offset_t	min,
	vm_map_offset_t	max,
	boolean_t	pageable)
{
	static int	color_seed = 0;
	register vm_map_t	result;

	result = (vm_map_t) zalloc(vm_map_zone);
	if (result == VM_MAP_NULL)
		panic("vm_map_create");

	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result)  = vm_map_to_entry(result);
	result->hdr.nentries = 0;
	result->hdr.entries_pageable = pageable;

	vm_map_store_init( &(result->hdr) );

	result->size = 0;
	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
	result->user_wire_size  = 0;
	result->ref_count = 1;
#if	TASK_SWAPPER
	result->res_count = 1;
	result->sw_state = MAP_SW_IN;
#endif	/* TASK_SWAPPER */
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	result->wiring_required = FALSE;
	result->no_zero_fill = FALSE;
	result->mapped = FALSE;
	result->wait_for_space = FALSE;
	result->switch_protect = FALSE;
	result->disable_vmentry_reuse = FALSE;
	result->map_disallow_data_exec = FALSE;
	result->highest_entry_end = 0;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);
	result->color_rr = (color_seed++) & vm_color_mask;
	result->jit_entry_exists = FALSE;
#if CONFIG_FREEZE
	result->default_freezer_toc = NULL;
#endif
	vm_map_lock_init(result);
	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);

	return(result);
}

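/*
 * (Editor's illustration of the calling pattern, not a copy of a real
 * call site in this release: a fresh user address map is built roughly as
 *
 *	new_map = vm_map_create(pmap_create(0, is_64bit),
 *				MACH_VM_MIN_ADDRESS, MACH_VM_MAX_ADDRESS,
 *				TRUE);
 *
 * the exact pmap_create() signature varies across releases, so treat this
 * as a sketch of how the min/max bounds and pageable flag are supplied.)
 */
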
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 */
#define	vm_map_entry_create(map) \
	_vm_map_entry_create(&(map)->hdr)

#define	vm_map_copy_entry_create(copy) \
	_vm_map_entry_create(&(copy)->cpy_hdr)

static vm_map_entry_t
_vm_map_entry_create(
	register struct vm_map_header	*map_header)
{
	register zone_t	zone;
	register vm_map_entry_t	entry;

	if (map_header->entries_pageable)
		zone = vm_map_entry_zone;
	else
		zone = vm_map_kentry_zone;

	entry = (vm_map_entry_t) zalloc(zone);
	if (entry == VM_MAP_ENTRY_NULL)
		panic("vm_map_entry_create");
	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);

	return(entry);
}

/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	write map lock held so no need to
 *	do anything special to ensure correctness
 *	of the stores
 */
#define	vm_map_entry_dispose(map, entry)			\
	vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE);	\
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define	vm_map_copy_entry_dispose(map, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

static void
_vm_map_entry_dispose(
	register struct vm_map_header	*map_header,
	register vm_map_entry_t		entry)
{
	register zone_t		zone;

	if (map_header->entries_pageable)
		zone = vm_map_entry_zone;
	else
		zone = vm_map_kentry_zone;

	zfree(zone, entry);
}

#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
boolean_t
first_free_is_valid(
	vm_map_t	map)
{
	if (!first_free_check)
		return TRUE;

	return( first_free_is_valid_store( map ));
}
#endif /* MACH_ASSERT */


#define vm_map_copy_entry_link(copy, after_where, entry)		\
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)				\
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))

#if	MACH_ASSERT && TASK_SWAPPER
/*
 *	vm_map_res_reference:
 *
 *	Adds another valid residence count to the given map.
 *
 *	Map is locked so this function can be called from
 *	vm_map_swapin.
 *
 */
void vm_map_res_reference(register vm_map_t map)
{
	/* assert map is locked */
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	if (map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapin(map);
		lck_mtx_lock(&map->s_lock);
		++map->res_count;
		vm_map_unlock(map);
	} else
		++map->res_count;
}

/*
 *	vm_map_reference_swap:
 *
 *	Adds valid reference and residence counts to the given map.
 *
 *	The map may not be in memory (i.e. zero residence count).
 *
 */
void vm_map_reference_swap(register vm_map_t map)
{
	assert(map != VM_MAP_NULL);
	lck_mtx_lock(&map->s_lock);
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	map->ref_count++;
	vm_map_res_reference(map);
	lck_mtx_unlock(&map->s_lock);
}

/*
 *	vm_map_res_deallocate:
 *
 *	Decrement residence count on a map; possibly causing swapout.
 *
 *	The map must be in memory (i.e. non-zero residence count).
 *
 *	The map is locked, so this function is callable from vm_map_deallocate.
 *
 */
void vm_map_res_deallocate(register vm_map_t map)
{
	assert(map->res_count > 0);
	if (--map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapout(map);
		vm_map_unlock(map);
		lck_mtx_lock(&map->s_lock);
	}
	assert(map->ref_count >= map->res_count);
}
#endif	/* MACH_ASSERT && TASK_SWAPPER */

/*
 *	vm_map_destroy:
 *
 *	Actually destroy a map.
 */
void
vm_map_destroy(
	vm_map_t	map,
	int		flags)
{
	vm_map_lock(map);

	/* clean up regular map entries */
	(void) vm_map_delete(map, map->min_offset, map->max_offset,
			     flags, VM_MAP_NULL);
	/* clean up leftover special mappings (commpage, etc...) */
	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
			     flags, VM_MAP_NULL);

#if CONFIG_FREEZE
	if (map->default_freezer_toc){
		default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
	}
#endif
	vm_map_unlock(map);

	assert(map->hdr.nentries == 0);

	if(map->pmap)
		pmap_destroy(map->pmap);

	zfree(vm_map_zone, map);
}

#if	TASK_SWAPPER
/*
 * vm_map_swapin/vm_map_swapout
 *
 * Swap a map in and out, either referencing or releasing its resources.
 * These functions are internal use only; however, they must be exported
 * because they may be called from macros, which are exported.
 *
 * In the case of swapout, there could be races on the residence count,
 * so if the residence count is up, we return, assuming that a
 * vm_map_deallocate() call in the near future will bring us back.
 *
 * Locking:
 *	-- We use the map write lock for synchronization among races.
 *	-- The map write lock, and not the simple s_lock, protects the
 *	   swap state of the map.
 *	-- If a map entry is a share map, then we hold both locks, in
 *	   hierarchical order.
 *
 * Synchronization Notes:
 *	1) If a vm_map_swapin() call happens while swapout in progress, it
 *	will block on the map lock and proceed when swapout is through.
 *	2) A vm_map_reference() call at this time is illegal, and will
 *	cause a panic.  vm_map_reference() is only allowed on resident
 *	maps, since it refuses to block.
 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
 *	This is the reason the res_count is not incremented until
 *	after the swapin is complete.
 *	4) There is a timing hole after the checks of the res_count, before
 *	the map lock is taken, during which a swapin may get the lock
 *	before a swapout about to happen.  If this happens, the swapin
 *	will detect the state and increment the reference count, causing
 *	the swapout to be a nop, thereby delaying it until a later
 *	vm_map_deallocate.  If the swapout gets the lock first, then
 *	the swapin will simply block until the swapout is done, and
 *	then proceed.
 *
 * Because vm_map_swapin() is potentially an expensive operation, it
 * should be used with caution.
 *
 * Invariants:
 *	1) A map with a residence count of zero is either swapped, or
 *	   being swapped.
 *	2) A map with a non-zero residence count is either resident,
 *	   or being swapped in.
 */

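/*
 * (Editor's summary, not part of the original source: callers take a new
 * reference on a possibly swapped-out map via vm_map_reference_swap(),
 * which bumps ref_count and, through vm_map_res_reference(), res_count --
 * swapping the map back in first if res_count was 0.  Dropping the last
 * residence reference in vm_map_res_deallocate() is what triggers
 * vm_map_swapout().)
 */
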
int vm_map_swap_enable = 1;

void vm_map_swapin (vm_map_t map)
{
	register vm_map_entry_t entry;

	if (!vm_map_swap_enable)	/* debug */
		return;

	/*
	 * Map is locked
	 * First deal with various races.
	 */
	if (map->sw_state == MAP_SW_IN)
		/*
		 * we raced with swapout and won.  Returning will incr.
		 * the res_count, turning the swapout into a nop.
		 */
		return;

	/*
	 * The residence count must be zero.  If we raced with another
	 * swapin, the state would have been IN; if we raced with a
	 * swapout (after another competing swapin), we must have lost
	 * the race to get here (see above comment), in which case
	 * res_count is still 0.
	 */
	assert(map->res_count == 0);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_OUT);

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_reference upon it.
	 * If the entry is an object, we call vm_object_res_reference
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_reference.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (entry->object.vm_object != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = entry->object.sub_map;
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_reference(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = entry->object.vm_object;
				vm_object_lock(object);
				/*
				 * This call may iterate through the
				 * shadow chain.
				 */
				vm_object_res_reference(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_OUT);
	map->sw_state = MAP_SW_IN;
}

void vm_map_swapout(vm_map_t map)
{
	register vm_map_entry_t entry;

	/*
	 * Map is locked
	 * First deal with various races.
	 * If we raced with a swapin and lost, the residence count
	 * will have been incremented to 1, and we simply return.
	 */
	lck_mtx_lock(&map->s_lock);
	if (map->res_count != 0) {
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	lck_mtx_unlock(&map->s_lock);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_IN);

	if (!vm_map_swap_enable)
		return;

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_deallocate upon it.
	 * If the entry is an object, we call vm_object_res_deallocate
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_deallocate.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (entry->object.vm_object != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = entry->object.sub_map;
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_deallocate(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = entry->object.vm_object;
				vm_object_lock(object);
				/*
				 * This call may take a long time,
				 * since it could actively push
				 * out pages (if we implement it
				 * that way).
				 */
				vm_object_res_deallocate(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_IN);
	map->sw_state = MAP_SW_OUT;
}

#endif	/* TASK_SWAPPER */

/*
 *	vm_map_lookup_entry:	[ internal use only ]
 *
 *	Calls into the vm map store layer to find the map
 *	entry containing (or immediately preceding) the
 *	specified address in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(
	register vm_map_t		map,
	register vm_map_offset_t	address,
	vm_map_entry_t			*entry)		/* OUT */
{
	return ( vm_map_store_lookup_entry( map, address, entry ));
}

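/*
 * (Editor's illustration of the usual calling pattern, not from the
 * original source:
 *
 *	if (vm_map_lookup_entry(map, start, &entry)) {
 *		-- "start" lies inside "entry"; clip it as needed
 *	} else {
 *		-- "start" is in a hole; "entry" precedes it, so the
 *		-- next mapping, if any, is entry->vme_next
 *	}
 * )
 */
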
/*
 *	Routine:	vm_map_find_space
 *	Purpose:
 *		Allocate a range in the specified virtual address map,
 *		returning the entry allocated for that range.
 *		Used by kmem_alloc, etc.
 *
 *		The map must be NOT be locked. It will be returned locked
 *		on KERN_SUCCESS, unlocked on failure.
 *
 *		If an entry is allocated, the object/offset fields
 *		are initialized to zero.
 */
kern_return_t
vm_map_find_space(
	register vm_map_t	map,
	vm_map_offset_t		*address,	/* OUT */
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_entry_t		*o_entry)	/* OUT */
{
	register vm_map_entry_t		entry, new_entry;
	register vm_map_offset_t	start;
	register vm_map_offset_t	end;

	if (size == 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	if (flags & VM_FLAGS_GUARD_AFTER) {
		/* account for the back guard page in the size */
		size += PAGE_SIZE_64;
	}

	new_entry = vm_map_entry_create(map);

	/*
	 *	Look for the first possible address; if there's already
	 *	something at this address, we have to start after it.
	 */

	vm_map_lock(map);

	if( map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(map, entry, start);
	} else {
		assert(first_free_is_valid(map));
		if ((entry = map->first_free) == vm_map_to_entry(map))
			start = map->min_offset;
		else
			start = entry->vme_end;
	}

	/*
	 *	In any case, the "entry" always precedes
	 *	the proposed new region throughout the loop:
	 */

	while (TRUE) {
		register vm_map_entry_t	next;

		/*
		 *	Find the end of the proposed new region.
		 *	Be sure we didn't go beyond the end, or
		 *	wrap around the address.
		 */

		if (flags & VM_FLAGS_GUARD_BEFORE) {
			/* reserve space for the front guard page */
			start += PAGE_SIZE_64;
		}
		end = ((start + mask) & ~mask);

		if (end < start) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return(KERN_NO_SPACE);
		}
		start = end;
		end += size;

		if ((end > map->max_offset) || (end < start)) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return(KERN_NO_SPACE);
		}

		/*
		 *	If there are no more entries, we must win.
		 */

		next = entry->vme_next;
		if (next == vm_map_to_entry(map))
			break;

		/*
		 *	If there is another entry, it must be
		 *	after the end of the potential new region.
		 */

		if (next->vme_start >= end)
			break;

		/*
		 *	Didn't fit -- move to the next entry.
		 */

		entry = next;
		start = entry->vme_end;
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */

	if (flags & VM_FLAGS_GUARD_BEFORE) {
		/* go back for the front guard page */
		start -= PAGE_SIZE_64;
	}
	*address = start;

	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));

	new_entry->is_shared = FALSE;
	new_entry->is_sub_map = FALSE;
	new_entry->use_pmap = FALSE;
	new_entry->object.vm_object = VM_OBJECT_NULL;
	new_entry->offset = (vm_object_offset_t) 0;

	new_entry->needs_copy = FALSE;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;
	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
	new_entry->wired_count = 0;
	new_entry->user_wired_count = 0;

	new_entry->in_transition = FALSE;
	new_entry->needs_wakeup = FALSE;
	new_entry->no_cache = FALSE;
	new_entry->permanent = FALSE;
	new_entry->superpage_size = 0;

	new_entry->alias = 0;
	new_entry->zero_wired_pages = FALSE;

	VM_GET_FLAGS_ALIAS(flags, new_entry->alias);

	/*
	 *	Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry);

	map->size += size;

	/*
	 *	Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	*o_entry = new_entry;
	return(KERN_SUCCESS);
}

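/*
 * (Editor's illustration of the kmem_alloc-style calling pattern, not a
 * copy of a real call site:
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		entry->object.vm_object = object;	-- map comes back locked
 *		entry->offset = 0;
 *		vm_map_unlock(kernel_map);
 *	}
 * )
 */
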
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;

/*
 *	Routine:	vm_map_pmap_enter [internal only]
 *
 *	Description:
 *		Force pages from the specified object to be entered into
 *		the pmap at the specified address if they are present.
 *		As soon as a page not found in the object the scan ends.
 *
 *	Returns:
 *		Nothing.
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
static void
vm_map_pmap_enter(
	vm_map_t		map,
	register vm_map_offset_t	addr,
	register vm_map_offset_t	end_addr,
	register vm_object_t		object,
	vm_object_offset_t		offset,
	vm_prot_t			protection)
{
	int			type_of_fault;
	kern_return_t		kr;

	if(map->pmap == 0)
		return;

	while (addr < end_addr) {
		register vm_page_t	m;

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);
		/*
		 * ENCRYPTED SWAP:
		 * The user should never see encrypted data, so do not
		 * enter an encrypted page in the page table.
		 */
		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
		    m->fictitious ||
		    (m->unusual && ( m->error || m->restart || m->absent))) {
			vm_object_unlock(object);
			return;
		}

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			       map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
		type_of_fault = DBG_CACHE_HIT_FAULT;
		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
				    &type_of_fault);

		vm_object_unlock(object);

		offset += PAGE_SIZE_64;
		addr += PAGE_SIZE;
	}
}

boolean_t vm_map_pmap_is_empty(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);
boolean_t vm_map_pmap_is_empty(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
#ifdef MACHINE_PMAP_IS_EMPTY
	return pmap_is_empty(map->pmap, start, end);
#else	/* MACHINE_PMAP_IS_EMPTY */
	vm_map_offset_t	offset;
	ppnum_t		phys_page;

	if (map->pmap == NULL) {
		return TRUE;
	}

	for (offset = start;
	     offset < end;
	     offset += PAGE_SIZE) {
		phys_page = pmap_find_phys(map->pmap, offset);
		if (phys_page) {
			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
				"page %d at 0x%llx\n",
				map, (long long)start, (long long)end,
				phys_page, (long long)offset);
			return FALSE;
		}
	}
	return TRUE;
#endif	/* MACHINE_PMAP_IS_EMPTY */
}

/*
 *	Routine:	vm_map_enter
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		The resulting range will refer to memory defined by
 *		the given memory object and offset into that object.
 *
 *		Arguments are as defined in the vm_map call.
 */
int _map_enter_debug = 0;
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
kern_return_t
vm_map_enter(
	vm_map_t		map,
	vm_map_offset_t		*address,	/* IN/OUT */
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		needs_copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_entry_t		entry, new_entry;
	vm_map_offset_t		start, tmp_start, tmp_offset;
	vm_map_offset_t		end, tmp_end;
	vm_map_offset_t		tmp2_start, tmp2_end;
	vm_map_offset_t		step;
	kern_return_t		result = KERN_SUCCESS;
	vm_map_t		zap_old_map = VM_MAP_NULL;
	vm_map_t		zap_new_map = VM_MAP_NULL;
	boolean_t		map_locked = FALSE;
	boolean_t		pmap_empty = TRUE;
	boolean_t		new_mapping_established = FALSE;
	boolean_t		anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
	boolean_t		purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
	boolean_t		overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
	boolean_t		no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
	boolean_t		is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
	boolean_t		permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
	unsigned int		superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
	char			alias;
	vm_map_offset_t		effective_min_offset, effective_max_offset;
	kern_return_t		kr;

	if (superpage_size) {
		switch (superpage_size) {
			/*
			 * Note that the current implementation only supports
			 * a single size for superpages, SUPERPAGE_SIZE, per
			 * architecture. As soon as more sizes need to be
			 * supported, SUPERPAGE_SIZE has to be replaced
			 * with a lookup of the size depending on superpage_size.
			 */
#ifdef __x86_64__
			case SUPERPAGE_SIZE_ANY:
				/* handle it like 2 MB and round up to page size */
				size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
			case SUPERPAGE_SIZE_2MB:
				break;
#endif
			default:
				return KERN_INVALID_ARGUMENT;
		}
		mask = SUPERPAGE_SIZE-1;
		if (size & (SUPERPAGE_SIZE-1))
			return KERN_INVALID_ARGUMENT;
		inheritance = VM_INHERIT_NONE;	/* fork() children won't inherit superpages */
	}


#if CONFIG_EMBEDDED
	if (cur_protection & VM_PROT_WRITE){
		if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
			printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
			cur_protection &= ~VM_PROT_EXECUTE;
		}
	}
#endif /* CONFIG_EMBEDDED */

	if (is_submap) {
		if (purgable) {
			/* submaps can not be purgeable */
			return KERN_INVALID_ARGUMENT;
		}
		if (object == VM_OBJECT_NULL) {
			/* submaps can not be created lazily */
			return KERN_INVALID_ARGUMENT;
		}
	}
	if (flags & VM_FLAGS_ALREADY) {
		/*
		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
		 * is already present.  For it to be meaningful, the requested
		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
		 * we shouldn't try to remove what was mapped there first
		 * (!VM_FLAGS_OVERWRITE).
		 */
		if ((flags & VM_FLAGS_ANYWHERE) ||
		    (flags & VM_FLAGS_OVERWRITE)) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	effective_min_offset = map->min_offset;

	if (flags & VM_FLAGS_BEYOND_MAX) {
		/*
		 * Allow an insertion beyond the map's max offset.
		 */
		if (vm_map_is_64bit(map))
			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
		else
			effective_max_offset = 0x00000000FFFFF000ULL;
	} else {
		effective_max_offset = map->max_offset;
	}

	if (size == 0 ||
	    (offset & PAGE_MASK_64) != 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	VM_GET_FLAGS_ALIAS(flags, alias);

#define	RETURN(value)	{ result = value; goto BailOut; }

	assert(page_aligned(*address));
	assert(page_aligned(size));

	/*
	 * Only zero-fill objects are allowed to be purgable.
	 * LP64todo - limit purgable objects to 32-bits for now
	 */
	if (purgable &&
	    (offset != 0 ||
	     (object != VM_OBJECT_NULL &&
	      (object->vo_size != size ||
	       object->purgable == VM_PURGABLE_DENY))
	     || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
		return KERN_INVALID_ARGUMENT;

	if (!anywhere && overwrite) {
		/*
		 * Create a temporary VM map to hold the old mappings in the
		 * affected area while we create the new one.
		 * This avoids releasing the VM map lock in
		 * vm_map_entry_delete() and allows atomicity
		 * when we want to replace some mappings with a new one.
		 * It also allows us to restore the old VM mappings if the
		 * new mapping fails.
		 */
		zap_old_map = vm_map_create(PMAP_NULL,
					    *address,
					    *address + size,
					    map->hdr.entries_pageable);
	}

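	/*
	 * (Editor's note, describing behavior implemented later in this
	 * function rather than shown here: if the new mapping fails, the
	 * entries saved in zap_old_map are moved back into "map" on the
	 * error path, restoring the original mappings; on success they are
	 * simply discarded together with zap_old_map.)
	 */
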
StartAgain: ;

	start = *address;

	if (anywhere) {
		vm_map_lock(map);
		map_locked = TRUE;

		if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
			result = KERN_INVALID_ARGUMENT;
			goto BailOut;
		}

		/*
		 *	Calculate the first possible address.
		 */

		if (start < effective_min_offset)
			start = effective_min_offset;
		if (start > effective_max_offset)
			RETURN(KERN_NO_SPACE);

		/*
		 *	Look for the first possible address;
		 *	if there's already something at this
		 *	address, we have to start after it.
		 */

		if( map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {
			assert(first_free_is_valid(map));

			entry = map->first_free;

			if (entry == vm_map_to_entry(map)) {
				entry = NULL;
			} else {
				if (entry->vme_next == vm_map_to_entry(map)){
					/*
					 * Hole at the end of the map.
					 */
					entry = NULL;
				} else {
					if (start < (entry->vme_next)->vme_start ) {
						start = entry->vme_end;
					} else {
						/*
						 * Need to do a lookup.
						 */
						entry = NULL;
					}
				}
			}

			if (entry == NULL) {
				vm_map_entry_t	tmp_entry;
				if (vm_map_lookup_entry(map, start, &tmp_entry))
					start = tmp_entry->vme_end;
				entry = tmp_entry;
			}
		}

		/*
		 *	In any case, the "entry" always precedes
		 *	the proposed new region throughout the
		 *	loop:
		 */

		while (TRUE) {
			register vm_map_entry_t	next;

			/*
			 *	Find the end of the proposed new region.
			 *	Be sure we didn't go beyond the end, or
			 *	wrap around the address.
			 */

			end = ((start + mask) & ~mask);
			if (end < start)
				RETURN(KERN_NO_SPACE);
			start = end;
			end += size;

			if ((end > effective_max_offset) || (end < start)) {
				if (map->wait_for_space) {
					if (size <= (effective_max_offset -
						     effective_min_offset)) {
						assert_wait((event_t)map,
							    THREAD_ABORTSAFE);
						vm_map_unlock(map);
						map_locked = FALSE;
						thread_block(THREAD_CONTINUE_NULL);
						goto StartAgain;
					}
				}
				RETURN(KERN_NO_SPACE);
			}

			/*
			 *	If there are no more entries, we must win.
			 */

			next = entry->vme_next;
			if (next == vm_map_to_entry(map))
				break;

			/*
			 *	If there is another entry, it must be
			 *	after the end of the potential new region.
			 */

			if (next->vme_start >= end)
				break;

			/*
			 *	Didn't fit -- move to the next entry.
			 */

			entry = next;
			start = entry->vme_end;
		}
		*address = start;
	} else {
		/*
		 *	Verify that:
		 *		the address doesn't itself violate
		 *		the mask requirement.
		 */

		vm_map_lock(map);
		map_locked = TRUE;
		if ((start & mask) != 0)
			RETURN(KERN_NO_SPACE);

		/*
		 *	...	the address is within bounds
		 */

		end = start + size;

		if ((start < effective_min_offset) ||
		    (end > effective_max_offset) ||
		    (start >= end)) {
			RETURN(KERN_INVALID_ADDRESS);
		}

		if (overwrite && zap_old_map != VM_MAP_NULL) {
			/*
			 * Fixed mapping and "overwrite" flag: attempt to
			 * remove all existing mappings in the specified
			 * address range, saving them in our "zap_old_map".
			 */
			(void) vm_map_delete(map, start, end,
					     VM_MAP_REMOVE_SAVE_ENTRIES,
					     zap_old_map);
		}

		/*
		 *	...	the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &entry)) {
			if (! (flags & VM_FLAGS_ALREADY)) {
				RETURN(KERN_NO_SPACE);
			}
			/*
			 * Check if what's already there is what we want.
			 */
			tmp_start = start;
			tmp_offset = offset;
			if (entry->vme_start < start) {
				tmp_start -= start - entry->vme_start;
				tmp_offset -= start - entry->vme_start;

			}
			for (; entry->vme_start < end;
			     entry = entry->vme_next) {
				/*
				 * Check if the mapping's attributes
				 * match the existing map entry.
				 */
				if (entry == vm_map_to_entry(map) ||
				    entry->vme_start != tmp_start ||
				    entry->is_sub_map != is_submap ||
				    entry->offset != tmp_offset ||
				    entry->needs_copy != needs_copy ||
				    entry->protection != cur_protection ||
				    entry->max_protection != max_protection ||
				    entry->inheritance != inheritance ||
				    entry->alias != alias) {
					/* not the same mapping ! */
					RETURN(KERN_NO_SPACE);
				}
				/*
				 * Check if the same object is being mapped.
				 */
				if (is_submap) {
					if (entry->object.sub_map !=
					    (vm_map_t) object) {
						/* not the same submap */
						RETURN(KERN_NO_SPACE);
					}
				} else {
					if (entry->object.vm_object != object) {
						/* not the same VM object... */
						vm_object_t obj2;

						obj2 = entry->object.vm_object;
						if ((obj2 == VM_OBJECT_NULL ||
						     obj2->internal) &&
						    (object == VM_OBJECT_NULL ||
						     object->internal)) {
							/*
							 * ... but both are
							 * anonymous memory,
							 * so equivalent.
							 */
						} else {
							RETURN(KERN_NO_SPACE);
						}
					}
				}

				tmp_offset += entry->vme_end - entry->vme_start;
				tmp_start += entry->vme_end - entry->vme_start;
				if (entry->vme_end >= end) {
					/* reached the end of our mapping */
					break;
				}
			}
			/* it all matches:  let's use what's already there ! */
			RETURN(KERN_MEMORY_PRESENT);
		}

		/*
		 *	... the next region doesn't overlap the
		 *	end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end))
			RETURN(KERN_NO_SPACE);
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */

	/*
	 *	See whether we can avoid creating a new entry (and object) by
	 *	extending one of our neighbors.  [So far, we only attempt to
	 *	extend from below.]  Note that we can never extend/join
	 *	purgable objects because they need to remain distinct
	 *	entities in order to implement their "volatile object"
	 *	semantics.
	 */

	if (purgable) {
		if (object == VM_OBJECT_NULL) {
			object = vm_object_allocate(size);
			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
			object->purgable = VM_PURGABLE_NONVOLATILE;
			offset = (vm_object_offset_t)0;
		}
	} else if ((is_submap == FALSE) &&
		   (object == VM_OBJECT_NULL) &&
		   (entry != vm_map_to_entry(map)) &&
		   (entry->vme_end == start) &&
		   (!entry->is_shared) &&
		   (!entry->is_sub_map) &&
		   ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
		   (entry->inheritance == inheritance) &&
		   (entry->protection == cur_protection) &&
		   (entry->max_protection == max_protection) &&
		   (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
		   (entry->in_transition == 0) &&
		   (entry->no_cache == no_cache) &&
		   ((entry->vme_end - entry->vme_start) + size <=
		    (alias == VM_MEMORY_REALLOC ?
		     ANON_CHUNK_SIZE :
		     NO_COALESCE_LIMIT)) &&
		   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
		if (vm_object_coalesce(entry->object.vm_object,
				       VM_OBJECT_NULL,
				       entry->offset,
				       (vm_object_offset_t) 0,
				       (vm_map_size_t)(entry->vme_end - entry->vme_start),
				       (vm_map_size_t)(end - entry->vme_end))) {

			/*
			 *	Coalesced the two objects - can extend
			 *	the previous map entry to include the
			 *	new range.
			 */
			map->size += (end - entry->vme_end);
			entry->vme_end = end;
			vm_map_store_update_first_free(map, map->first_free);
			RETURN(KERN_SUCCESS);
		}
	}

b0d623f7
A
1877 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1878 new_entry = NULL;
1879
1880 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1881 tmp2_end = tmp2_start + step;
1882 /*
1883 * Create a new entry
1884 * LP64todo - for now, we can only allocate 4GB internal objects
1885 * because the default pager can't page bigger ones. Remove this
1886 * when it can.
1887 *
1888 * XXX FBDP
1889 * The reserved "page zero" in each process's address space can
1890 * be arbitrarily large. Splitting it into separate 4GB objects and
1891 * therefore different VM map entries serves no purpose and just
1892 * slows down operations on the VM map, so let's not split the
1893 * allocation into 4GB chunks if the max protection is NONE. That
1894 * memory should never be accessible, so it will never get to the
1895 * default pager.
1896 */
1897 tmp_start = tmp2_start;
1898 if (object == VM_OBJECT_NULL &&
1899 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1900 max_protection != VM_PROT_NONE &&
1901 superpage_size == 0)
1902 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1903 else
1904 tmp_end = tmp2_end;
1905 do {
1906 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1907 object, offset, needs_copy,
1908 FALSE, FALSE,
1909 cur_protection, max_protection,
1910 VM_BEHAVIOR_DEFAULT,
6d2010ae
A
1911 (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1912 0, no_cache,
b0d623f7
A
1913 permanent, superpage_size);
1914 new_entry->alias = alias;
6d2010ae
A
1915 if (flags & VM_FLAGS_MAP_JIT){
1916 if (!(map->jit_entry_exists)){
1917 new_entry->used_for_jit = TRUE;
1918 map->jit_entry_exists = TRUE;
1919 }
1920 }
1921
b0d623f7
A
1922 if (is_submap) {
1923 vm_map_t submap;
1924 boolean_t submap_is_64bit;
1925 boolean_t use_pmap;
1926
1927 new_entry->is_sub_map = TRUE;
1928 submap = (vm_map_t) object;
1929 submap_is_64bit = vm_map_is_64bit(submap);
1930 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1931 #ifndef NO_NESTED_PMAP
1932 if (use_pmap && submap->pmap == NULL) {
1933 /* we need a sub pmap to nest... */
1934 submap->pmap = pmap_create(0, submap_is_64bit);
1935 if (submap->pmap == NULL) {
1936 /* let's proceed without nesting... */
1937 }
2d21ac55 1938 }
b0d623f7
A
1939 if (use_pmap && submap->pmap != NULL) {
1940 kr = pmap_nest(map->pmap,
1941 submap->pmap,
1942 tmp_start,
1943 tmp_start,
1944 tmp_end - tmp_start);
1945 if (kr != KERN_SUCCESS) {
1946 printf("vm_map_enter: "
1947 "pmap_nest(0x%llx,0x%llx) "
1948 "error 0x%x\n",
1949 (long long)tmp_start,
1950 (long long)tmp_end,
1951 kr);
1952 } else {
1953 /* we're now nested ! */
1954 new_entry->use_pmap = TRUE;
1955 pmap_empty = FALSE;
1956 }
1957 }
1958 #endif /* NO_NESTED_PMAP */
2d21ac55 1959 }
b0d623f7
A
1960 entry = new_entry;
1961
1962 if (superpage_size) {
1963 vm_page_t pages, m;
1964 vm_object_t sp_object;
1965
1966 entry->offset = 0;
1967
1968 /* allocate one superpage */
1969 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2d21ac55 1970 if (kr != KERN_SUCCESS) {
b0d623f7
A
1971 new_mapping_established = TRUE; /* will cause deallocation of whole range */
1972 RETURN(kr);
1973 }
1974
1975 /* create one vm_object per superpage */
1976 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
1977 sp_object->phys_contiguous = TRUE;
6d2010ae 1978 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
b0d623f7
A
1979 entry->object.vm_object = sp_object;
1980
1981 /* enter the base pages into the object */
1982 vm_object_lock(sp_object);
1983 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
1984 m = pages;
1985 pmap_zero_page(m->phys_page);
1986 pages = NEXT_PAGE(m);
1987 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1988 vm_page_insert(m, sp_object, offset);
2d21ac55 1989 }
b0d623f7 1990 vm_object_unlock(sp_object);
2d21ac55 1991 }
b0d623f7
A
1992 } while (tmp_end != tmp2_end &&
1993 (tmp_start = tmp_end) &&
1994 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
1995 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
1996 }
91447636 1997
1c79356b 1998 vm_map_unlock(map);
91447636
A
1999 map_locked = FALSE;
2000
2001 new_mapping_established = TRUE;
1c79356b
A
2002
2003 /* Wire down the new entry if the user
2004 * requested all new map entries be wired.
2005 */
b0d623f7 2006 if ((map->wiring_required)||(superpage_size)) {
91447636 2007 pmap_empty = FALSE; /* pmap won't be empty */
1c79356b 2008 result = vm_map_wire(map, start, end,
91447636
A
2009 new_entry->protection, TRUE);
2010 RETURN(result);
1c79356b
A
2011 }
2012
2013 if ((object != VM_OBJECT_NULL) &&
2014 (vm_map_pmap_enter_enable) &&
2015 (!anywhere) &&
2016 (!needs_copy) &&
2017 (size < (128*1024))) {
91447636 2018 pmap_empty = FALSE; /* pmap won't be empty */
0c530ab8 2019
2d21ac55 2020 if (override_nx(map, alias) && cur_protection)
0c530ab8 2021 cur_protection |= VM_PROT_EXECUTE;
2d21ac55 2022
1c79356b
A
2023 vm_map_pmap_enter(map, start, end,
2024 object, offset, cur_protection);
2025 }
2026
2d21ac55 2027BailOut: ;
593a1d5f
A
2028 if (result == KERN_SUCCESS) {
2029 vm_prot_t pager_prot;
2030 memory_object_t pager;
91447636 2031
593a1d5f
A
2032 if (pmap_empty &&
2033 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2034 assert(vm_map_pmap_is_empty(map,
2035 *address,
2036 *address+size));
2037 }
2038
2039 /*
2040 * For "named" VM objects, let the pager know that the
2041 * memory object is being mapped. Some pagers need to keep
2042 * track of this, to know when they can reclaim the memory
2043 * object, for example.
2044 * VM calls memory_object_map() for each mapping (specifying
2045 * the protection of each mapping) and calls
2046 * memory_object_last_unmap() when all the mappings are gone.
2047 */
2048 pager_prot = max_protection;
2049 if (needs_copy) {
2050 /*
2051 * Copy-On-Write mapping: won't modify
2052 * the memory object.
2053 */
2054 pager_prot &= ~VM_PROT_WRITE;
2055 }
2056 if (!is_submap &&
2057 object != VM_OBJECT_NULL &&
2058 object->named &&
2059 object->pager != MEMORY_OBJECT_NULL) {
2060 vm_object_lock(object);
2061 pager = object->pager;
2062 if (object->named &&
2063 pager != MEMORY_OBJECT_NULL) {
2064 assert(object->pager_ready);
2065 vm_object_mapping_wait(object, THREAD_UNINT);
2066 vm_object_mapping_begin(object);
2067 vm_object_unlock(object);
2068
2069 kr = memory_object_map(pager, pager_prot);
2070 assert(kr == KERN_SUCCESS);
2071
2072 vm_object_lock(object);
2073 vm_object_mapping_end(object);
2074 }
2075 vm_object_unlock(object);
2076 }
2077 } else {
91447636
A
2078 if (new_mapping_established) {
2079 /*
2080 * We have to get rid of the new mappings since we
2081 * won't make them available to the user.
2082 * Try to do that atomically, to minimize the risk
2083 * that someone else creates new mappings in that range.
2084 */
2085 zap_new_map = vm_map_create(PMAP_NULL,
2086 *address,
2087 *address + size,
b0d623f7 2088 map->hdr.entries_pageable);
91447636
A
2089 if (!map_locked) {
2090 vm_map_lock(map);
2091 map_locked = TRUE;
2092 }
2093 (void) vm_map_delete(map, *address, *address+size,
2094 VM_MAP_REMOVE_SAVE_ENTRIES,
2095 zap_new_map);
2096 }
2097 if (zap_old_map != VM_MAP_NULL &&
2098 zap_old_map->hdr.nentries != 0) {
2099 vm_map_entry_t entry1, entry2;
2100
2101 /*
2102 * The new mapping failed. Attempt to restore
2103 * the old mappings, saved in the "zap_old_map".
2104 */
2105 if (!map_locked) {
2106 vm_map_lock(map);
2107 map_locked = TRUE;
2108 }
2109
2110 /* first check if the coast is still clear */
2111 start = vm_map_first_entry(zap_old_map)->vme_start;
2112 end = vm_map_last_entry(zap_old_map)->vme_end;
2113 if (vm_map_lookup_entry(map, start, &entry1) ||
2114 vm_map_lookup_entry(map, end, &entry2) ||
2115 entry1 != entry2) {
2116 /*
2117 * Part of that range has already been
2118 * re-mapped: we can't restore the old
2119 * mappings...
2120 */
2121 vm_map_enter_restore_failures++;
2122 } else {
2123 /*
2124 * Transfer the saved map entries from
2125 * "zap_old_map" to the original "map",
2126 * inserting them all after "entry1".
2127 */
2128 for (entry2 = vm_map_first_entry(zap_old_map);
2129 entry2 != vm_map_to_entry(zap_old_map);
2130 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
2131 vm_map_size_t entry_size;
2132
2133 entry_size = (entry2->vme_end -
2134 entry2->vme_start);
6d2010ae 2135 vm_map_store_entry_unlink(zap_old_map,
91447636 2136 entry2);
2d21ac55 2137 zap_old_map->size -= entry_size;
6d2010ae 2138 vm_map_store_entry_link(map, entry1, entry2);
2d21ac55 2139 map->size += entry_size;
91447636
A
2140 entry1 = entry2;
2141 }
2142 if (map->wiring_required) {
2143 /*
2144 * XXX TODO: we should rewire the
2145 * old pages here...
2146 */
2147 }
2148 vm_map_enter_restore_successes++;
2149 }
2150 }
2151 }
2152
2153 if (map_locked) {
2154 vm_map_unlock(map);
2155 }
2156
2157 /*
2158 * Get rid of the "zap_maps" and all the map entries that
2159 * they may still contain.
2160 */
2161 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 2162 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2163 zap_old_map = VM_MAP_NULL;
2164 }
2165 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 2166 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2167 zap_new_map = VM_MAP_NULL;
2168 }
2169
2170 return result;
1c79356b
A
2171
2172#undef RETURN
2173}
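/*
 * Illustrative sketch (not part of the original source): both placement
 * policies handled by vm_map_enter() above are reachable from user space
 * through the standard Mach interface mach_vm_allocate().  VM_FLAGS_ANYWHERE
 * takes the hole-scanning path, while VM_FLAGS_FIXED (optionally with
 * VM_FLAGS_OVERWRITE) takes the fixed-address path that may zap existing
 * mappings.  User-level code, shown here only as an example.
 */
#if 0	/* example only -- not built as part of vm_map.c */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/vm_statistics.h>

static kern_return_t
example_vm_allocate(void)
{
	mach_vm_address_t	addr = 0;
	mach_vm_size_t		size = 16 * 4096;
	kern_return_t		kr;

	/* let the kernel pick an address: the "anywhere" scan above */
	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* re-allocate the same range in place: the fixed + overwrite path */
	kr = mach_vm_allocate(mach_task_self(), &addr, size,
			      VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE);
	return kr;
}
#endif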
2174
91447636 2175kern_return_t
2d21ac55
A
2176vm_map_enter_mem_object(
2177 vm_map_t target_map,
2178 vm_map_offset_t *address,
2179 vm_map_size_t initial_size,
2180 vm_map_offset_t mask,
2181 int flags,
2182 ipc_port_t port,
2183 vm_object_offset_t offset,
2184 boolean_t copy,
2185 vm_prot_t cur_protection,
2186 vm_prot_t max_protection,
2187 vm_inherit_t inheritance)
91447636 2188{
2d21ac55
A
2189 vm_map_address_t map_addr;
2190 vm_map_size_t map_size;
2191 vm_object_t object;
2192 vm_object_size_t size;
2193 kern_return_t result;
6d2010ae
A
2194 boolean_t mask_cur_protection, mask_max_protection;
2195
2196 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2197 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2198 cur_protection &= ~VM_PROT_IS_MASK;
2199 max_protection &= ~VM_PROT_IS_MASK;
91447636
A
2200
2201 /*
2d21ac55 2202 * Check arguments for validity
91447636 2203 */
2d21ac55
A
2204 if ((target_map == VM_MAP_NULL) ||
2205 (cur_protection & ~VM_PROT_ALL) ||
2206 (max_protection & ~VM_PROT_ALL) ||
2207 (inheritance > VM_INHERIT_LAST_VALID) ||
2208 initial_size == 0)
2209 return KERN_INVALID_ARGUMENT;
6d2010ae 2210
2d21ac55
A
2211 map_addr = vm_map_trunc_page(*address);
2212 map_size = vm_map_round_page(initial_size);
2213 size = vm_object_round_page(initial_size);
593a1d5f 2214
2d21ac55
A
2215 /*
2216 * Find the vm object (if any) corresponding to this port.
2217 */
2218 if (!IP_VALID(port)) {
2219 object = VM_OBJECT_NULL;
2220 offset = 0;
2221 copy = FALSE;
2222 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2223 vm_named_entry_t named_entry;
2224
2225 named_entry = (vm_named_entry_t) port->ip_kobject;
2226 /* a few checks to make sure user is obeying rules */
2227 if (size == 0) {
2228 if (offset >= named_entry->size)
2229 return KERN_INVALID_RIGHT;
2230 size = named_entry->size - offset;
2231 }
6d2010ae
A
2232 if (mask_max_protection) {
2233 max_protection &= named_entry->protection;
2234 }
2235 if (mask_cur_protection) {
2236 cur_protection &= named_entry->protection;
2237 }
2d21ac55
A
2238 if ((named_entry->protection & max_protection) !=
2239 max_protection)
2240 return KERN_INVALID_RIGHT;
2241 if ((named_entry->protection & cur_protection) !=
2242 cur_protection)
2243 return KERN_INVALID_RIGHT;
2244 if (named_entry->size < (offset + size))
2245 return KERN_INVALID_ARGUMENT;
2246
2247 /* the caller's "offset" parameter is relative to the start of the */
2248 /* named entry; convert it to an offset into the backing object */
2249 offset = offset + named_entry->offset;
2250
2251 named_entry_lock(named_entry);
2252 if (named_entry->is_sub_map) {
2253 vm_map_t submap;
2254
2255 submap = named_entry->backing.map;
2256 vm_map_lock(submap);
2257 vm_map_reference(submap);
2258 vm_map_unlock(submap);
2259 named_entry_unlock(named_entry);
2260
2261 result = vm_map_enter(target_map,
2262 &map_addr,
2263 map_size,
2264 mask,
2265 flags | VM_FLAGS_SUBMAP,
2266 (vm_object_t) submap,
2267 offset,
2268 copy,
2269 cur_protection,
2270 max_protection,
2271 inheritance);
2272 if (result != KERN_SUCCESS) {
2273 vm_map_deallocate(submap);
2274 } else {
2275 /*
2276 * No need to lock "submap" just to check its
2277 * "mapped" flag: that flag is never reset
2278 * once it's been set and if we race, we'll
2279 * just end up setting it twice, which is OK.
2280 */
2281 if (submap->mapped == FALSE) {
2282 /*
2283 * This submap has never been mapped.
2284 * Set its "mapped" flag now that it
2285 * has been mapped.
2286 * This happens only for the first ever
2287 * mapping of a "submap".
2288 */
2289 vm_map_lock(submap);
2290 submap->mapped = TRUE;
2291 vm_map_unlock(submap);
2292 }
2293 *address = map_addr;
2294 }
2295 return result;
2296
2297 } else if (named_entry->is_pager) {
2298 unsigned int access;
2299 vm_prot_t protections;
2300 unsigned int wimg_mode;
2d21ac55
A
2301
2302 protections = named_entry->protection & VM_PROT_ALL;
2303 access = GET_MAP_MEM(named_entry->protection);
2304
2305 object = vm_object_enter(named_entry->backing.pager,
2306 named_entry->size,
2307 named_entry->internal,
2308 FALSE,
2309 FALSE);
2310 if (object == VM_OBJECT_NULL) {
2311 named_entry_unlock(named_entry);
2312 return KERN_INVALID_OBJECT;
2313 }
2314
2315 /* JMM - drop reference on pager here */
2316
2317 /* create an extra ref for the named entry */
2318 vm_object_lock(object);
2319 vm_object_reference_locked(object);
2320 named_entry->backing.object = object;
2321 named_entry->is_pager = FALSE;
2322 named_entry_unlock(named_entry);
2323
2324 wimg_mode = object->wimg_bits;
6d2010ae 2325
2d21ac55
A
2326 if (access == MAP_MEM_IO) {
2327 wimg_mode = VM_WIMG_IO;
2328 } else if (access == MAP_MEM_COPYBACK) {
2329 wimg_mode = VM_WIMG_USE_DEFAULT;
2330 } else if (access == MAP_MEM_WTHRU) {
2331 wimg_mode = VM_WIMG_WTHRU;
2332 } else if (access == MAP_MEM_WCOMB) {
2333 wimg_mode = VM_WIMG_WCOMB;
2334 }
2d21ac55
A
2335
2336 /* wait for object (if any) to be ready */
2337 if (!named_entry->internal) {
2338 while (!object->pager_ready) {
2339 vm_object_wait(
2340 object,
2341 VM_OBJECT_EVENT_PAGER_READY,
2342 THREAD_UNINT);
2343 vm_object_lock(object);
2344 }
2345 }
2346
6d2010ae
A
2347 if (object->wimg_bits != wimg_mode)
2348 vm_object_change_wimg_mode(object, wimg_mode);
2d21ac55 2349
2d21ac55 2350 object->true_share = TRUE;
6d2010ae 2351
2d21ac55
A
2352 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2353 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2354 vm_object_unlock(object);
2355 } else {
2356 /* This is the case where we are going to map */
2357 /* an already mapped object. If the object is */
2358 /* not ready, it is internal. An external */
2359 /* object cannot be mapped until it is ready, */
2360 /* so we can avoid the ready check */
2361 /* in this case. */
2362 object = named_entry->backing.object;
2363 assert(object != VM_OBJECT_NULL);
2364 named_entry_unlock(named_entry);
2365 vm_object_reference(object);
2366 }
2367 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2368 /*
2369 * JMM - This is temporary until we unify named entries
2370 * and raw memory objects.
2371 *
2372 * Detected fake ip_kotype for a memory object. In
2373 * this case, the port isn't really a port at all, but
2374 * instead is just a raw memory object.
2375 */
2376
2377 object = vm_object_enter((memory_object_t)port,
2378 size, FALSE, FALSE, FALSE);
2379 if (object == VM_OBJECT_NULL)
2380 return KERN_INVALID_OBJECT;
2381
2382 /* wait for object (if any) to be ready */
2383 if (object != VM_OBJECT_NULL) {
2384 if (object == kernel_object) {
2385 printf("Warning: Attempt to map kernel object"
2386 " by a non-private kernel entity\n");
2387 return KERN_INVALID_OBJECT;
2388 }
b0d623f7 2389 if (!object->pager_ready) {
2d21ac55 2390 vm_object_lock(object);
b0d623f7
A
2391
2392 while (!object->pager_ready) {
2393 vm_object_wait(object,
2394 VM_OBJECT_EVENT_PAGER_READY,
2395 THREAD_UNINT);
2396 vm_object_lock(object);
2397 }
2398 vm_object_unlock(object);
2d21ac55 2399 }
2d21ac55
A
2400 }
2401 } else {
2402 return KERN_INVALID_OBJECT;
2403 }
2404
593a1d5f
A
2405 if (object != VM_OBJECT_NULL &&
2406 object->named &&
2407 object->pager != MEMORY_OBJECT_NULL &&
2408 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2409 memory_object_t pager;
2410 vm_prot_t pager_prot;
2411 kern_return_t kr;
2412
2413 /*
2414 * For "named" VM objects, let the pager know that the
2415 * memory object is being mapped. Some pagers need to keep
2416 * track of this, to know when they can reclaim the memory
2417 * object, for example.
2418 * VM calls memory_object_map() for each mapping (specifying
2419 * the protection of each mapping) and calls
2420 * memory_object_last_unmap() when all the mappings are gone.
2421 */
2422 pager_prot = max_protection;
2423 if (copy) {
2424 /*
2425 * Copy-On-Write mapping: won't modify the
2426 * memory object.
2427 */
2428 pager_prot &= ~VM_PROT_WRITE;
2429 }
2430 vm_object_lock(object);
2431 pager = object->pager;
2432 if (object->named &&
2433 pager != MEMORY_OBJECT_NULL &&
2434 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2435 assert(object->pager_ready);
2436 vm_object_mapping_wait(object, THREAD_UNINT);
2437 vm_object_mapping_begin(object);
2438 vm_object_unlock(object);
2439
2440 kr = memory_object_map(pager, pager_prot);
2441 assert(kr == KERN_SUCCESS);
2442
2443 vm_object_lock(object);
2444 vm_object_mapping_end(object);
2445 }
2446 vm_object_unlock(object);
2447 }
2448
2d21ac55
A
2449 /*
2450 * Perform the copy if requested
2451 */
2452
2453 if (copy) {
2454 vm_object_t new_object;
2455 vm_object_offset_t new_offset;
2456
2457 result = vm_object_copy_strategically(object, offset, size,
2458 &new_object, &new_offset,
2459 &copy);
2460
2461
2462 if (result == KERN_MEMORY_RESTART_COPY) {
2463 boolean_t success;
2464 boolean_t src_needs_copy;
2465
2466 /*
2467 * XXX
2468 * We currently ignore src_needs_copy.
2469 * This really is the issue of how to make
2470 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2471 * non-kernel users to use. Solution forthcoming.
2472 * In the meantime, since we don't allow non-kernel
2473 * memory managers to specify symmetric copy,
2474 * we won't run into problems here.
2475 */
2476 new_object = object;
2477 new_offset = offset;
2478 success = vm_object_copy_quickly(&new_object,
2479 new_offset, size,
2480 &src_needs_copy,
2481 &copy);
2482 assert(success);
2483 result = KERN_SUCCESS;
2484 }
2485 /*
2486 * Throw away the reference to the
2487 * original object, as it won't be mapped.
2488 */
2489
2490 vm_object_deallocate(object);
2491
2492 if (result != KERN_SUCCESS)
2493 return result;
2494
2495 object = new_object;
2496 offset = new_offset;
2497 }
2498
2499 result = vm_map_enter(target_map,
2500 &map_addr, map_size,
2501 (vm_map_offset_t)mask,
2502 flags,
2503 object, offset,
2504 copy,
2505 cur_protection, max_protection, inheritance);
2506 if (result != KERN_SUCCESS)
2507 vm_object_deallocate(object);
2508 *address = map_addr;
2509 return result;
2510}
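/*
 * Illustrative sketch (not part of the original source): the named-entry
 * case above is what services the user-level mach_vm_map() call when it is
 * given a memory-entry port created with mach_make_memory_entry_64().
 * Standard Mach user interfaces only; the helper name and values are
 * hypothetical.
 */
#if 0	/* example only -- not built as part of vm_map.c */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
example_map_named_entry(mach_vm_address_t src, mach_vm_size_t size,
			mach_vm_address_t *dst)
{
	mach_port_t		entry = MACH_PORT_NULL;
	memory_object_size_t	entry_size = size;
	kern_return_t		kr;

	/* wrap an existing range of our own address space in a named entry */
	kr = mach_make_memory_entry_64(mach_task_self(), &entry_size, src,
				       VM_PROT_READ | VM_PROT_WRITE,
				       &entry, MACH_PORT_NULL);
	if (kr != KERN_SUCCESS)
		return kr;

	/* map that entry at a kernel-chosen address elsewhere in the task */
	*dst = 0;
	kr = mach_vm_map(mach_task_self(), dst, entry_size, 0,
			 VM_FLAGS_ANYWHERE, entry, 0, FALSE,
			 VM_PROT_READ | VM_PROT_WRITE,
			 VM_PROT_READ | VM_PROT_WRITE,
			 VM_INHERIT_DEFAULT);
	mach_port_deallocate(mach_task_self(), entry);
	return kr;
}
#endif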
2511
b0d623f7
A
2512
2513
2514
2515kern_return_t
2516vm_map_enter_mem_object_control(
2517 vm_map_t target_map,
2518 vm_map_offset_t *address,
2519 vm_map_size_t initial_size,
2520 vm_map_offset_t mask,
2521 int flags,
2522 memory_object_control_t control,
2523 vm_object_offset_t offset,
2524 boolean_t copy,
2525 vm_prot_t cur_protection,
2526 vm_prot_t max_protection,
2527 vm_inherit_t inheritance)
2528{
2529 vm_map_address_t map_addr;
2530 vm_map_size_t map_size;
2531 vm_object_t object;
2532 vm_object_size_t size;
2533 kern_return_t result;
2534 memory_object_t pager;
2535 vm_prot_t pager_prot;
2536 kern_return_t kr;
2537
2538 /*
2539 * Check arguments for validity
2540 */
2541 if ((target_map == VM_MAP_NULL) ||
2542 (cur_protection & ~VM_PROT_ALL) ||
2543 (max_protection & ~VM_PROT_ALL) ||
2544 (inheritance > VM_INHERIT_LAST_VALID) ||
2545 initial_size == 0)
2546 return KERN_INVALID_ARGUMENT;
2547
2548 map_addr = vm_map_trunc_page(*address);
2549 map_size = vm_map_round_page(initial_size);
2550 size = vm_object_round_page(initial_size);
2551
2552 object = memory_object_control_to_vm_object(control);
2553
2554 if (object == VM_OBJECT_NULL)
2555 return KERN_INVALID_OBJECT;
2556
2557 if (object == kernel_object) {
2558 printf("Warning: Attempt to map kernel object"
2559 " by a non-private kernel entity\n");
2560 return KERN_INVALID_OBJECT;
2561 }
2562
2563 vm_object_lock(object);
2564 object->ref_count++;
2565 vm_object_res_reference(object);
2566
2567 /*
2568 * For "named" VM objects, let the pager know that the
2569 * memory object is being mapped. Some pagers need to keep
2570 * track of this, to know when they can reclaim the memory
2571 * object, for example.
2572 * VM calls memory_object_map() for each mapping (specifying
2573 * the protection of each mapping) and calls
2574 * memory_object_last_unmap() when all the mappings are gone.
2575 */
2576 pager_prot = max_protection;
2577 if (copy) {
2578 pager_prot &= ~VM_PROT_WRITE;
2579 }
2580 pager = object->pager;
2581 if (object->named &&
2582 pager != MEMORY_OBJECT_NULL &&
2583 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2584 assert(object->pager_ready);
2585 vm_object_mapping_wait(object, THREAD_UNINT);
2586 vm_object_mapping_begin(object);
2587 vm_object_unlock(object);
2588
2589 kr = memory_object_map(pager, pager_prot);
2590 assert(kr == KERN_SUCCESS);
2591
2592 vm_object_lock(object);
2593 vm_object_mapping_end(object);
2594 }
2595 vm_object_unlock(object);
2596
2597 /*
2598 * Perform the copy if requested
2599 */
2600
2601 if (copy) {
2602 vm_object_t new_object;
2603 vm_object_offset_t new_offset;
2604
2605 result = vm_object_copy_strategically(object, offset, size,
2606 &new_object, &new_offset,
2607 &copy);
2608
2609
2610 if (result == KERN_MEMORY_RESTART_COPY) {
2611 boolean_t success;
2612 boolean_t src_needs_copy;
2613
2614 /*
2615 * XXX
2616 * We currently ignore src_needs_copy.
2617 * This really is the issue of how to make
2618 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2619 * non-kernel users to use. Solution forthcoming.
2620 * In the meantime, since we don't allow non-kernel
2621 * memory managers to specify symmetric copy,
2622 * we won't run into problems here.
2623 */
2624 new_object = object;
2625 new_offset = offset;
2626 success = vm_object_copy_quickly(&new_object,
2627 new_offset, size,
2628 &src_needs_copy,
2629 &copy);
2630 assert(success);
2631 result = KERN_SUCCESS;
2632 }
2633 /*
2634 * Throw away the reference to the
2635 * original object, as it won't be mapped.
2636 */
2637
2638 vm_object_deallocate(object);
2639
2640 if (result != KERN_SUCCESS)
2641 return result;
2642
2643 object = new_object;
2644 offset = new_offset;
2645 }
2646
2647 result = vm_map_enter(target_map,
2648 &map_addr, map_size,
2649 (vm_map_offset_t)mask,
2650 flags,
2651 object, offset,
2652 copy,
2653 cur_protection, max_protection, inheritance);
2654 if (result != KERN_SUCCESS)
2655 vm_object_deallocate(object);
2656 *address = map_addr;
2657
2658 return result;
2659}
2660
2661
2d21ac55
A
2662#if VM_CPM
2663
2664#ifdef MACH_ASSERT
2665extern pmap_paddr_t avail_start, avail_end;
2666#endif
2667
2668/*
2669 * Allocate memory in the specified map, with the caveat that
2670 * the memory is physically contiguous. This call may fail
2671 * if the system can't find sufficient contiguous memory.
2672 * This call may cause or lead to heart-stopping amounts of
2673 * paging activity.
2674 *
2675 * Memory obtained from this call should be freed in the
2676 * normal way, viz., via vm_deallocate.
2677 */
2678kern_return_t
2679vm_map_enter_cpm(
2680 vm_map_t map,
2681 vm_map_offset_t *addr,
2682 vm_map_size_t size,
2683 int flags)
2684{
2685 vm_object_t cpm_obj;
2686 pmap_t pmap;
2687 vm_page_t m, pages;
2688 kern_return_t kr;
2689 vm_map_offset_t va, start, end, offset;
2690#if MACH_ASSERT
2691 vm_map_offset_t prev_addr;
2692#endif /* MACH_ASSERT */
2693
2694 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2695
2696 if (!vm_allocate_cpm_enabled)
2697 return KERN_FAILURE;
2698
2699 if (size == 0) {
2700 *addr = 0;
2701 return KERN_SUCCESS;
2702 }
2703 if (anywhere)
2704 *addr = vm_map_min(map);
2705 else
2706 *addr = vm_map_trunc_page(*addr);
2707 size = vm_map_round_page(size);
2708
2709 /*
2710 * LP64todo - cpm_allocate should probably allow
2711 * allocations of >4GB, but not with the current
2712 * algorithm, so just cast down the size for now.
2713 */
2714 if (size > VM_MAX_ADDRESS)
2715 return KERN_RESOURCE_SHORTAGE;
2716 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
b0d623f7 2717 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2d21ac55
A
2718 return kr;
2719
2720 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2721 assert(cpm_obj != VM_OBJECT_NULL);
2722 assert(cpm_obj->internal);
2723 assert(cpm_obj->size == (vm_object_size_t)size);
2724 assert(cpm_obj->can_persist == FALSE);
2725 assert(cpm_obj->pager_created == FALSE);
2726 assert(cpm_obj->pageout == FALSE);
2727 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
2728
2729 /*
2730 * Insert pages into object.
2731 */
2732
2733 vm_object_lock(cpm_obj);
2734 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2735 m = pages;
2736 pages = NEXT_PAGE(m);
0c530ab8 2737 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
2738
2739 assert(!m->gobbled);
2740 assert(!m->wanted);
2741 assert(!m->pageout);
2742 assert(!m->tabled);
b0d623f7 2743 assert(VM_PAGE_WIRED(m));
91447636
A
2744 /*
2745 * ENCRYPTED SWAP:
2746 * "m" is not supposed to be pageable, so it
2747 * should not be encrypted. It wouldn't be safe
2748 * to enter it in a new VM object while encrypted.
2749 */
2750 ASSERT_PAGE_DECRYPTED(m);
2751 assert(m->busy);
0c530ab8 2752 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
91447636
A
2753
2754 m->busy = FALSE;
2755 vm_page_insert(m, cpm_obj, offset);
2756 }
2757 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2758 vm_object_unlock(cpm_obj);
2759
2760 /*
2761 * Hang onto a reference on the object in case a
2762 * multi-threaded application for some reason decides
2763 * to deallocate the portion of the address space into
2764 * which we will insert this object.
2765 *
2766 * Unfortunately, we must insert the object now before
2767 * we can talk to the pmap module about which addresses
2768 * must be wired down. Hence, the race with a multi-
2769 * threaded app.
2770 */
2771 vm_object_reference(cpm_obj);
2772
2773 /*
2774 * Insert object into map.
2775 */
2776
2777 kr = vm_map_enter(
2d21ac55
A
2778 map,
2779 addr,
2780 size,
2781 (vm_map_offset_t)0,
2782 flags,
2783 cpm_obj,
2784 (vm_object_offset_t)0,
2785 FALSE,
2786 VM_PROT_ALL,
2787 VM_PROT_ALL,
2788 VM_INHERIT_DEFAULT);
91447636
A
2789
2790 if (kr != KERN_SUCCESS) {
2791 /*
2792 * A CPM object doesn't have can_persist set,
2793 * so all we have to do is deallocate it to
2794 * free up these pages.
2795 */
2796 assert(cpm_obj->pager_created == FALSE);
2797 assert(cpm_obj->can_persist == FALSE);
2798 assert(cpm_obj->pageout == FALSE);
2799 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2800 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2801 vm_object_deallocate(cpm_obj); /* kill creation ref */
2802 }
2803
2804 /*
2805 * Inform the physical mapping system that the
2806 * range of addresses may not fault, so that
2807 * page tables and such can be locked down as well.
2808 */
2809 start = *addr;
2810 end = start + size;
2811 pmap = vm_map_pmap(map);
2812 pmap_pageable(pmap, start, end, FALSE);
2813
2814 /*
2815 * Enter each page into the pmap, to avoid faults.
2816 * Note that this loop could be coded more efficiently,
2817 * if the need arose, rather than looking up each page
2818 * again.
2819 */
2820 for (offset = 0, va = start; offset < size;
2821 va += PAGE_SIZE, offset += PAGE_SIZE) {
2d21ac55
A
2822 int type_of_fault;
2823
91447636
A
2824 vm_object_lock(cpm_obj);
2825 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 2826 assert(m != VM_PAGE_NULL);
2d21ac55
A
2827
2828 vm_page_zero_fill(m);
2829
2830 type_of_fault = DBG_ZERO_FILL_FAULT;
2831
6d2010ae
A
2832 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2833 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2d21ac55
A
2834 &type_of_fault);
2835
2836 vm_object_unlock(cpm_obj);
91447636
A
2837 }
2838
2839#if MACH_ASSERT
2840 /*
2841 * Verify ordering in address space.
2842 */
2843 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2844 vm_object_lock(cpm_obj);
2845 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2846 vm_object_unlock(cpm_obj);
2847 if (m == VM_PAGE_NULL)
2848 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2849 cpm_obj, offset);
2850 assert(m->tabled);
2851 assert(!m->busy);
2852 assert(!m->wanted);
2853 assert(!m->fictitious);
2854 assert(!m->private);
2855 assert(!m->absent);
2856 assert(!m->error);
2857 assert(!m->cleaning);
2858 assert(!m->precious);
2859 assert(!m->clustered);
2860 if (offset != 0) {
2861 if (m->phys_page != prev_addr + 1) {
2862 printf("start 0x%x end 0x%x va 0x%x\n",
2863 start, end, va);
2864 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2865 printf("m 0x%x prev_address 0x%x\n", m,
2866 prev_addr);
2867 panic("vm_allocate_cpm: pages not contig!");
2868 }
2869 }
2870 prev_addr = m->phys_page;
2871 }
2872#endif /* MACH_ASSERT */
2873
2874 vm_object_deallocate(cpm_obj); /* kill extra ref */
2875
2876 return kr;
2877}
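/*
 * Illustrative kernel-side sketch (hypothetical, not from the original
 * source): a caller lets vm_map_enter_cpm() above pick an address and, as
 * the block comment before the routine notes, frees the memory in the
 * normal way via vm_deallocate() when it is done with it.
 */
#if 0	/* example only */
	vm_map_offset_t	cpm_addr = 0;
	kern_return_t	cpm_kr;

	cpm_kr = vm_map_enter_cpm(kernel_map, &cpm_addr,
				  (vm_map_size_t)(4 * PAGE_SIZE),
				  VM_FLAGS_ANYWHERE);
	if (cpm_kr == KERN_SUCCESS) {
		/* ... use the physically contiguous range at cpm_addr ... */
		(void) vm_deallocate(kernel_map, cpm_addr,
				     (vm_map_size_t)(4 * PAGE_SIZE));
	}
#endif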
2878
2879
2880#else /* VM_CPM */
2881
2882/*
2883 * Interface is defined in all cases, but unless the kernel
2884 * is built explicitly for this option, the interface does
2885 * nothing.
2886 */
2887
2888kern_return_t
2889vm_map_enter_cpm(
2890 __unused vm_map_t map,
2891 __unused vm_map_offset_t *addr,
2892 __unused vm_map_size_t size,
2893 __unused int flags)
2894{
2895 return KERN_FAILURE;
2896}
2897#endif /* VM_CPM */
2898
b0d623f7
A
2899/* Not used without nested pmaps */
2900#ifndef NO_NESTED_PMAP
2d21ac55
A
2901/*
2902 * Clip and unnest a portion of a nested submap mapping.
2903 */
b0d623f7
A
2904
2905
2d21ac55
A
2906static void
2907vm_map_clip_unnest(
2908 vm_map_t map,
2909 vm_map_entry_t entry,
2910 vm_map_offset_t start_unnest,
2911 vm_map_offset_t end_unnest)
2912{
b0d623f7
A
2913 vm_map_offset_t old_start_unnest = start_unnest;
2914 vm_map_offset_t old_end_unnest = end_unnest;
2915
2d21ac55
A
2916 assert(entry->is_sub_map);
2917 assert(entry->object.sub_map != NULL);
2918
b0d623f7
A
2919 /*
2920 * Query the platform for the optimal unnest range.
2921 * DRK: There's some duplication of effort here, since
2922 * callers may have adjusted the range to some extent. This
2923 * routine was introduced to support 1GiB subtree nesting
2924 * for x86 platforms, which can also nest on 2MiB boundaries
2925 * depending on size/alignment.
2926 */
2927 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2928 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2929 }
2930
2d21ac55
A
2931 if (entry->vme_start > start_unnest ||
2932 entry->vme_end < end_unnest) {
2933 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2934 "bad nested entry: start=0x%llx end=0x%llx\n",
2935 (long long)start_unnest, (long long)end_unnest,
2936 (long long)entry->vme_start, (long long)entry->vme_end);
2937 }
b0d623f7 2938
2d21ac55
A
2939 if (start_unnest > entry->vme_start) {
2940 _vm_map_clip_start(&map->hdr,
2941 entry,
2942 start_unnest);
6d2010ae 2943 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
2944 }
2945 if (entry->vme_end > end_unnest) {
2946 _vm_map_clip_end(&map->hdr,
2947 entry,
2948 end_unnest);
6d2010ae 2949 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
2950 }
2951
2952 pmap_unnest(map->pmap,
2953 entry->vme_start,
2954 entry->vme_end - entry->vme_start);
2955 if ((map->mapped) && (map->ref_count)) {
2956 /* clean up parent map/maps */
2957 vm_map_submap_pmap_clean(
2958 map, entry->vme_start,
2959 entry->vme_end,
2960 entry->object.sub_map,
2961 entry->offset);
2962 }
2963 entry->use_pmap = FALSE;
2964}
b0d623f7 2965#endif /* NO_NESTED_PMAP */
2d21ac55 2966
1c79356b
A
2967/*
2968 * vm_map_clip_start: [ internal use only ]
2969 *
2970 * Asserts that the given entry begins at or after
2971 * the specified address; if necessary,
2972 * it splits the entry into two.
2973 */
2d21ac55
A
2974static void
2975vm_map_clip_start(
2976 vm_map_t map,
2977 vm_map_entry_t entry,
2978 vm_map_offset_t startaddr)
2979{
0c530ab8 2980#ifndef NO_NESTED_PMAP
2d21ac55
A
2981 if (entry->use_pmap &&
2982 startaddr >= entry->vme_start) {
2983 vm_map_offset_t start_unnest, end_unnest;
2984
2985 /*
2986 * Make sure "startaddr" is no longer in a nested range
2987 * before we clip. Unnest only the minimum range the platform
2988 * can handle.
b0d623f7
A
2989 * vm_map_clip_unnest may perform additional adjustments to
2990 * the unnest range.
2d21ac55
A
2991 */
2992 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2993 end_unnest = start_unnest + pmap_nesting_size_min;
2994 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2995 }
2996#endif /* NO_NESTED_PMAP */
2997 if (startaddr > entry->vme_start) {
2998 if (entry->object.vm_object &&
2999 !entry->is_sub_map &&
3000 entry->object.vm_object->phys_contiguous) {
3001 pmap_remove(map->pmap,
3002 (addr64_t)(entry->vme_start),
3003 (addr64_t)(entry->vme_end));
3004 }
3005 _vm_map_clip_start(&map->hdr, entry, startaddr);
6d2010ae 3006 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
3007 }
3008}
3009
1c79356b
A
3010
3011#define vm_map_copy_clip_start(copy, entry, startaddr) \
3012 MACRO_BEGIN \
3013 if ((startaddr) > (entry)->vme_start) \
3014 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3015 MACRO_END
3016
3017/*
3018 * This routine is called only when it is known that
3019 * the entry must be split.
3020 */
91447636 3021static void
1c79356b
A
3022_vm_map_clip_start(
3023 register struct vm_map_header *map_header,
3024 register vm_map_entry_t entry,
91447636 3025 register vm_map_offset_t start)
1c79356b
A
3026{
3027 register vm_map_entry_t new_entry;
3028
3029 /*
3030 * Split off the front portion --
3031 * note that we must insert the new
3032 * entry BEFORE this one, so that
3033 * this entry has the specified starting
3034 * address.
3035 */
3036
3037 new_entry = _vm_map_entry_create(map_header);
3038 vm_map_entry_copy_full(new_entry, entry);
3039
3040 new_entry->vme_end = start;
3041 entry->offset += (start - entry->vme_start);
3042 entry->vme_start = start;
3043
6d2010ae 3044 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b
A
3045
3046 if (entry->is_sub_map)
2d21ac55 3047 vm_map_reference(new_entry->object.sub_map);
1c79356b
A
3048 else
3049 vm_object_reference(new_entry->object.vm_object);
3050}
3051
3052
3053/*
3054 * vm_map_clip_end: [ internal use only ]
3055 *
3056 * Asserts that the given entry ends at or before
3057 * the specified address; if necessary,
3058 * it splits the entry into two.
3059 */
2d21ac55
A
3060static void
3061vm_map_clip_end(
3062 vm_map_t map,
3063 vm_map_entry_t entry,
3064 vm_map_offset_t endaddr)
3065{
3066 if (endaddr > entry->vme_end) {
3067 /*
3068 * Within the scope of this clipping, limit "endaddr" to
3069 * the end of this map entry...
3070 */
3071 endaddr = entry->vme_end;
3072 }
3073#ifndef NO_NESTED_PMAP
3074 if (entry->use_pmap) {
3075 vm_map_offset_t start_unnest, end_unnest;
3076
3077 /*
3078 * Make sure the range between the start of this entry and
3079 * the new "endaddr" is no longer nested before we clip.
3080 * Unnest only the minimum range the platform can handle.
b0d623f7
A
3081 * vm_map_clip_unnest may perform additional adjustments to
3082 * the unnest range.
2d21ac55
A
3083 */
3084 start_unnest = entry->vme_start;
3085 end_unnest =
3086 (endaddr + pmap_nesting_size_min - 1) &
3087 ~(pmap_nesting_size_min - 1);
3088 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3089 }
3090#endif /* NO_NESTED_PMAP */
3091 if (endaddr < entry->vme_end) {
3092 if (entry->object.vm_object &&
3093 !entry->is_sub_map &&
3094 entry->object.vm_object->phys_contiguous) {
3095 pmap_remove(map->pmap,
3096 (addr64_t)(entry->vme_start),
3097 (addr64_t)(entry->vme_end));
3098 }
3099 _vm_map_clip_end(&map->hdr, entry, endaddr);
6d2010ae 3100 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
3101 }
3102}
0c530ab8 3103
1c79356b
A
3104
3105#define vm_map_copy_clip_end(copy, entry, endaddr) \
3106 MACRO_BEGIN \
3107 if ((endaddr) < (entry)->vme_end) \
3108 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3109 MACRO_END
3110
3111/*
3112 * This routine is called only when it is known that
3113 * the entry must be split.
3114 */
91447636 3115static void
1c79356b
A
3116_vm_map_clip_end(
3117 register struct vm_map_header *map_header,
3118 register vm_map_entry_t entry,
2d21ac55 3119 register vm_map_offset_t end)
1c79356b
A
3120{
3121 register vm_map_entry_t new_entry;
3122
3123 /*
3124 * Create a new entry and insert it
3125 * AFTER the specified entry
3126 */
3127
3128 new_entry = _vm_map_entry_create(map_header);
3129 vm_map_entry_copy_full(new_entry, entry);
3130
3131 new_entry->vme_start = entry->vme_end = end;
3132 new_entry->offset += (end - entry->vme_start);
3133
6d2010ae 3134 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b
A
3135
3136 if (entry->is_sub_map)
2d21ac55 3137 vm_map_reference(new_entry->object.sub_map);
1c79356b
A
3138 else
3139 vm_object_reference(new_entry->object.vm_object);
3140}
3141
3142
3143/*
3144 * VM_MAP_RANGE_CHECK: [ internal use only ]
3145 *
3146 * Asserts that the starting and ending region
3147 * addresses fall within the valid range of the map.
3148 */
2d21ac55
A
3149#define VM_MAP_RANGE_CHECK(map, start, end) \
3150 MACRO_BEGIN \
3151 if (start < vm_map_min(map)) \
3152 start = vm_map_min(map); \
3153 if (end > vm_map_max(map)) \
3154 end = vm_map_max(map); \
3155 if (start > end) \
3156 start = end; \
3157 MACRO_END
1c79356b
A
3158
3159/*
3160 * vm_map_range_check: [ internal use only ]
3161 *
3162 * Check that the region defined by the specified start and
3163 * end addresses is wholly contained within a single map
3164 * entry or set of adjacent map entries of the specified map,
3165 * i.e. the specified region contains no unmapped space.
3166 * If any or all of the region is unmapped, FALSE is returned.
3167 * Otherwise, TRUE is returned and if the output argument 'entry'
3168 * is not NULL it points to the map entry containing the start
3169 * of the region.
3170 *
3171 * The map is locked for reading on entry and is left locked.
3172 */
91447636 3173static boolean_t
1c79356b
A
3174vm_map_range_check(
3175 register vm_map_t map,
91447636
A
3176 register vm_map_offset_t start,
3177 register vm_map_offset_t end,
1c79356b
A
3178 vm_map_entry_t *entry)
3179{
3180 vm_map_entry_t cur;
91447636 3181 register vm_map_offset_t prev;
1c79356b
A
3182
3183 /*
3184 * Basic sanity checks first
3185 */
3186 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3187 return (FALSE);
3188
3189 /*
3190 * Check first if the region starts within a valid
3191 * mapping for the map.
3192 */
3193 if (!vm_map_lookup_entry(map, start, &cur))
3194 return (FALSE);
3195
3196 /*
3197 * Optimize for the case that the region is contained
3198 * in a single map entry.
3199 */
3200 if (entry != (vm_map_entry_t *) NULL)
3201 *entry = cur;
3202 if (end <= cur->vme_end)
3203 return (TRUE);
3204
3205 /*
3206 * If the region is not wholly contained within a
3207 * single entry, walk the entries looking for holes.
3208 */
3209 prev = cur->vme_end;
3210 cur = cur->vme_next;
3211 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3212 if (end <= cur->vme_end)
3213 return (TRUE);
3214 prev = cur->vme_end;
3215 cur = cur->vme_next;
3216 }
3217 return (FALSE);
3218}
3219
3220/*
3221 * vm_map_submap: [ kernel use only ]
3222 *
3223 * Mark the given range as handled by a subordinate map.
3224 *
3225 * This range must have been created with vm_map_find using
3226 * the vm_submap_object, and no other operations may have been
3227 * performed on this range prior to calling vm_map_submap.
3228 *
3229 * Only a limited number of operations can be performed
3230 * within this range after calling vm_map_submap:
3231 * vm_fault
3232 * [Don't try vm_map_copyin!]
3233 *
3234 * To remove a submapping, one must first remove the
3235 * range from the superior map, and then destroy the
3236 * submap (if desired). [Better yet, don't try it.]
3237 */
3238kern_return_t
3239vm_map_submap(
91447636
A
3240 vm_map_t map,
3241 vm_map_offset_t start,
3242 vm_map_offset_t end,
1c79356b 3243 vm_map_t submap,
91447636 3244 vm_map_offset_t offset,
0c530ab8 3245#ifdef NO_NESTED_PMAP
91447636 3246 __unused
0c530ab8 3247#endif /* NO_NESTED_PMAP */
1c79356b
A
3248 boolean_t use_pmap)
3249{
3250 vm_map_entry_t entry;
3251 register kern_return_t result = KERN_INVALID_ARGUMENT;
3252 register vm_object_t object;
3253
3254 vm_map_lock(map);
3255
2d21ac55 3256 if (! vm_map_lookup_entry(map, start, &entry)) {
1c79356b 3257 entry = entry->vme_next;
2d21ac55 3258 }
1c79356b 3259
2d21ac55
A
3260 if (entry == vm_map_to_entry(map) ||
3261 entry->is_sub_map) {
1c79356b
A
3262 vm_map_unlock(map);
3263 return KERN_INVALID_ARGUMENT;
3264 }
3265
2d21ac55
A
3266 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3267 vm_map_clip_start(map, entry, start);
1c79356b
A
3268 vm_map_clip_end(map, entry, end);
3269
3270 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3271 (!entry->is_sub_map) &&
3272 ((object = entry->object.vm_object) == vm_submap_object) &&
3273 (object->resident_page_count == 0) &&
3274 (object->copy == VM_OBJECT_NULL) &&
3275 (object->shadow == VM_OBJECT_NULL) &&
3276 (!object->pager_created)) {
2d21ac55
A
3277 entry->offset = (vm_object_offset_t)offset;
3278 entry->object.vm_object = VM_OBJECT_NULL;
3279 vm_object_deallocate(object);
3280 entry->is_sub_map = TRUE;
3281 entry->object.sub_map = submap;
3282 vm_map_reference(submap);
3283 submap->mapped = TRUE;
3284
0c530ab8 3285#ifndef NO_NESTED_PMAP
2d21ac55
A
3286 if (use_pmap) {
3287 /* nest if platform code will allow */
3288 if(submap->pmap == NULL) {
3289 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3290 if(submap->pmap == PMAP_NULL) {
3291 vm_map_unlock(map);
3292 return(KERN_NO_SPACE);
55e303ae 3293 }
55e303ae 3294 }
2d21ac55
A
3295 result = pmap_nest(map->pmap,
3296 (entry->object.sub_map)->pmap,
3297 (addr64_t)start,
3298 (addr64_t)start,
3299 (uint64_t)(end - start));
3300 if(result)
3301 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3302 entry->use_pmap = TRUE;
3303 }
0c530ab8 3304#else /* NO_NESTED_PMAP */
2d21ac55 3305 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 3306#endif /* NO_NESTED_PMAP */
2d21ac55 3307 result = KERN_SUCCESS;
1c79356b
A
3308 }
3309 vm_map_unlock(map);
3310
3311 return(result);
3312}
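/*
 * Illustrative kernel-side sketch (hypothetical, not from the original
 * source): per the block comment before vm_map_submap(), the range must
 * first be reserved against vm_submap_object and only then handed over to
 * the subordinate map -- here via vm_map_enter() in this implementation
 * (the "vm_map_find" of that comment).  "parent_map", "child_map",
 * "sub_start" and "sub_size" are placeholders.
 */
#if 0	/* example only */
	vm_map_offset_t	sub_start = /* caller-chosen address */ 0;
	vm_map_size_t	sub_size = /* caller-chosen size */ 0;
	kern_return_t	sub_kr;

	sub_kr = vm_map_enter(parent_map, &sub_start, sub_size,
			      (vm_map_offset_t)0, VM_FLAGS_FIXED,
			      vm_submap_object, (vm_object_offset_t)0, FALSE,
			      VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (sub_kr == KERN_SUCCESS)
		sub_kr = vm_map_submap(parent_map, sub_start,
				       sub_start + sub_size, child_map,
				       (vm_map_offset_t)0, FALSE /* use_pmap */);
#endif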
3313
3314/*
3315 * vm_map_protect:
3316 *
3317 * Sets the protection of the specified address
3318 * region in the target map. If "set_max" is
3319 * specified, the maximum protection is to be set;
3320 * otherwise, only the current protection is affected.
3321 */
3322kern_return_t
3323vm_map_protect(
3324 register vm_map_t map,
91447636
A
3325 register vm_map_offset_t start,
3326 register vm_map_offset_t end,
1c79356b
A
3327 register vm_prot_t new_prot,
3328 register boolean_t set_max)
3329{
3330 register vm_map_entry_t current;
2d21ac55 3331 register vm_map_offset_t prev;
1c79356b
A
3332 vm_map_entry_t entry;
3333 vm_prot_t new_max;
1c79356b
A
3334
3335 XPR(XPR_VM_MAP,
2d21ac55 3336 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 3337 map, start, end, new_prot, set_max);
1c79356b
A
3338
3339 vm_map_lock(map);
3340
91447636
A
3341 /* LP64todo - remove this check when vm_map_commpage64()
3342 * no longer has to stuff in a map_entry for the commpage
3343 * above the map's max_offset.
3344 */
3345 if (start >= map->max_offset) {
3346 vm_map_unlock(map);
3347 return(KERN_INVALID_ADDRESS);
3348 }
3349
b0d623f7
A
3350 while(1) {
3351 /*
3352 * Lookup the entry. If it doesn't start in a valid
3353 * entry, return an error.
3354 */
3355 if (! vm_map_lookup_entry(map, start, &entry)) {
3356 vm_map_unlock(map);
3357 return(KERN_INVALID_ADDRESS);
3358 }
3359
3360 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3361 start = SUPERPAGE_ROUND_DOWN(start);
3362 continue;
3363 }
3364 break;
3365 }
3366 if (entry->superpage_size)
3367 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
3368
3369 /*
3370 * Make a first pass to check for protection and address
3371 * violations.
3372 */
3373
3374 current = entry;
3375 prev = current->vme_start;
3376 while ((current != vm_map_to_entry(map)) &&
3377 (current->vme_start < end)) {
3378
3379 /*
3380 * If there is a hole, return an error.
3381 */
3382 if (current->vme_start != prev) {
3383 vm_map_unlock(map);
3384 return(KERN_INVALID_ADDRESS);
3385 }
3386
3387 new_max = current->max_protection;
3388 if(new_prot & VM_PROT_COPY) {
3389 new_max |= VM_PROT_WRITE;
3390 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3391 vm_map_unlock(map);
3392 return(KERN_PROTECTION_FAILURE);
3393 }
3394 } else {
3395 if ((new_prot & new_max) != new_prot) {
3396 vm_map_unlock(map);
3397 return(KERN_PROTECTION_FAILURE);
3398 }
3399 }
3400
593a1d5f
A
3401#if CONFIG_EMBEDDED
3402 if (new_prot & VM_PROT_WRITE) {
6d2010ae 3403 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
593a1d5f
A
3404 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3405 new_prot &= ~VM_PROT_EXECUTE;
3406 }
3407 }
3408#endif
3409
1c79356b
A
3410 prev = current->vme_end;
3411 current = current->vme_next;
3412 }
3413 if (end > prev) {
3414 vm_map_unlock(map);
3415 return(KERN_INVALID_ADDRESS);
3416 }
3417
3418 /*
3419 * Go back and fix up protections.
3420 * Clip to start here if the range starts within
3421 * the entry.
3422 */
3423
3424 current = entry;
2d21ac55
A
3425 if (current != vm_map_to_entry(map)) {
3426 /* clip and unnest if necessary */
3427 vm_map_clip_start(map, current, start);
1c79356b 3428 }
2d21ac55 3429
1c79356b
A
3430 while ((current != vm_map_to_entry(map)) &&
3431 (current->vme_start < end)) {
3432
3433 vm_prot_t old_prot;
3434
3435 vm_map_clip_end(map, current, end);
3436
2d21ac55
A
3437 assert(!current->use_pmap); /* clipping did unnest if needed */
3438
1c79356b
A
3439 old_prot = current->protection;
3440
3441 if(new_prot & VM_PROT_COPY) {
3442 /* the caller is asking specifically to copy the */
3443 /* mapped data; this implies that max protection */
3444 /* will include write. Caller must be prepared */
3445 /* for loss of shared memory communication in the */
3446 /* target area after taking this step */
6d2010ae
A
3447
3448 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3449 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3450 current->offset = 0;
3451 }
1c79356b
A
3452 current->needs_copy = TRUE;
3453 current->max_protection |= VM_PROT_WRITE;
3454 }
3455
3456 if (set_max)
3457 current->protection =
3458 (current->max_protection =
2d21ac55
A
3459 new_prot & ~VM_PROT_COPY) &
3460 old_prot;
1c79356b
A
3461 else
3462 current->protection = new_prot & ~VM_PROT_COPY;
3463
3464 /*
3465 * Update physical map if necessary.
3466 * If the request is to turn off write protection,
3467 * we won't do it for real (in pmap). This is because
3468 * it would cause copy-on-write to fail. We've already
3469 * set the new protection in the map, so if a
3470 * write-protect fault occurs, it will be fixed up
3471 * properly, COW or not.
3472 */
1c79356b 3473 if (current->protection != old_prot) {
1c79356b
A
3474 /* Look one level in: we support nested pmaps */
3475 /* from mapped submaps which are direct entries */
3476 /* in our map */
0c530ab8 3477
2d21ac55 3478 vm_prot_t prot;
0c530ab8 3479
2d21ac55
A
3480 prot = current->protection & ~VM_PROT_WRITE;
3481
3482 if (override_nx(map, current->alias) && prot)
0c530ab8 3483 prot |= VM_PROT_EXECUTE;
2d21ac55 3484
0c530ab8 3485 if (current->is_sub_map && current->use_pmap) {
1c79356b 3486 pmap_protect(current->object.sub_map->pmap,
2d21ac55
A
3487 current->vme_start,
3488 current->vme_end,
3489 prot);
1c79356b 3490 } else {
2d21ac55
A
3491 pmap_protect(map->pmap,
3492 current->vme_start,
3493 current->vme_end,
3494 prot);
1c79356b 3495 }
1c79356b
A
3496 }
3497 current = current->vme_next;
3498 }
3499
5353443c 3500 current = entry;
91447636
A
3501 while ((current != vm_map_to_entry(map)) &&
3502 (current->vme_start <= end)) {
5353443c
A
3503 vm_map_simplify_entry(map, current);
3504 current = current->vme_next;
3505 }
3506
1c79356b
A
3507 vm_map_unlock(map);
3508 return(KERN_SUCCESS);
3509}
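/*
 * Illustrative sketch (not part of the original source): vm_map_protect()
 * is the routine behind the user-level mach_vm_protect()/vm_protect()
 * interface (and the mprotect() path built on it).  User-level code,
 * example values only.
 */
#if 0	/* example only -- not built as part of vm_map.c */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
example_make_read_only(mach_vm_address_t addr, mach_vm_size_t size)
{
	/* set_maximum == FALSE: only the current protection changes */
	return mach_vm_protect(mach_task_self(), addr, size,
			       FALSE, VM_PROT_READ);
}
#endif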
3510
3511/*
3512 * vm_map_inherit:
3513 *
3514 * Sets the inheritance of the specified address
3515 * range in the target map. Inheritance
3516 * affects how the map will be shared with
3517 * child maps at the time of vm_map_fork.
3518 */
3519kern_return_t
3520vm_map_inherit(
3521 register vm_map_t map,
91447636
A
3522 register vm_map_offset_t start,
3523 register vm_map_offset_t end,
1c79356b
A
3524 register vm_inherit_t new_inheritance)
3525{
3526 register vm_map_entry_t entry;
3527 vm_map_entry_t temp_entry;
3528
3529 vm_map_lock(map);
3530
3531 VM_MAP_RANGE_CHECK(map, start, end);
3532
3533 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3534 entry = temp_entry;
1c79356b
A
3535 }
3536 else {
3537 temp_entry = temp_entry->vme_next;
3538 entry = temp_entry;
3539 }
3540
3541 /* first check entire range for submaps which can't support the */
3542 /* given inheritance. */
3543 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3544 if(entry->is_sub_map) {
91447636
A
3545 if(new_inheritance == VM_INHERIT_COPY) {
3546 vm_map_unlock(map);
1c79356b 3547 return(KERN_INVALID_ARGUMENT);
91447636 3548 }
1c79356b
A
3549 }
3550
3551 entry = entry->vme_next;
3552 }
3553
3554 entry = temp_entry;
2d21ac55
A
3555 if (entry != vm_map_to_entry(map)) {
3556 /* clip and unnest if necessary */
3557 vm_map_clip_start(map, entry, start);
3558 }
1c79356b
A
3559
3560 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3561 vm_map_clip_end(map, entry, end);
2d21ac55 3562 assert(!entry->use_pmap); /* clip did unnest if needed */
1c79356b
A
3563
3564 entry->inheritance = new_inheritance;
3565
3566 entry = entry->vme_next;
3567 }
3568
3569 vm_map_unlock(map);
3570 return(KERN_SUCCESS);
3571}
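/*
 * Illustrative sketch (not part of the original source): the inheritance
 * value set by vm_map_inherit() controls what happens to the range when
 * the task forks; from user space it is set with mach_vm_inherit().
 * Example values only.
 */
#if 0	/* example only -- not built as part of vm_map.c */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
example_share_with_children(mach_vm_address_t addr, mach_vm_size_t size)
{
	/* child tasks see the same pages instead of a copy-on-write copy */
	return mach_vm_inherit(mach_task_self(), addr, size, VM_INHERIT_SHARE);
}
#endif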
3572
2d21ac55
A
3573/*
3574 * Update the accounting for the amount of wired memory in this map. If the user has
3575 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3576 */
3577
3578static kern_return_t
3579add_wire_counts(
3580 vm_map_t map,
3581 vm_map_entry_t entry,
3582 boolean_t user_wire)
3583{
3584 vm_map_size_t size;
3585
3586 if (user_wire) {
6d2010ae 3587 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
3588
3589 /*
3590 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3591 * this map entry.
3592 */
3593
3594 if (entry->user_wired_count == 0) {
3595 size = entry->vme_end - entry->vme_start;
3596
3597 /*
3598 * Since this is the first time the user is wiring this map entry, check to see if we're
3599 * exceeding the user wire limits. There is a per-map limit, which is the smaller of the
3600 * process's rlimit and the global vm_user_wire_limit that caps this value. There is also
3601 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3602 * limit, then we fail.
3603 */
3604
3605 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6d2010ae
A
3606 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3607 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
2d21ac55
A
3608 return KERN_RESOURCE_SHORTAGE;
3609
3610 /*
3611 * The first time the user wires an entry, we also increment the wired_count and add this to
3612 * the total that has been wired in the map.
3613 */
3614
3615 if (entry->wired_count >= MAX_WIRE_COUNT)
3616 return KERN_FAILURE;
3617
3618 entry->wired_count++;
3619 map->user_wire_size += size;
3620 }
3621
3622 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3623 return KERN_FAILURE;
3624
3625 entry->user_wired_count++;
3626
3627 } else {
3628
3629 /*
3630 * The kernel's wiring the memory. Just bump the count and continue.
3631 */
3632
3633 if (entry->wired_count >= MAX_WIRE_COUNT)
3634 panic("vm_map_wire: too many wirings");
3635
3636 entry->wired_count++;
3637 }
3638
3639 return KERN_SUCCESS;
3640}
3641
3642/*
3643 * Update the memory wiring accounting now that the given map entry is being unwired.
3644 */
3645
3646static void
3647subtract_wire_counts(
3648 vm_map_t map,
3649 vm_map_entry_t entry,
3650 boolean_t user_wire)
3651{
3652
3653 if (user_wire) {
3654
3655 /*
3656 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3657 */
3658
3659 if (entry->user_wired_count == 1) {
3660
3661 /*
3662 * We're removing the last user wire reference. Decrement the wired_count and the total
3663 * user wired memory for this map.
3664 */
3665
3666 assert(entry->wired_count >= 1);
3667 entry->wired_count--;
3668 map->user_wire_size -= entry->vme_end - entry->vme_start;
3669 }
3670
3671 assert(entry->user_wired_count >= 1);
3672 entry->user_wired_count--;
3673
3674 } else {
3675
3676 /*
3677 * The kernel is unwiring the memory. Just update the count.
3678 */
3679
3680 assert(entry->wired_count >= 1);
3681 entry->wired_count--;
3682 }
3683}
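/*
 * Illustrative sketch, not part of the original source: the pairing of
 * add_wire_counts() and subtract_wire_counts() preserves the invariants
 * below; a hypothetical debug helper could assert them after each
 * accounting update.
 */
static void
example_assert_wire_counts(
	vm_map_entry_t	entry)
{
	/* a nonzero user wire count implies at least one wired_count ref */
	assert(entry->user_wired_count == 0 || entry->wired_count >= 1);
	/* neither count may exceed MAX_WIRE_COUNT */
	assert(entry->wired_count <= MAX_WIRE_COUNT);
	assert(entry->user_wired_count <= MAX_WIRE_COUNT);
}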
3684
1c79356b
A
3685/*
3686 * vm_map_wire:
3687 *
3688 * Sets the pageability of the specified address range in the
3689 * target map as wired. Regions specified as not pageable require
3690 * locked-down physical memory and physical page maps. The
3691 * access_type variable indicates types of accesses that must not
3692 * generate page faults. This is checked against protection of
3693 * memory being locked-down.
3694 *
3695 * The map must not be locked, but a reference must remain to the
3696 * map throughout the call.
3697 */
91447636 3698static kern_return_t
1c79356b
A
3699vm_map_wire_nested(
3700 register vm_map_t map,
91447636
A
3701 register vm_map_offset_t start,
3702 register vm_map_offset_t end,
1c79356b
A
3703 register vm_prot_t access_type,
3704 boolean_t user_wire,
9bccf70c 3705 pmap_t map_pmap,
91447636 3706 vm_map_offset_t pmap_addr)
1c79356b
A
3707{
3708 register vm_map_entry_t entry;
3709 struct vm_map_entry *first_entry, tmp_entry;
91447636
A
3710 vm_map_t real_map;
3711 register vm_map_offset_t s,e;
1c79356b
A
3712 kern_return_t rc;
3713 boolean_t need_wakeup;
3714 boolean_t main_map = FALSE;
9bccf70c 3715 wait_interrupt_t interruptible_state;
0b4e3aa0 3716 thread_t cur_thread;
1c79356b 3717 unsigned int last_timestamp;
91447636 3718 vm_map_size_t size;
1c79356b
A
3719
3720 vm_map_lock(map);
3721 if(map_pmap == NULL)
3722 main_map = TRUE;
3723 last_timestamp = map->timestamp;
3724
3725 VM_MAP_RANGE_CHECK(map, start, end);
3726 assert(page_aligned(start));
3727 assert(page_aligned(end));
0b4e3aa0
A
3728 if (start == end) {
3729 /* We wired what the caller asked for, zero pages */
3730 vm_map_unlock(map);
3731 return KERN_SUCCESS;
3732 }
1c79356b 3733
2d21ac55
A
3734 need_wakeup = FALSE;
3735 cur_thread = current_thread();
3736
3737 s = start;
3738 rc = KERN_SUCCESS;
3739
3740 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 3741 entry = first_entry;
2d21ac55
A
3742 /*
3743 * vm_map_clip_start will be done later.
3744 * We don't want to unnest any nested submaps here !
3745 */
1c79356b
A
3746 } else {
3747 /* Start address is not in map */
2d21ac55
A
3748 rc = KERN_INVALID_ADDRESS;
3749 goto done;
1c79356b
A
3750 }
3751
2d21ac55
A
3752 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3753 /*
3754 * At this point, we have wired from "start" to "s".
3755 * We still need to wire from "s" to "end".
3756 *
3757 * "entry" hasn't been clipped, so it could start before "s"
3758 * and/or end after "end".
3759 */
3760
3761 /* "e" is how far we want to wire in this entry */
3762 e = entry->vme_end;
3763 if (e > end)
3764 e = end;
3765
1c79356b
A
3766 /*
3767 * If another thread is wiring/unwiring this entry then
3768 * block after informing other thread to wake us up.
3769 */
3770 if (entry->in_transition) {
9bccf70c
A
3771 wait_result_t wait_result;
3772
1c79356b
A
3773 /*
3774 * We have not clipped the entry. Make sure that
3775 * the start address is in range so that the lookup
3776 * below will succeed.
2d21ac55
A
3777 * "s" is the current starting point: we've already
3778 * wired from "start" to "s" and we still have
3779 * to wire from "s" to "end".
1c79356b 3780 */
1c79356b
A
3781
3782 entry->needs_wakeup = TRUE;
3783
3784 /*
3785 * wake up anybody waiting on entries that we have
3786 * already wired.
3787 */
3788 if (need_wakeup) {
3789 vm_map_entry_wakeup(map);
3790 need_wakeup = FALSE;
3791 }
3792 /*
3793 * User wiring is interruptible
3794 */
9bccf70c 3795 wait_result = vm_map_entry_wait(map,
2d21ac55
A
3796 (user_wire) ? THREAD_ABORTSAFE :
3797 THREAD_UNINT);
9bccf70c 3798 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
3799 /*
3800 * undo the wirings we have done so far
3801 * We do not clear the needs_wakeup flag,
3802 * because we cannot tell if we were the
3803 * only one waiting.
3804 */
2d21ac55
A
3805 rc = KERN_FAILURE;
3806 goto done;
1c79356b
A
3807 }
3808
1c79356b
A
3809 /*
3810 * Cannot avoid a lookup here. reset timestamp.
3811 */
3812 last_timestamp = map->timestamp;
3813
3814 /*
3815 * The entry could have been clipped, look it up again.
3816 * The worst that can happen is that it no longer exists.
3817 */
3818 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
3819 /*
3820 * User: undo everything up to the previous
3821 * entry. Let vm_map_unwire worry about
3822 * checking the validity of the range.
3823 */
2d21ac55
A
3824 rc = KERN_FAILURE;
3825 goto done;
1c79356b
A
3826 }
3827 entry = first_entry;
3828 continue;
3829 }
2d21ac55
A
3830
3831 if (entry->is_sub_map) {
91447636
A
3832 vm_map_offset_t sub_start;
3833 vm_map_offset_t sub_end;
3834 vm_map_offset_t local_start;
3835 vm_map_offset_t local_end;
1c79356b 3836 pmap_t pmap;
2d21ac55
A
3837
3838 vm_map_clip_start(map, entry, s);
1c79356b
A
3839 vm_map_clip_end(map, entry, end);
3840
9bccf70c 3841 sub_start = entry->offset;
2d21ac55
A
3842 sub_end = entry->vme_end;
3843 sub_end += entry->offset - entry->vme_start;
3844
1c79356b
A
3845 local_end = entry->vme_end;
3846 if(map_pmap == NULL) {
2d21ac55
A
3847 vm_object_t object;
3848 vm_object_offset_t offset;
3849 vm_prot_t prot;
3850 boolean_t wired;
3851 vm_map_entry_t local_entry;
3852 vm_map_version_t version;
3853 vm_map_t lookup_map;
3854
1c79356b
A
3855 if(entry->use_pmap) {
3856 pmap = entry->object.sub_map->pmap;
9bccf70c
A
3857 /* the ppc implementation requires that */
3858 /* a submap's pmap address ranges line */
3859 /* up with the parent map */
3860#ifdef notdef
3861 pmap_addr = sub_start;
3862#endif
2d21ac55 3863 pmap_addr = s;
1c79356b
A
3864 } else {
3865 pmap = map->pmap;
2d21ac55 3866 pmap_addr = s;
1c79356b 3867 }
2d21ac55 3868
1c79356b 3869 if (entry->wired_count) {
2d21ac55
A
3870 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3871 goto done;
3872
3873 /*
3874 * The map was not unlocked:
3875 * no need to goto re-lookup.
3876 * Just go directly to next entry.
3877 */
1c79356b 3878 entry = entry->vme_next;
2d21ac55 3879 s = entry->vme_start;
1c79356b
A
3880 continue;
3881
2d21ac55 3882 }
9bccf70c 3883
2d21ac55
A
3884 /* call vm_map_lookup_locked to */
3885 /* cause any needs_copy to be */
3886 /* evaluated */
3887 local_start = entry->vme_start;
3888 lookup_map = map;
3889 vm_map_lock_write_to_read(map);
3890 if(vm_map_lookup_locked(
3891 &lookup_map, local_start,
3892 access_type,
3893 OBJECT_LOCK_EXCLUSIVE,
3894 &version, &object,
3895 &offset, &prot, &wired,
3896 NULL,
3897 &real_map)) {
1c79356b 3898
2d21ac55
A
3899 vm_map_unlock_read(lookup_map);
3900 vm_map_unwire(map, start,
3901 s, user_wire);
3902 return(KERN_FAILURE);
3903 }
3904 if(real_map != lookup_map)
3905 vm_map_unlock(real_map);
3906 vm_map_unlock_read(lookup_map);
3907 vm_map_lock(map);
3908 vm_object_unlock(object);
1c79356b 3909
2d21ac55
A
3910 /* we unlocked, so must re-lookup */
3911 if (!vm_map_lookup_entry(map,
3912 local_start,
3913 &local_entry)) {
3914 rc = KERN_FAILURE;
3915 goto done;
3916 }
3917
3918 /*
3919 * entry could have been "simplified",
3920 * so re-clip
3921 */
3922 entry = local_entry;
3923 assert(s == local_start);
3924 vm_map_clip_start(map, entry, s);
3925 vm_map_clip_end(map, entry, end);
3926 /* re-compute "e" */
3927 e = entry->vme_end;
3928 if (e > end)
3929 e = end;
3930
3931 /* did we have a change of type? */
3932 if (!entry->is_sub_map) {
3933 last_timestamp = map->timestamp;
3934 continue;
1c79356b
A
3935 }
3936 } else {
9bccf70c 3937 local_start = entry->vme_start;
2d21ac55
A
3938 pmap = map_pmap;
3939 }
3940
3941 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3942 goto done;
3943
3944 entry->in_transition = TRUE;
3945
3946 vm_map_unlock(map);
3947 rc = vm_map_wire_nested(entry->object.sub_map,
1c79356b
A
3948 sub_start, sub_end,
3949 access_type,
2d21ac55
A
3950 user_wire, pmap, pmap_addr);
3951 vm_map_lock(map);
9bccf70c 3952
1c79356b
A
3953 /*
3954 * Find the entry again. It could have been clipped
3955 * after we unlocked the map.
3956 */
9bccf70c
A
3957 if (!vm_map_lookup_entry(map, local_start,
3958 &first_entry))
3959 panic("vm_map_wire: re-lookup failed");
3960 entry = first_entry;
1c79356b 3961
2d21ac55
A
3962 assert(local_start == s);
3963 /* re-compute "e" */
3964 e = entry->vme_end;
3965 if (e > end)
3966 e = end;
3967
1c79356b
A
3968 last_timestamp = map->timestamp;
3969 while ((entry != vm_map_to_entry(map)) &&
2d21ac55 3970 (entry->vme_start < e)) {
1c79356b
A
3971 assert(entry->in_transition);
3972 entry->in_transition = FALSE;
3973 if (entry->needs_wakeup) {
3974 entry->needs_wakeup = FALSE;
3975 need_wakeup = TRUE;
3976 }
3977 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 3978 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
3979 }
3980 entry = entry->vme_next;
3981 }
3982 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 3983 goto done;
1c79356b 3984 }
2d21ac55
A
3985
3986 /* no need to relookup again */
3987 s = entry->vme_start;
1c79356b
A
3988 continue;
3989 }
3990
3991 /*
3992 * If this entry is already wired then increment
3993 * the appropriate wire reference count.
3994 */
9bccf70c 3995 if (entry->wired_count) {
1c79356b
A
3996 /*
3997 * entry is already wired down, get our reference
3998 * after clipping to our range.
3999 */
2d21ac55 4000 vm_map_clip_start(map, entry, s);
1c79356b 4001 vm_map_clip_end(map, entry, end);
1c79356b 4002
2d21ac55
A
4003 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4004 goto done;
4005
4006 /* map was not unlocked: no need to relookup */
1c79356b 4007 entry = entry->vme_next;
2d21ac55 4008 s = entry->vme_start;
1c79356b
A
4009 continue;
4010 }
4011
4012 /*
4013 * Unwired entry or wire request transmitted via submap
4014 */
4015
4016
4017 /*
4018 * Perform actions of vm_map_lookup that need the write
4019 * lock on the map: create a shadow object for a
4020 * copy-on-write region, or an object for a zero-fill
4021 * region.
4022 */
4023 size = entry->vme_end - entry->vme_start;
4024 /*
4025 * If wiring a copy-on-write page, we need to copy it now
4026 * even if we're only (currently) requesting read access.
4027 * This is aggressive, but once it's wired we can't move it.
4028 */
4029 if (entry->needs_copy) {
4030 vm_object_shadow(&entry->object.vm_object,
4031 &entry->offset, size);
4032 entry->needs_copy = FALSE;
4033 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4034 entry->object.vm_object = vm_object_allocate(size);
4035 entry->offset = (vm_object_offset_t)0;
4036 }
4037
2d21ac55 4038 vm_map_clip_start(map, entry, s);
1c79356b
A
4039 vm_map_clip_end(map, entry, end);
4040
2d21ac55 4041 /* re-compute "e" */
1c79356b 4042 e = entry->vme_end;
2d21ac55
A
4043 if (e > end)
4044 e = end;
1c79356b
A
4045
4046 /*
4047 * Check for holes and protection mismatch.
4048 * Holes: Next entry should be contiguous unless this
4049 * is the end of the region.
4050 * Protection: Access requested must be allowed, unless
4051 * wiring is by protection class
4052 */
2d21ac55
A
4053 if ((entry->vme_end < end) &&
4054 ((entry->vme_next == vm_map_to_entry(map)) ||
4055 (entry->vme_next->vme_start > entry->vme_end))) {
4056 /* found a hole */
4057 rc = KERN_INVALID_ADDRESS;
4058 goto done;
4059 }
4060 if ((entry->protection & access_type) != access_type) {
4061 /* found a protection problem */
4062 rc = KERN_PROTECTION_FAILURE;
4063 goto done;
1c79356b
A
4064 }
4065
4066 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4067
2d21ac55
A
4068 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4069 goto done;
1c79356b
A
4070
4071 entry->in_transition = TRUE;
4072
4073 /*
4074 * This entry might get split once we unlock the map.
4075 * In vm_fault_wire(), we need the current range as
4076 * defined by this entry. In order for this to work
4077 * along with a simultaneous clip operation, we make a
4078 * temporary copy of this entry and use that for the
4079 * wiring. Note that the underlying objects do not
4080 * change during a clip.
4081 */
4082 tmp_entry = *entry;
4083
4084 /*
4085 * The in_transition state guarantees that the entry
4086 * (or entries for this range, if a split occurred) will be
4087 * there when the map lock is acquired for the second time.
4088 */
4089 vm_map_unlock(map);
0b4e3aa0 4090
9bccf70c
A
4091 if (!user_wire && cur_thread != THREAD_NULL)
4092 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
4093 else
4094 interruptible_state = THREAD_UNINT;
9bccf70c 4095
1c79356b 4096 if(map_pmap)
9bccf70c 4097 rc = vm_fault_wire(map,
2d21ac55 4098 &tmp_entry, map_pmap, pmap_addr);
1c79356b 4099 else
9bccf70c 4100 rc = vm_fault_wire(map,
2d21ac55
A
4101 &tmp_entry, map->pmap,
4102 tmp_entry.vme_start);
0b4e3aa0
A
4103
4104 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 4105 thread_interrupt_level(interruptible_state);
0b4e3aa0 4106
1c79356b
A
4107 vm_map_lock(map);
4108
4109 if (last_timestamp+1 != map->timestamp) {
4110 /*
4111 * Find the entry again. It could have been clipped
4112 * after we unlocked the map.
4113 */
4114 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 4115 &first_entry))
1c79356b
A
4116 panic("vm_map_wire: re-lookup failed");
4117
4118 entry = first_entry;
4119 }
4120
4121 last_timestamp = map->timestamp;
4122
4123 while ((entry != vm_map_to_entry(map)) &&
4124 (entry->vme_start < tmp_entry.vme_end)) {
4125 assert(entry->in_transition);
4126 entry->in_transition = FALSE;
4127 if (entry->needs_wakeup) {
4128 entry->needs_wakeup = FALSE;
4129 need_wakeup = TRUE;
4130 }
4131 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 4132 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
4133 }
4134 entry = entry->vme_next;
4135 }
4136
4137 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 4138 goto done;
1c79356b 4139 }
2d21ac55
A
4140
4141 s = entry->vme_start;
1c79356b 4142 } /* end while loop through map entries */
2d21ac55
A
4143
4144done:
4145 if (rc == KERN_SUCCESS) {
4146 /* repair any damage we may have made to the VM map */
4147 vm_map_simplify_range(map, start, end);
4148 }
4149
1c79356b
A
4150 vm_map_unlock(map);
4151
4152 /*
4153 * wake up anybody waiting on entries we wired.
4154 */
4155 if (need_wakeup)
4156 vm_map_entry_wakeup(map);
4157
2d21ac55
A
4158 if (rc != KERN_SUCCESS) {
4159 /* undo what has been wired so far */
4160 vm_map_unwire(map, start, s, user_wire);
4161 }
4162
4163 return rc;
1c79356b
A
4164
4165}
4166
4167kern_return_t
4168vm_map_wire(
4169 register vm_map_t map,
91447636
A
4170 register vm_map_offset_t start,
4171 register vm_map_offset_t end,
1c79356b
A
4172 register vm_prot_t access_type,
4173 boolean_t user_wire)
4174{
4175
4176 kern_return_t kret;
4177
1c79356b 4178 kret = vm_map_wire_nested(map, start, end, access_type,
2d21ac55 4179 user_wire, (pmap_t)NULL, 0);
1c79356b
A
4180 return kret;
4181}
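/*
 * Usage sketch, not part of the original source: a kernel-internal
 * caller (vslock()-style) that wants a user buffer resident and
 * fault-free might wire it roughly like this.  The wrapper name is
 * hypothetical; vm_map_wire(), vm_map_trunc_page() and
 * vm_map_round_page() are the interfaces used above.
 */
static kern_return_t
example_wire_user_buffer(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		len)
{
	return vm_map_wire(map,
			   vm_map_trunc_page(addr),
			   vm_map_round_page(addr + len),
			   VM_PROT_READ | VM_PROT_WRITE,
			   TRUE);	/* user_wire: counts against the limits */
}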
4182
4183/*
4184 * vm_map_unwire:
4185 *
4186 * Sets the pageability of the specified address range in the target map
4187 * as pageable. Regions specified must have been wired previously.
4188 *
4189 * The map must not be locked, but a reference must remain to the map
4190 * throughout the call.
4191 *
4192 * A kernel unwire panics on failures. A user unwire ignores holes and
4193 * unwired or in-transition entries so that the rest of the range still
4194 * gets unwired and memory is not lost by being left wired.
4195 */
91447636 4196static kern_return_t
1c79356b
A
4197vm_map_unwire_nested(
4198 register vm_map_t map,
91447636
A
4199 register vm_map_offset_t start,
4200 register vm_map_offset_t end,
1c79356b 4201 boolean_t user_wire,
9bccf70c 4202 pmap_t map_pmap,
91447636 4203 vm_map_offset_t pmap_addr)
1c79356b
A
4204{
4205 register vm_map_entry_t entry;
4206 struct vm_map_entry *first_entry, tmp_entry;
4207 boolean_t need_wakeup;
4208 boolean_t main_map = FALSE;
4209 unsigned int last_timestamp;
4210
4211 vm_map_lock(map);
4212 if(map_pmap == NULL)
4213 main_map = TRUE;
4214 last_timestamp = map->timestamp;
4215
4216 VM_MAP_RANGE_CHECK(map, start, end);
4217 assert(page_aligned(start));
4218 assert(page_aligned(end));
4219
2d21ac55
A
4220 if (start == end) {
4221 /* We unwired what the caller asked for: zero pages */
4222 vm_map_unlock(map);
4223 return KERN_SUCCESS;
4224 }
4225
1c79356b
A
4226 if (vm_map_lookup_entry(map, start, &first_entry)) {
4227 entry = first_entry;
2d21ac55
A
4228 /*
4229 * vm_map_clip_start will be done later.
4231 * We don't want to unnest any nested submaps here!
4231 */
1c79356b
A
4232 }
4233 else {
2d21ac55
A
4234 if (!user_wire) {
4235 panic("vm_map_unwire: start not found");
4236 }
1c79356b
A
4237 /* Start address is not in map. */
4238 vm_map_unlock(map);
4239 return(KERN_INVALID_ADDRESS);
4240 }
4241
b0d623f7
A
4242 if (entry->superpage_size) {
4243 /* superpages are always wired */
4244 vm_map_unlock(map);
4245 return KERN_INVALID_ADDRESS;
4246 }
4247
1c79356b
A
4248 need_wakeup = FALSE;
4249 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4250 if (entry->in_transition) {
4251 /*
4252 * 1)
4253 * Another thread is wiring down this entry. Note
4254 * that, were it not for the other thread, we would
4255 * be unwiring an unwired entry. This is not
4256 * permitted. If we wait, we will be unwiring memory
4257 * we did not wire.
4258 *
4259 * 2)
4260 * Another thread is unwiring this entry. We did not
4261 * have a reference to it, because if we did, this
4262 * entry would not be getting unwired now.
4263 */
2d21ac55
A
4264 if (!user_wire) {
4265 /*
4266 * XXX FBDP
4267 * This could happen: there could be some
4268 * overlapping vslock/vsunlock operations
4269 * going on.
4270 * We should probably just wait and retry,
4271 * but then we have to be careful that this
4272 * entry could get "simplified" after
4273 * "in_transition" gets unset and before
4274 * we re-lookup the entry, so we would
4275 * have to re-clip the entry to avoid
4276 * re-unwiring what we have already unwired...
4277 * See vm_map_wire_nested().
4278 *
4279 * Or we could just ignore "in_transition"
4280 * here and proceed to decrement the wired
4281 * count(s) on this entry. That should be fine
4282 * as long as "wired_count" doesn't drop all
4283 * the way to 0 (and we should panic if THAT
4284 * happens).
4285 */
1c79356b 4286 panic("vm_map_unwire: in_transition entry");
2d21ac55 4287 }
1c79356b
A
4288
4289 entry = entry->vme_next;
4290 continue;
4291 }
4292
2d21ac55 4293 if (entry->is_sub_map) {
91447636
A
4294 vm_map_offset_t sub_start;
4295 vm_map_offset_t sub_end;
4296 vm_map_offset_t local_end;
1c79356b 4297 pmap_t pmap;
2d21ac55 4298
1c79356b
A
4299 vm_map_clip_start(map, entry, start);
4300 vm_map_clip_end(map, entry, end);
4301
4302 sub_start = entry->offset;
4303 sub_end = entry->vme_end - entry->vme_start;
4304 sub_end += entry->offset;
4305 local_end = entry->vme_end;
4306 if(map_pmap == NULL) {
2d21ac55 4307 if(entry->use_pmap) {
1c79356b 4308 pmap = entry->object.sub_map->pmap;
9bccf70c 4309 pmap_addr = sub_start;
2d21ac55 4310 } else {
1c79356b 4311 pmap = map->pmap;
9bccf70c 4312 pmap_addr = start;
2d21ac55
A
4313 }
4314 if (entry->wired_count == 0 ||
4315 (user_wire && entry->user_wired_count == 0)) {
4316 if (!user_wire)
4317 panic("vm_map_unwire: entry is unwired");
4318 entry = entry->vme_next;
4319 continue;
4320 }
4321
4322 /*
4323 * Check for holes
4324 * Holes: Next entry should be contiguous unless
4325 * this is the end of the region.
4326 */
4327 if (((entry->vme_end < end) &&
4328 ((entry->vme_next == vm_map_to_entry(map)) ||
4329 (entry->vme_next->vme_start
4330 > entry->vme_end)))) {
4331 if (!user_wire)
4332 panic("vm_map_unwire: non-contiguous region");
1c79356b 4333/*
2d21ac55
A
4334 entry = entry->vme_next;
4335 continue;
1c79356b 4336*/
2d21ac55 4337 }
1c79356b 4338
2d21ac55 4339 subtract_wire_counts(map, entry, user_wire);
1c79356b 4340
2d21ac55
A
4341 if (entry->wired_count != 0) {
4342 entry = entry->vme_next;
4343 continue;
4344 }
1c79356b 4345
2d21ac55
A
4346 entry->in_transition = TRUE;
4347 tmp_entry = *entry;/* see comment in vm_map_wire() */
4348
4349 /*
4350 * We can unlock the map now. The in_transition state
4351 * guarantees existence of the entry.
4352 */
4353 vm_map_unlock(map);
4354 vm_map_unwire_nested(entry->object.sub_map,
4355 sub_start, sub_end, user_wire, pmap, pmap_addr);
4356 vm_map_lock(map);
1c79356b 4357
2d21ac55
A
4358 if (last_timestamp+1 != map->timestamp) {
4359 /*
4360 * Find the entry again. It could have been
4361 * clipped or deleted after we unlocked the map.
4362 */
4363 if (!vm_map_lookup_entry(map,
4364 tmp_entry.vme_start,
4365 &first_entry)) {
4366 if (!user_wire)
4367 panic("vm_map_unwire: re-lookup failed");
4368 entry = first_entry->vme_next;
4369 } else
4370 entry = first_entry;
4371 }
4372 last_timestamp = map->timestamp;
1c79356b 4373
1c79356b 4374 /*
2d21ac55
A
4375 * clear transition bit for all constituent entries
4376 * that were in the original entry (saved in
4377 * tmp_entry). Also check for waiters.
4378 */
4379 while ((entry != vm_map_to_entry(map)) &&
4380 (entry->vme_start < tmp_entry.vme_end)) {
4381 assert(entry->in_transition);
4382 entry->in_transition = FALSE;
4383 if (entry->needs_wakeup) {
4384 entry->needs_wakeup = FALSE;
4385 need_wakeup = TRUE;
4386 }
4387 entry = entry->vme_next;
1c79356b 4388 }
2d21ac55 4389 continue;
1c79356b 4390 } else {
2d21ac55
A
4391 vm_map_unlock(map);
4392 vm_map_unwire_nested(entry->object.sub_map,
4393 sub_start, sub_end, user_wire, map_pmap,
4394 pmap_addr);
4395 vm_map_lock(map);
1c79356b 4396
2d21ac55
A
4397 if (last_timestamp+1 != map->timestamp) {
4398 /*
4399 * Find the entry again. It could have been
4400 * clipped or deleted after we unlocked the map.
4401 */
4402 if (!vm_map_lookup_entry(map,
4403 tmp_entry.vme_start,
4404 &first_entry)) {
4405 if (!user_wire)
4406 panic("vm_map_unwire: re-lookup failed");
4407 entry = first_entry->vme_next;
4408 } else
4409 entry = first_entry;
4410 }
4411 last_timestamp = map->timestamp;
1c79356b
A
4412 }
4413 }
4414
4415
9bccf70c 4416 if ((entry->wired_count == 0) ||
2d21ac55 4417 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
4418 if (!user_wire)
4419 panic("vm_map_unwire: entry is unwired");
4420
4421 entry = entry->vme_next;
4422 continue;
4423 }
2d21ac55 4424
1c79356b 4425 assert(entry->wired_count > 0 &&
2d21ac55 4426 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
4427
4428 vm_map_clip_start(map, entry, start);
4429 vm_map_clip_end(map, entry, end);
4430
4431 /*
4432 * Check for holes
4433 * Holes: Next entry should be contiguous unless
4434 * this is the end of the region.
4435 */
4436 if (((entry->vme_end < end) &&
2d21ac55
A
4437 ((entry->vme_next == vm_map_to_entry(map)) ||
4438 (entry->vme_next->vme_start > entry->vme_end)))) {
1c79356b
A
4439
4440 if (!user_wire)
4441 panic("vm_map_unwire: non-contiguous region");
4442 entry = entry->vme_next;
4443 continue;
4444 }
4445
2d21ac55 4446 subtract_wire_counts(map, entry, user_wire);
1c79356b 4447
9bccf70c 4448 if (entry->wired_count != 0) {
1c79356b
A
4449 entry = entry->vme_next;
4450 continue;
1c79356b
A
4451 }
4452
b0d623f7
A
4453 if(entry->zero_wired_pages) {
4454 entry->zero_wired_pages = FALSE;
4455 }
4456
1c79356b
A
4457 entry->in_transition = TRUE;
4458 tmp_entry = *entry; /* see comment in vm_map_wire() */
4459
4460 /*
4461 * We can unlock the map now. The in_transition state
4462 * guarantees existence of the entry.
4463 */
4464 vm_map_unlock(map);
4465 if(map_pmap) {
9bccf70c 4466 vm_fault_unwire(map,
2d21ac55 4467 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 4468 } else {
9bccf70c 4469 vm_fault_unwire(map,
2d21ac55
A
4470 &tmp_entry, FALSE, map->pmap,
4471 tmp_entry.vme_start);
1c79356b
A
4472 }
4473 vm_map_lock(map);
4474
4475 if (last_timestamp+1 != map->timestamp) {
4476 /*
4477 * Find the entry again. It could have been clipped
4478 * or deleted after we unlocked the map.
4479 */
4480 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 4481 &first_entry)) {
1c79356b 4482 if (!user_wire)
2d21ac55 4483 panic("vm_map_unwire: re-lookup failed");
1c79356b
A
4484 entry = first_entry->vme_next;
4485 } else
4486 entry = first_entry;
4487 }
4488 last_timestamp = map->timestamp;
4489
4490 /*
4491 * clear transition bit for all constituent entries that
4492 * were in the original entry (saved in tmp_entry). Also
4493 * check for waiters.
4494 */
4495 while ((entry != vm_map_to_entry(map)) &&
4496 (entry->vme_start < tmp_entry.vme_end)) {
4497 assert(entry->in_transition);
4498 entry->in_transition = FALSE;
4499 if (entry->needs_wakeup) {
4500 entry->needs_wakeup = FALSE;
4501 need_wakeup = TRUE;
4502 }
4503 entry = entry->vme_next;
4504 }
4505 }
91447636
A
4506
4507 /*
4508 * We might have fragmented the address space when we wired this
4509 * range of addresses. Attempt to re-coalesce these VM map entries
4510 * with their neighbors now that they're no longer wired.
4511 * Under some circumstances, address space fragmentation can
4512 * prevent VM object shadow chain collapsing, which can cause
4513 * swap space leaks.
4514 */
4515 vm_map_simplify_range(map, start, end);
4516
1c79356b
A
4517 vm_map_unlock(map);
4518 /*
4519 * wake up anybody waiting on entries that we have unwired.
4520 */
4521 if (need_wakeup)
4522 vm_map_entry_wakeup(map);
4523 return(KERN_SUCCESS);
4524
4525}
4526
4527kern_return_t
4528vm_map_unwire(
4529 register vm_map_t map,
91447636
A
4530 register vm_map_offset_t start,
4531 register vm_map_offset_t end,
1c79356b
A
4532 boolean_t user_wire)
4533{
9bccf70c 4534 return vm_map_unwire_nested(map, start, end,
2d21ac55 4535 user_wire, (pmap_t)NULL, 0);
1c79356b
A
4536}
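/*
 * Usage sketch, not part of the original source: the counterpart to the
 * wiring example above (vsunlock()-style).  Passing the same page-aligned
 * range with user_wire == TRUE drops one user wire reference on the
 * range; the wrapper name is hypothetical.
 */
static kern_return_t
example_unwire_user_buffer(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		len)
{
	return vm_map_unwire(map,
			     vm_map_trunc_page(addr),
			     vm_map_round_page(addr + len),
			     TRUE);	/* user_wire */
}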
4537
4538
4539/*
4540 * vm_map_entry_delete: [ internal use only ]
4541 *
4542 * Deallocate the given entry from the target map.
4543 */
91447636 4544static void
1c79356b
A
4545vm_map_entry_delete(
4546 register vm_map_t map,
4547 register vm_map_entry_t entry)
4548{
91447636 4549 register vm_map_offset_t s, e;
1c79356b
A
4550 register vm_object_t object;
4551 register vm_map_t submap;
1c79356b
A
4552
4553 s = entry->vme_start;
4554 e = entry->vme_end;
4555 assert(page_aligned(s));
4556 assert(page_aligned(e));
4557 assert(entry->wired_count == 0);
4558 assert(entry->user_wired_count == 0);
b0d623f7 4559 assert(!entry->permanent);
1c79356b
A
4560
4561 if (entry->is_sub_map) {
4562 object = NULL;
4563 submap = entry->object.sub_map;
4564 } else {
4565 submap = NULL;
4566 object = entry->object.vm_object;
4567 }
4568
6d2010ae 4569 vm_map_store_entry_unlink(map, entry);
1c79356b
A
4570 map->size -= e - s;
4571
4572 vm_map_entry_dispose(map, entry);
4573
4574 vm_map_unlock(map);
4575 /*
4576 * Deallocate the object only after removing all
4577 * pmap entries pointing to its pages.
4578 */
4579 if (submap)
4580 vm_map_deallocate(submap);
4581 else
2d21ac55 4582 vm_object_deallocate(object);
1c79356b
A
4583
4584}
4585
4586void
4587vm_map_submap_pmap_clean(
4588 vm_map_t map,
91447636
A
4589 vm_map_offset_t start,
4590 vm_map_offset_t end,
1c79356b 4591 vm_map_t sub_map,
91447636 4592 vm_map_offset_t offset)
1c79356b 4593{
91447636
A
4594 vm_map_offset_t submap_start;
4595 vm_map_offset_t submap_end;
4596 vm_map_size_t remove_size;
1c79356b
A
4597 vm_map_entry_t entry;
4598
4599 submap_end = offset + (end - start);
4600 submap_start = offset;
b7266188
A
4601
4602 vm_map_lock_read(sub_map);
1c79356b 4603 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
2d21ac55 4604
1c79356b
A
4605 remove_size = (entry->vme_end - entry->vme_start);
4606 if(offset > entry->vme_start)
4607 remove_size -= offset - entry->vme_start;
2d21ac55 4608
1c79356b
A
4609
4610 if(submap_end < entry->vme_end) {
4611 remove_size -=
4612 entry->vme_end - submap_end;
4613 }
4614 if(entry->is_sub_map) {
4615 vm_map_submap_pmap_clean(
4616 sub_map,
4617 start,
4618 start + remove_size,
4619 entry->object.sub_map,
4620 entry->offset);
4621 } else {
9bccf70c
A
4622
4623 if((map->mapped) && (map->ref_count)
2d21ac55 4624 && (entry->object.vm_object != NULL)) {
9bccf70c
A
4625 vm_object_pmap_protect(
4626 entry->object.vm_object,
6d2010ae 4627 entry->offset+(offset-entry->vme_start),
9bccf70c
A
4628 remove_size,
4629 PMAP_NULL,
4630 entry->vme_start,
4631 VM_PROT_NONE);
4632 } else {
4633 pmap_remove(map->pmap,
2d21ac55
A
4634 (addr64_t)start,
4635 (addr64_t)(start + remove_size));
9bccf70c 4636 }
1c79356b
A
4637 }
4638 }
4639
4640 entry = entry->vme_next;
2d21ac55 4641
1c79356b 4642 while((entry != vm_map_to_entry(sub_map))
2d21ac55 4643 && (entry->vme_start < submap_end)) {
1c79356b
A
4644 remove_size = (entry->vme_end - entry->vme_start);
4645 if(submap_end < entry->vme_end) {
4646 remove_size -= entry->vme_end - submap_end;
4647 }
4648 if(entry->is_sub_map) {
4649 vm_map_submap_pmap_clean(
4650 sub_map,
4651 (start + entry->vme_start) - offset,
4652 ((start + entry->vme_start) - offset) + remove_size,
4653 entry->object.sub_map,
4654 entry->offset);
4655 } else {
9bccf70c 4656 if((map->mapped) && (map->ref_count)
2d21ac55 4657 && (entry->object.vm_object != NULL)) {
9bccf70c
A
4658 vm_object_pmap_protect(
4659 entry->object.vm_object,
4660 entry->offset,
4661 remove_size,
4662 PMAP_NULL,
4663 entry->vme_start,
4664 VM_PROT_NONE);
4665 } else {
4666 pmap_remove(map->pmap,
2d21ac55
A
4667 (addr64_t)((start + entry->vme_start)
4668 - offset),
4669 (addr64_t)(((start + entry->vme_start)
4670 - offset) + remove_size));
9bccf70c 4671 }
1c79356b
A
4672 }
4673 entry = entry->vme_next;
b7266188
A
4674 }
4675 vm_map_unlock_read(sub_map);
1c79356b
A
4676 return;
4677}
4678
4679/*
4680 * vm_map_delete: [ internal use only ]
4681 *
4682 * Deallocates the given address range from the target map.
4683 * Removes all user wirings. Unwires one kernel wiring if
4684 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4685 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4686 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4687 *
4688 * This routine is called with map locked and leaves map locked.
4689 */
91447636 4690static kern_return_t
1c79356b 4691vm_map_delete(
91447636
A
4692 vm_map_t map,
4693 vm_map_offset_t start,
4694 vm_map_offset_t end,
4695 int flags,
4696 vm_map_t zap_map)
1c79356b
A
4697{
4698 vm_map_entry_t entry, next;
4699 struct vm_map_entry *first_entry, tmp_entry;
2d21ac55 4700 register vm_map_offset_t s;
1c79356b
A
4701 register vm_object_t object;
4702 boolean_t need_wakeup;
4703 unsigned int last_timestamp = ~0; /* unlikely value */
4704 int interruptible;
1c79356b
A
4705
4706 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
2d21ac55 4707 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
4708
4709 /*
4710 * All our DMA I/O operations in IOKit are currently done by
4711 * wiring through the map entries of the task requesting the I/O.
4712 * Because of this, we must always wait for kernel wirings
4713 * to go away on the entries before deleting them.
4714 *
4715 * Any caller who wants to actually remove a kernel wiring
4716 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4717 * properly remove one wiring instead of blasting through
4718 * them all.
4719 */
4720 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4721
b0d623f7
A
4722 while(1) {
4723 /*
4724 * Find the start of the region, and clip it
4725 */
4726 if (vm_map_lookup_entry(map, start, &first_entry)) {
4727 entry = first_entry;
4728 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4729 start = SUPERPAGE_ROUND_DOWN(start);
4730 continue;
4731 }
4732 if (start == entry->vme_start) {
4733 /*
4734 * No need to clip. We don't want to cause
4735 * any unnecessary unnesting in this case...
4736 */
4737 } else {
4738 vm_map_clip_start(map, entry, start);
4739 }
4740
2d21ac55 4741 /*
b0d623f7
A
4742 * Fix the lookup hint now, rather than each
4743 * time through the loop.
2d21ac55 4744 */
b0d623f7 4745 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 4746 } else {
b0d623f7 4747 entry = first_entry->vme_next;
2d21ac55 4748 }
b0d623f7 4749 break;
1c79356b 4750 }
b0d623f7
A
4751 if (entry->superpage_size)
4752 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
4753
4754 need_wakeup = FALSE;
4755 /*
4756 * Step through all entries in this region
4757 */
2d21ac55
A
4758 s = entry->vme_start;
4759 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4760 /*
4761 * At this point, we have deleted all the memory entries
4762 * between "start" and "s". We still need to delete
4763 * all memory entries between "s" and "end".
4764 * While we were blocked and the map was unlocked, some
4765 * new memory entries could have been re-allocated between
4766 * "start" and "s" and we don't want to mess with those.
4767 * Some of those entries could even have been re-assembled
4768 * with an entry after "s" (in vm_map_simplify_entry()), so
4769 * we may have to vm_map_clip_start() again.
4770 */
1c79356b 4771
2d21ac55
A
4772 if (entry->vme_start >= s) {
4773 /*
4774 * This entry starts on or after "s"
4775 * so no need to clip its start.
4776 */
4777 } else {
4778 /*
4779 * This entry has been re-assembled by a
4780 * vm_map_simplify_entry(). We need to
4781 * re-clip its start.
4782 */
4783 vm_map_clip_start(map, entry, s);
4784 }
4785 if (entry->vme_end <= end) {
4786 /*
4787 * This entry is going away completely, so no need
4788 * to clip and possibly cause an unnecessary unnesting.
4789 */
4790 } else {
4791 vm_map_clip_end(map, entry, end);
4792 }
b0d623f7
A
4793
4794 if (entry->permanent) {
4795 panic("attempt to remove permanent VM map entry "
4796 "%p [0x%llx:0x%llx]\n",
4797 entry, (uint64_t) s, (uint64_t) end);
4798 }
4799
4800
1c79356b 4801 if (entry->in_transition) {
9bccf70c
A
4802 wait_result_t wait_result;
4803
1c79356b
A
4804 /*
4805 * Another thread is wiring/unwiring this entry.
4806 * Let the other thread know we are waiting.
4807 */
2d21ac55 4808 assert(s == entry->vme_start);
1c79356b
A
4809 entry->needs_wakeup = TRUE;
4810
4811 /*
4812 * wake up anybody waiting on entries that we have
4813 * already unwired/deleted.
4814 */
4815 if (need_wakeup) {
4816 vm_map_entry_wakeup(map);
4817 need_wakeup = FALSE;
4818 }
4819
9bccf70c 4820 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
4821
4822 if (interruptible &&
9bccf70c 4823 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
4824 /*
4825 * We do not clear the needs_wakeup flag,
4826 * since we cannot tell if we were the only one.
4827 */
9bccf70c 4828 vm_map_unlock(map);
1c79356b 4829 return KERN_ABORTED;
9bccf70c 4830 }
1c79356b
A
4831
4832 /*
4833 * The entry could have been clipped or it
4834 * may not exist anymore. Look it up again.
4835 */
4836 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4837 assert((map != kernel_map) &&
4838 (!entry->is_sub_map));
4839 /*
4840 * User: use the next entry
4841 */
4842 entry = first_entry->vme_next;
2d21ac55 4843 s = entry->vme_start;
1c79356b
A
4844 } else {
4845 entry = first_entry;
0c530ab8 4846 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 4847 }
9bccf70c 4848 last_timestamp = map->timestamp;
1c79356b
A
4849 continue;
4850 } /* end in_transition */
4851
4852 if (entry->wired_count) {
2d21ac55
A
4853 boolean_t user_wire;
4854
4855 user_wire = entry->user_wired_count > 0;
4856
1c79356b 4857 /*
b0d623f7 4858 * Remove a kernel wiring if requested
1c79356b 4859 */
b0d623f7 4860 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 4861 entry->wired_count--;
b0d623f7
A
4862 }
4863
4864 /*
4865 * Remove all user wirings for proper accounting
4866 */
4867 if (entry->user_wired_count > 0) {
4868 while (entry->user_wired_count)
4869 subtract_wire_counts(map, entry, user_wire);
4870 }
1c79356b
A
4871
4872 if (entry->wired_count != 0) {
2d21ac55 4873 assert(map != kernel_map);
1c79356b
A
4874 /*
4875 * Cannot continue. Typical case is when
4876 * a user thread has physical io pending on
4877 * on this page. Either wait for the
4878 * kernel wiring to go away or return an
4879 * error.
4880 */
4881 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 4882 wait_result_t wait_result;
1c79356b 4883
2d21ac55 4884 assert(s == entry->vme_start);
1c79356b 4885 entry->needs_wakeup = TRUE;
9bccf70c 4886 wait_result = vm_map_entry_wait(map,
2d21ac55 4887 interruptible);
1c79356b
A
4888
4889 if (interruptible &&
2d21ac55 4890 wait_result == THREAD_INTERRUPTED) {
1c79356b 4891 /*
2d21ac55 4892 * We do not clear the
1c79356b
A
4893 * needs_wakeup flag, since we
4894 * cannot tell if we were the
4895 * only one.
2d21ac55 4896 */
9bccf70c 4897 vm_map_unlock(map);
1c79356b 4898 return KERN_ABORTED;
9bccf70c 4899 }
1c79356b
A
4900
4901 /*
2d21ac55 4902 * The entry could have been clipped or
1c79356b
A
4903 * it may not exist anymore. Look it
4904 * up again.
2d21ac55 4905 */
1c79356b 4906 if (!vm_map_lookup_entry(map, s,
2d21ac55
A
4907 &first_entry)) {
4908 assert(map != kernel_map);
1c79356b 4909 /*
2d21ac55
A
4910 * User: use the next entry
4911 */
1c79356b 4912 entry = first_entry->vme_next;
2d21ac55 4913 s = entry->vme_start;
1c79356b
A
4914 } else {
4915 entry = first_entry;
0c530ab8 4916 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 4917 }
9bccf70c 4918 last_timestamp = map->timestamp;
1c79356b
A
4919 continue;
4920 }
4921 else {
4922 return KERN_FAILURE;
4923 }
4924 }
4925
4926 entry->in_transition = TRUE;
4927 /*
4928 * copy current entry. see comment in vm_map_wire()
4929 */
4930 tmp_entry = *entry;
2d21ac55 4931 assert(s == entry->vme_start);
1c79356b
A
4932
4933 /*
4934 * We can unlock the map now. The in_transition
4935 * state guarantees existence of the entry.
4936 */
4937 vm_map_unlock(map);
2d21ac55
A
4938
4939 if (tmp_entry.is_sub_map) {
4940 vm_map_t sub_map;
4941 vm_map_offset_t sub_start, sub_end;
4942 pmap_t pmap;
4943 vm_map_offset_t pmap_addr;
4944
4945
4946 sub_map = tmp_entry.object.sub_map;
4947 sub_start = tmp_entry.offset;
4948 sub_end = sub_start + (tmp_entry.vme_end -
4949 tmp_entry.vme_start);
4950 if (tmp_entry.use_pmap) {
4951 pmap = sub_map->pmap;
4952 pmap_addr = tmp_entry.vme_start;
4953 } else {
4954 pmap = map->pmap;
4955 pmap_addr = tmp_entry.vme_start;
4956 }
4957 (void) vm_map_unwire_nested(sub_map,
4958 sub_start, sub_end,
4959 user_wire,
4960 pmap, pmap_addr);
4961 } else {
4962
4963 vm_fault_unwire(map, &tmp_entry,
4964 tmp_entry.object.vm_object == kernel_object,
4965 map->pmap, tmp_entry.vme_start);
4966 }
4967
1c79356b
A
4968 vm_map_lock(map);
4969
4970 if (last_timestamp+1 != map->timestamp) {
4971 /*
4972 * Find the entry again. It could have
4973 * been clipped after we unlocked the map.
4974 */
4975 if (!vm_map_lookup_entry(map, s, &first_entry)){
4976 assert((map != kernel_map) &&
2d21ac55 4977 (!entry->is_sub_map));
1c79356b 4978 first_entry = first_entry->vme_next;
2d21ac55 4979 s = first_entry->vme_start;
1c79356b 4980 } else {
0c530ab8 4981 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
4982 }
4983 } else {
0c530ab8 4984 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
4985 first_entry = entry;
4986 }
4987
4988 last_timestamp = map->timestamp;
4989
4990 entry = first_entry;
4991 while ((entry != vm_map_to_entry(map)) &&
4992 (entry->vme_start < tmp_entry.vme_end)) {
4993 assert(entry->in_transition);
4994 entry->in_transition = FALSE;
4995 if (entry->needs_wakeup) {
4996 entry->needs_wakeup = FALSE;
4997 need_wakeup = TRUE;
4998 }
4999 entry = entry->vme_next;
5000 }
5001 /*
5002 * We have unwired the entry(s). Go back and
5003 * delete them.
5004 */
5005 entry = first_entry;
5006 continue;
5007 }
5008
5009 /* entry is unwired */
5010 assert(entry->wired_count == 0);
5011 assert(entry->user_wired_count == 0);
5012
2d21ac55
A
5013 assert(s == entry->vme_start);
5014
5015 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5016 /*
5017 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5018 * vm_map_delete(), some map entries might have been
5019 * transferred to a "zap_map", which doesn't have a
5020 * pmap. The original pmap has already been flushed
5021 * in the vm_map_delete() call targeting the original
5022 * map, but when we get to destroying the "zap_map",
5023 * we don't have any pmap to flush, so let's just skip
5024 * all this.
5025 */
5026 } else if (entry->is_sub_map) {
5027 if (entry->use_pmap) {
0c530ab8
A
5028#ifndef NO_NESTED_PMAP
5029 pmap_unnest(map->pmap,
2d21ac55
A
5030 (addr64_t)entry->vme_start,
5031 entry->vme_end - entry->vme_start);
0c530ab8 5032#endif /* NO_NESTED_PMAP */
2d21ac55 5033 if ((map->mapped) && (map->ref_count)) {
9bccf70c
A
5034 /* clean up parent map/maps */
5035 vm_map_submap_pmap_clean(
5036 map, entry->vme_start,
5037 entry->vme_end,
5038 entry->object.sub_map,
5039 entry->offset);
5040 }
2d21ac55 5041 } else {
1c79356b
A
5042 vm_map_submap_pmap_clean(
5043 map, entry->vme_start, entry->vme_end,
5044 entry->object.sub_map,
5045 entry->offset);
2d21ac55
A
5046 }
5047 } else if (entry->object.vm_object != kernel_object) {
5048 object = entry->object.vm_object;
5049 if((map->mapped) && (map->ref_count)) {
5050 vm_object_pmap_protect(
55e303ae
A
5051 object, entry->offset,
5052 entry->vme_end - entry->vme_start,
5053 PMAP_NULL,
5054 entry->vme_start,
5055 VM_PROT_NONE);
2d21ac55
A
5056 } else {
5057 pmap_remove(map->pmap,
5058 (addr64_t)entry->vme_start,
5059 (addr64_t)entry->vme_end);
1c79356b
A
5060 }
5061 }
5062
91447636
A
5063 /*
5064 * All pmap mappings for this map entry must have been
5065 * cleared by now.
5066 */
5067 assert(vm_map_pmap_is_empty(map,
5068 entry->vme_start,
5069 entry->vme_end));
5070
1c79356b
A
5071 next = entry->vme_next;
5072 s = next->vme_start;
5073 last_timestamp = map->timestamp;
91447636
A
5074
5075 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5076 zap_map != VM_MAP_NULL) {
2d21ac55 5077 vm_map_size_t entry_size;
91447636
A
5078 /*
5079 * The caller wants to save the affected VM map entries
5080 * into the "zap_map". The caller will take care of
5081 * these entries.
5082 */
5083 /* unlink the entry from "map" ... */
6d2010ae 5084 vm_map_store_entry_unlink(map, entry);
91447636 5085 /* ... and add it to the end of the "zap_map" */
6d2010ae 5086 vm_map_store_entry_link(zap_map,
91447636
A
5087 vm_map_last_entry(zap_map),
5088 entry);
2d21ac55
A
5089 entry_size = entry->vme_end - entry->vme_start;
5090 map->size -= entry_size;
5091 zap_map->size += entry_size;
5092 /* we didn't unlock the map, so no timestamp increase */
5093 last_timestamp--;
91447636
A
5094 } else {
5095 vm_map_entry_delete(map, entry);
5096 /* vm_map_entry_delete unlocks the map */
5097 vm_map_lock(map);
5098 }
5099
1c79356b
A
5100 entry = next;
5101
5102 if(entry == vm_map_to_entry(map)) {
5103 break;
5104 }
5105 if (last_timestamp+1 != map->timestamp) {
5106 /*
5107 * We are responsible for deleting everything
5108 * from the given space. If someone has interfered,
5109 * we pick up where we left off. Back fills should
5110 * be all right for anyone except map_delete, and
5111 * we have to assume that the task has been fully
5112 * disabled before we get here.
5113 */
5114 if (!vm_map_lookup_entry(map, s, &entry)){
5115 entry = entry->vme_next;
2d21ac55 5116 s = entry->vme_start;
1c79356b 5117 } else {
2d21ac55 5118 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
5119 }
5120 /*
5121 * Others can not only allocate behind us; we can
5122 * also see entries coalesce while we don't hold the map lock.
5123 */
5124 if(entry == vm_map_to_entry(map)) {
5125 break;
5126 }
1c79356b
A
5127 }
5128 last_timestamp = map->timestamp;
5129 }
5130
5131 if (map->wait_for_space)
5132 thread_wakeup((event_t) map);
5133 /*
5134 * wake up anybody waiting on entries that we have already deleted.
5135 */
5136 if (need_wakeup)
5137 vm_map_entry_wakeup(map);
5138
5139 return KERN_SUCCESS;
5140}
5141
5142/*
5143 * vm_map_remove:
5144 *
5145 * Remove the given address range from the target map.
5146 * This is the exported form of vm_map_delete.
5147 */
5148kern_return_t
5149vm_map_remove(
5150 register vm_map_t map,
91447636
A
5151 register vm_map_offset_t start,
5152 register vm_map_offset_t end,
1c79356b
A
5153 register boolean_t flags)
5154{
5155 register kern_return_t result;
9bccf70c 5156
1c79356b
A
5157 vm_map_lock(map);
5158 VM_MAP_RANGE_CHECK(map, start, end);
91447636 5159 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 5160 vm_map_unlock(map);
91447636 5161
1c79356b
A
5162 return(result);
5163}
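/*
 * Usage sketch, not part of the original source: a typical caller tearing
 * down a mapping it established earlier.  VM_MAP_NO_FLAGS is the common
 * case; a caller that also holds a kernel wiring on the range would pass
 * VM_MAP_REMOVE_KUNWIRE instead.  The wrapper itself is hypothetical.
 */
static kern_return_t
example_remove_mapping(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		size)
{
	return vm_map_remove(map,
			     vm_map_trunc_page(addr),
			     vm_map_round_page(addr + size),
			     VM_MAP_NO_FLAGS);
}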
5164
5165
1c79356b
A
5166/*
5167 * Routine: vm_map_copy_discard
5168 *
5169 * Description:
5170 * Dispose of a map copy object (returned by
5171 * vm_map_copyin).
5172 */
5173void
5174vm_map_copy_discard(
5175 vm_map_copy_t copy)
5176{
1c79356b
A
5177 if (copy == VM_MAP_COPY_NULL)
5178 return;
5179
5180 switch (copy->type) {
5181 case VM_MAP_COPY_ENTRY_LIST:
5182 while (vm_map_copy_first_entry(copy) !=
2d21ac55 5183 vm_map_copy_to_entry(copy)) {
1c79356b
A
5184 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5185
5186 vm_map_copy_entry_unlink(copy, entry);
5187 vm_object_deallocate(entry->object.vm_object);
5188 vm_map_copy_entry_dispose(copy, entry);
5189 }
5190 break;
5191 case VM_MAP_COPY_OBJECT:
5192 vm_object_deallocate(copy->cpy_object);
5193 break;
1c79356b
A
5194 case VM_MAP_COPY_KERNEL_BUFFER:
5195
5196 /*
5197 * The vm_map_copy_t and possibly the data buffer were
5198 * allocated by a single call to kalloc(), i.e. the
5199 * vm_map_copy_t was not allocated out of the zone.
5200 */
91447636 5201 kfree(copy, copy->cpy_kalloc_size);
1c79356b
A
5202 return;
5203 }
91447636 5204 zfree(vm_map_copy_zone, copy);
1c79356b
A
5205}
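/*
 * Usage sketch, not part of the original source: a caller that made a
 * copy with vm_map_copyin() and then decides not to deliver it simply
 * discards it.  Discarding VM_MAP_COPY_NULL is a no-op, so error paths
 * may call vm_map_copy_discard() unconditionally.  The wrapper is
 * hypothetical.
 */
static kern_return_t
example_copyin_then_drop(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... caller decides not to use the data after all ... */
	vm_map_copy_discard(copy);
	return KERN_SUCCESS;
}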
5206
5207/*
5208 * Routine: vm_map_copy_copy
5209 *
5210 * Description:
5211 * Move the information in a map copy object to
5212 * a new map copy object, leaving the old one
5213 * empty.
5214 *
5215 * This is used by kernel routines that need
5216 * to look at out-of-line data (in copyin form)
5217 * before deciding whether to return SUCCESS.
5218 * If the routine returns FAILURE, the original
5219 * copy object will be deallocated; therefore,
5220 * these routines must make a copy of the copy
5221 * object and leave the original empty so that
5222 * deallocation will not fail.
5223 */
5224vm_map_copy_t
5225vm_map_copy_copy(
5226 vm_map_copy_t copy)
5227{
5228 vm_map_copy_t new_copy;
5229
5230 if (copy == VM_MAP_COPY_NULL)
5231 return VM_MAP_COPY_NULL;
5232
5233 /*
5234 * Allocate a new copy object, and copy the information
5235 * from the old one into it.
5236 */
5237
5238 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5239 *new_copy = *copy;
5240
5241 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5242 /*
5243 * The links in the entry chain must be
5244 * changed to point to the new copy object.
5245 */
5246 vm_map_copy_first_entry(copy)->vme_prev
5247 = vm_map_copy_to_entry(new_copy);
5248 vm_map_copy_last_entry(copy)->vme_next
5249 = vm_map_copy_to_entry(new_copy);
5250 }
5251
5252 /*
5253 * Change the old copy object into one that contains
5254 * nothing to be deallocated.
5255 */
5256 copy->type = VM_MAP_COPY_OBJECT;
5257 copy->cpy_object = VM_OBJECT_NULL;
5258
5259 /*
5260 * Return the new object.
5261 */
5262 return new_copy;
5263}
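/*
 * Usage sketch, not part of the original source: the "inspect before
 * deciding" pattern the comment above describes.  example_inspect() is a
 * hypothetical stand-in for whatever validation the kernel routine
 * performs on the copied-in data.
 */
static boolean_t
example_inspect(
	__unused vm_map_copy_t	data)
{
	/* hypothetical validation; always succeeds in this sketch */
	return TRUE;
}

static kern_return_t
example_peek_at_copy(
	vm_map_copy_t	copy,		/* caller still owns this */
	vm_map_copy_t	*data_out)
{
	vm_map_copy_t	new_copy;

	/* move the data; "copy" is left empty and safe to deallocate */
	new_copy = vm_map_copy_copy(copy);
	if (new_copy == VM_MAP_COPY_NULL)
		return KERN_INVALID_ARGUMENT;

	if (!example_inspect(new_copy)) {
		vm_map_copy_discard(new_copy);	/* we own the data now */
		return KERN_FAILURE;	/* caller's discard of "copy" is harmless */
	}

	*data_out = new_copy;
	return KERN_SUCCESS;
}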
5264
91447636 5265static kern_return_t
1c79356b
A
5266vm_map_overwrite_submap_recurse(
5267 vm_map_t dst_map,
91447636
A
5268 vm_map_offset_t dst_addr,
5269 vm_map_size_t dst_size)
1c79356b 5270{
91447636 5271 vm_map_offset_t dst_end;
1c79356b
A
5272 vm_map_entry_t tmp_entry;
5273 vm_map_entry_t entry;
5274 kern_return_t result;
5275 boolean_t encountered_sub_map = FALSE;
5276
5277
5278
5279 /*
5280 * Verify that the destination is all writeable
5281 * initially. We have to trunc the destination
5282 * address and round the copy size or we'll end up
5283 * splitting entries in strange ways.
5284 */
5285
91447636 5286 dst_end = vm_map_round_page(dst_addr + dst_size);
9bccf70c 5287 vm_map_lock(dst_map);
1c79356b
A
5288
5289start_pass_1:
1c79356b
A
5290 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5291 vm_map_unlock(dst_map);
5292 return(KERN_INVALID_ADDRESS);
5293 }
5294
91447636 5295 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
2d21ac55 5296 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
1c79356b
A
5297
5298 for (entry = tmp_entry;;) {
5299 vm_map_entry_t next;
5300
5301 next = entry->vme_next;
5302 while(entry->is_sub_map) {
91447636
A
5303 vm_map_offset_t sub_start;
5304 vm_map_offset_t sub_end;
5305 vm_map_offset_t local_end;
1c79356b
A
5306
5307 if (entry->in_transition) {
2d21ac55
A
5308 /*
5309 * Say that we are waiting, and wait for entry.
5310 */
1c79356b
A
5311 entry->needs_wakeup = TRUE;
5312 vm_map_entry_wait(dst_map, THREAD_UNINT);
5313
5314 goto start_pass_1;
5315 }
5316
5317 encountered_sub_map = TRUE;
5318 sub_start = entry->offset;
5319
5320 if(entry->vme_end < dst_end)
5321 sub_end = entry->vme_end;
5322 else
5323 sub_end = dst_end;
5324 sub_end -= entry->vme_start;
5325 sub_end += entry->offset;
5326 local_end = entry->vme_end;
5327 vm_map_unlock(dst_map);
5328
5329 result = vm_map_overwrite_submap_recurse(
2d21ac55
A
5330 entry->object.sub_map,
5331 sub_start,
5332 sub_end - sub_start);
1c79356b
A
5333
5334 if(result != KERN_SUCCESS)
5335 return result;
5336 if (dst_end <= entry->vme_end)
5337 return KERN_SUCCESS;
5338 vm_map_lock(dst_map);
5339 if(!vm_map_lookup_entry(dst_map, local_end,
5340 &tmp_entry)) {
5341 vm_map_unlock(dst_map);
5342 return(KERN_INVALID_ADDRESS);
5343 }
5344 entry = tmp_entry;
5345 next = entry->vme_next;
5346 }
5347
5348 if ( ! (entry->protection & VM_PROT_WRITE)) {
5349 vm_map_unlock(dst_map);
5350 return(KERN_PROTECTION_FAILURE);
5351 }
5352
5353 /*
5354 * If the entry is in transition, we must wait
5355 * for it to exit that state. Anything could happen
5356 * when we unlock the map, so start over.
5357 */
5358 if (entry->in_transition) {
5359
5360 /*
5361 * Say that we are waiting, and wait for entry.
5362 */
5363 entry->needs_wakeup = TRUE;
5364 vm_map_entry_wait(dst_map, THREAD_UNINT);
5365
5366 goto start_pass_1;
5367 }
5368
5369/*
5370 * our range is contained completely within this map entry
5371 */
5372 if (dst_end <= entry->vme_end) {
5373 vm_map_unlock(dst_map);
5374 return KERN_SUCCESS;
5375 }
5376/*
5377 * check that range specified is contiguous region
5378 */
5379 if ((next == vm_map_to_entry(dst_map)) ||
5380 (next->vme_start != entry->vme_end)) {
5381 vm_map_unlock(dst_map);
5382 return(KERN_INVALID_ADDRESS);
5383 }
5384
5385 /*
5386 * Check for permanent objects in the destination.
5387 */
5388 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
2d21ac55
A
5389 ((!entry->object.vm_object->internal) ||
5390 (entry->object.vm_object->true_share))) {
1c79356b
A
5391 if(encountered_sub_map) {
5392 vm_map_unlock(dst_map);
5393 return(KERN_FAILURE);
5394 }
5395 }
5396
5397
5398 entry = next;
5399 }/* for */
5400 vm_map_unlock(dst_map);
5401 return(KERN_SUCCESS);
5402}
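/*
 * Usage sketch, not part of the original source: the classic
 * copyin-then-overwrite sequence (roughly what vm_copy() performs),
 * pairing vm_map_copyin() with vm_map_copy_overwrite(), which follows
 * below.  The wrapper itself is hypothetical.
 */
static kern_return_t
example_copy_region(
	vm_map_t		map,
	vm_map_address_t	src_addr,
	vm_map_address_t	dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* not consumed on failure */
	return kr;
}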
5403
5404/*
5405 * Routine: vm_map_copy_overwrite
5406 *
5407 * Description:
5408 * Copy the memory described by the map copy
5409 * object (copy; returned by vm_map_copyin) onto
5410 * the specified destination region (dst_map, dst_addr).
5411 * The destination must be writeable.
5412 *
5413 * Unlike vm_map_copyout, this routine actually
5414 * writes over previously-mapped memory. If the
5415 * previous mapping was to a permanent (user-supplied)
5416 * memory object, it is preserved.
5417 *
5418 * The attributes (protection and inheritance) of the
5419 * destination region are preserved.
5420 *
5421 * If successful, consumes the copy object.
5422 * Otherwise, the caller is responsible for it.
5423 *
5424 * Implementation notes:
5425 * To overwrite aligned temporary virtual memory, it is
5426 * sufficient to remove the previous mapping and insert
5427 * the new copy. This replacement is done either on
5428 * the whole region (if no permanent virtual memory
5429 * objects are embedded in the destination region) or
5430 * in individual map entries.
5431 *
5432 * To overwrite permanent virtual memory, it is necessary
5433 * to copy each page, as the external memory management
5434 * interface currently does not provide any optimizations.
5435 *
5436 * Unaligned memory also has to be copied. It is possible
5437 * to use 'vm_trickery' to copy the aligned data. This is
5438 * not currently done, but would not be hard to implement.
5439 *
5440 * Once a page of permanent memory has been overwritten,
5441 * it is impossible to interrupt this function; otherwise,
5442 * the call would be neither atomic nor location-independent.
5443 * The kernel-state portion of a user thread must be
5444 * interruptible.
5445 *
5446 * It may be expensive to forward all requests that might
5447 * overwrite permanent memory (vm_write, vm_copy) to
5448 * uninterruptible kernel threads. This routine may be
5449 * called by interruptible threads; however, success is
5450 * not guaranteed -- if the request cannot be performed
5451 * atomically and interruptibly, an error indication is
5452 * returned.
5453 */
5454
91447636 5455static kern_return_t
1c79356b 5456vm_map_copy_overwrite_nested(
91447636
A
5457 vm_map_t dst_map,
5458 vm_map_address_t dst_addr,
5459 vm_map_copy_t copy,
5460 boolean_t interruptible,
6d2010ae
A
5461 pmap_t pmap,
5462 boolean_t discard_on_success)
1c79356b 5463{
91447636
A
5464 vm_map_offset_t dst_end;
5465 vm_map_entry_t tmp_entry;
5466 vm_map_entry_t entry;
5467 kern_return_t kr;
5468 boolean_t aligned = TRUE;
5469 boolean_t contains_permanent_objects = FALSE;
5470 boolean_t encountered_sub_map = FALSE;
5471 vm_map_offset_t base_addr;
5472 vm_map_size_t copy_size;
5473 vm_map_size_t total_size;
1c79356b
A
5474
5475
5476 /*
5477 * Check for null copy object.
5478 */
5479
5480 if (copy == VM_MAP_COPY_NULL)
5481 return(KERN_SUCCESS);
5482
5483 /*
5484 * Check for special kernel buffer allocated
5485 * by new_ipc_kmsg_copyin.
5486 */
5487
5488 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0 5489 return(vm_map_copyout_kernel_buffer(
2d21ac55
A
5490 dst_map, &dst_addr,
5491 copy, TRUE));
1c79356b
A
5492 }
5493
5494 /*
5495 * Only works for entry lists at the moment. Will
5496 * support page lists later.
5497 */
5498
5499 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5500
5501 if (copy->size == 0) {
6d2010ae
A
5502 if (discard_on_success)
5503 vm_map_copy_discard(copy);
1c79356b
A
5504 return(KERN_SUCCESS);
5505 }
5506
5507 /*
5508 * Verify that the destination is all writeable
5509 * initially. We have to trunc the destination
5510 * address and round the copy size or we'll end up
5511 * splitting entries in strange ways.
5512 */
5513
5514 if (!page_aligned(copy->size) ||
2d21ac55
A
5515 !page_aligned (copy->offset) ||
5516 !page_aligned (dst_addr))
1c79356b
A
5517 {
5518 aligned = FALSE;
91447636 5519 dst_end = vm_map_round_page(dst_addr + copy->size);
1c79356b
A
5520 } else {
5521 dst_end = dst_addr + copy->size;
5522 }
5523
1c79356b 5524 vm_map_lock(dst_map);
9bccf70c 5525
91447636
A
5526 /* LP64todo - remove this check when vm_map_commpage64()
5527 * no longer has to stuff in a map_entry for the commpage
5528 * above the map's max_offset.
5529 */
5530 if (dst_addr >= dst_map->max_offset) {
5531 vm_map_unlock(dst_map);
5532 return(KERN_INVALID_ADDRESS);
5533 }
5534
9bccf70c 5535start_pass_1:
1c79356b
A
5536 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5537 vm_map_unlock(dst_map);
5538 return(KERN_INVALID_ADDRESS);
5539 }
91447636 5540 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
1c79356b
A
5541 for (entry = tmp_entry;;) {
5542 vm_map_entry_t next = entry->vme_next;
5543
5544 while(entry->is_sub_map) {
91447636
A
5545 vm_map_offset_t sub_start;
5546 vm_map_offset_t sub_end;
5547 vm_map_offset_t local_end;
1c79356b
A
5548
5549 if (entry->in_transition) {
5550
2d21ac55
A
5551 /*
5552 * Say that we are waiting, and wait for entry.
5553 */
1c79356b
A
5554 entry->needs_wakeup = TRUE;
5555 vm_map_entry_wait(dst_map, THREAD_UNINT);
5556
5557 goto start_pass_1;
5558 }
5559
5560 local_end = entry->vme_end;
5561 if (!(entry->needs_copy)) {
5562 /* if needs_copy we are a COW submap */
5563 /* in such a case we just replace so */
5564 /* there is no need for the follow- */
5565 /* ing check. */
5566 encountered_sub_map = TRUE;
5567 sub_start = entry->offset;
5568
5569 if(entry->vme_end < dst_end)
5570 sub_end = entry->vme_end;
5571 else
5572 sub_end = dst_end;
5573 sub_end -= entry->vme_start;
5574 sub_end += entry->offset;
5575 vm_map_unlock(dst_map);
5576
5577 kr = vm_map_overwrite_submap_recurse(
5578 entry->object.sub_map,
5579 sub_start,
5580 sub_end - sub_start);
5581 if(kr != KERN_SUCCESS)
5582 return kr;
5583 vm_map_lock(dst_map);
5584 }
5585
5586 if (dst_end <= entry->vme_end)
5587 goto start_overwrite;
5588 if(!vm_map_lookup_entry(dst_map, local_end,
5589 &entry)) {
5590 vm_map_unlock(dst_map);
5591 return(KERN_INVALID_ADDRESS);
5592 }
5593 next = entry->vme_next;
5594 }
5595
5596 if ( ! (entry->protection & VM_PROT_WRITE)) {
5597 vm_map_unlock(dst_map);
5598 return(KERN_PROTECTION_FAILURE);
5599 }
5600
5601 /*
5602 * If the entry is in transition, we must wait
5603 * for it to exit that state. Anything could happen
5604 * when we unlock the map, so start over.
5605 */
5606 if (entry->in_transition) {
5607
5608 /*
5609 * Say that we are waiting, and wait for entry.
5610 */
5611 entry->needs_wakeup = TRUE;
5612 vm_map_entry_wait(dst_map, THREAD_UNINT);
5613
5614 goto start_pass_1;
5615 }
5616
5617 /*
5618 * our range is contained completely within this map entry
5619 */
5620 if (dst_end <= entry->vme_end)
5621 break;
5622 /*
5623 * check that range specified is contiguous region
5624 */
5625 if ((next == vm_map_to_entry(dst_map)) ||
5626 (next->vme_start != entry->vme_end)) {
5627 vm_map_unlock(dst_map);
5628 return(KERN_INVALID_ADDRESS);
5629 }
5630
5631
5632 /*
5633 * Check for permanent objects in the destination.
5634 */
5635 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5636 ((!entry->object.vm_object->internal) ||
5637 (entry->object.vm_object->true_share))) {
5638 contains_permanent_objects = TRUE;
5639 }
5640
5641 entry = next;
5642 }/* for */
5643
5644start_overwrite:
5645 /*
5646 * If there are permanent objects in the destination, then
5647 * the copy cannot be interrupted.
5648 */
5649
5650 if (interruptible && contains_permanent_objects) {
5651 vm_map_unlock(dst_map);
5652 return(KERN_FAILURE); /* XXX */
5653 }
5654
5655 /*
5656 *
5657 * Make a second pass, overwriting the data
5658 * At the beginning of each loop iteration,
5659 * the next entry to be overwritten is "tmp_entry"
5660 * (initially, the value returned from the lookup above),
5661 * and the starting address expected in that entry
5662 * is "start".
5663 */
5664
5665 total_size = copy->size;
5666 if(encountered_sub_map) {
5667 copy_size = 0;
5668 /* re-calculate tmp_entry since we've had the map */
5669 /* unlocked */
5670 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5671 vm_map_unlock(dst_map);
5672 return(KERN_INVALID_ADDRESS);
5673 }
5674 } else {
5675 copy_size = copy->size;
5676 }
5677
5678 base_addr = dst_addr;
5679 while(TRUE) {
5680 /* deconstruct the copy object and do in parts */
5681 /* only in sub_map, interruptable case */
5682 vm_map_entry_t copy_entry;
5683 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5684 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5685 int nentries;
5686 int remaining_entries = 0;
5687 vm_map_offset_t new_offset = 0;
5688
5689 for (entry = tmp_entry; copy_size == 0;) {
5690 vm_map_entry_t next;
5691
5692 next = entry->vme_next;
5693
5694 /* tmp_entry and the base address are moved along */
5695 /* each time we encounter a sub-map. Otherwise, */
5696 /* entry can outpace tmp_entry, and the copy_size */
5697 /* may reflect the distance between them. */
5698 /* If the current entry is found to be in transition, */
5699 /* we will start over at the beginning or at the last */
5700 /* encountered submap, as dictated by base_addr, and */
5701 /* we will zero copy_size accordingly. */
5702 if (entry->in_transition) {
5703 /*
5704 * Say that we are waiting, and wait for entry.
5705 */
5706 entry->needs_wakeup = TRUE;
5707 vm_map_entry_wait(dst_map, THREAD_UNINT);
5708
5709 if(!vm_map_lookup_entry(dst_map, base_addr,
5710 &tmp_entry)) {
5711 vm_map_unlock(dst_map);
5712 return(KERN_INVALID_ADDRESS);
5713 }
5714 copy_size = 0;
5715 entry = tmp_entry;
5716 continue;
5717 }
5718 if(entry->is_sub_map) {
5719 vm_map_offset_t sub_start;
5720 vm_map_offset_t sub_end;
5721 vm_map_offset_t local_end;
5722
5723 if (entry->needs_copy) {
5724 /* if this is a COW submap */
5725 /* just back the range with an */
5726 /* anonymous entry */
5727 if(entry->vme_end < dst_end)
5728 sub_end = entry->vme_end;
5729 else
5730 sub_end = dst_end;
5731 if(entry->vme_start < base_addr)
5732 sub_start = base_addr;
5733 else
5734 sub_start = entry->vme_start;
5735 vm_map_clip_end(
5736 dst_map, entry, sub_end);
5737 vm_map_clip_start(
5738 dst_map, entry, sub_start);
5739 assert(!entry->use_pmap);
5740 entry->is_sub_map = FALSE;
5741 vm_map_deallocate(
5742 entry->object.sub_map);
5743 entry->object.sub_map = NULL;
5744 entry->is_shared = FALSE;
5745 entry->needs_copy = FALSE;
5746 entry->offset = 0;
5747 /*
5748 * XXX FBDP
5749 * We should propagate the protections
5750 * of the submap entry here instead
5751 * of forcing them to VM_PROT_ALL...
5752 * Or better yet, we should inherit
5753 * the protection of the copy_entry.
5754 */
5755 entry->protection = VM_PROT_ALL;
5756 entry->max_protection = VM_PROT_ALL;
5757 entry->wired_count = 0;
5758 entry->user_wired_count = 0;
5759 if(entry->inheritance
5760 == VM_INHERIT_SHARE)
5761 entry->inheritance = VM_INHERIT_COPY;
5762 continue;
5763 }
5764 /* first take care of any non-sub_map */
5765 /* entries to send */
5766 if(base_addr < entry->vme_start) {
5767 /* stuff to send */
5768 copy_size =
5769 entry->vme_start - base_addr;
5770 break;
5771 }
5772 sub_start = entry->offset;
5773
5774 if(entry->vme_end < dst_end)
5775 sub_end = entry->vme_end;
5776 else
5777 sub_end = dst_end;
5778 sub_end -= entry->vme_start;
5779 sub_end += entry->offset;
5780 local_end = entry->vme_end;
5781 vm_map_unlock(dst_map);
5782 copy_size = sub_end - sub_start;
5783
5784 /* adjust the copy object */
5785 if (total_size > copy_size) {
5786 vm_map_size_t local_size = 0;
5787 vm_map_size_t entry_size;
5788
5789 nentries = 1;
5790 new_offset = copy->offset;
5791 copy_entry = vm_map_copy_first_entry(copy);
5792 while(copy_entry !=
5793 vm_map_copy_to_entry(copy)){
5794 entry_size = copy_entry->vme_end -
5795 copy_entry->vme_start;
5796 if((local_size < copy_size) &&
5797 ((local_size + entry_size)
5798 >= copy_size)) {
5799 vm_map_copy_clip_end(copy,
5800 copy_entry,
5801 copy_entry->vme_start +
5802 (copy_size - local_size));
5803 entry_size = copy_entry->vme_end -
5804 copy_entry->vme_start;
5805 local_size += entry_size;
5806 new_offset += entry_size;
5807 }
5808 if(local_size >= copy_size) {
5809 next_copy = copy_entry->vme_next;
5810 copy_entry->vme_next =
5811 vm_map_copy_to_entry(copy);
5812 previous_prev =
5813 copy->cpy_hdr.links.prev;
5814 copy->cpy_hdr.links.prev = copy_entry;
5815 copy->size = copy_size;
5816 remaining_entries =
5817 copy->cpy_hdr.nentries;
5818 remaining_entries -= nentries;
5819 copy->cpy_hdr.nentries = nentries;
5820 break;
5821 } else {
5822 local_size += entry_size;
5823 new_offset += entry_size;
5824 nentries++;
5825 }
5826 copy_entry = copy_entry->vme_next;
5827 }
5828 }
5829
5830 if((entry->use_pmap) && (pmap == NULL)) {
5831 kr = vm_map_copy_overwrite_nested(
5832 entry->object.sub_map,
5833 sub_start,
5834 copy,
5835 interruptible,
5836 entry->object.sub_map->pmap,
5837 TRUE);
5838 } else if (pmap != NULL) {
5839 kr = vm_map_copy_overwrite_nested(
5840 entry->object.sub_map,
5841 sub_start,
5842 copy,
5843 interruptible, pmap,
5844 TRUE);
5845 } else {
5846 kr = vm_map_copy_overwrite_nested(
5847 entry->object.sub_map,
5848 sub_start,
5849 copy,
5850 interruptible,
5851 dst_map->pmap,
5852 TRUE);
5853 }
5854 if(kr != KERN_SUCCESS) {
5855 if(next_copy != NULL) {
5856 copy->cpy_hdr.nentries +=
5857 remaining_entries;
5858 copy->cpy_hdr.links.prev->vme_next =
5859 next_copy;
5860 copy->cpy_hdr.links.prev
5861 = previous_prev;
5862 copy->size = total_size;
5863 }
5864 return kr;
5865 }
5866 if (dst_end <= local_end) {
5867 return(KERN_SUCCESS);
5868 }
5869 /* otherwise copy no longer exists, it was */
5870 /* destroyed after successful copy_overwrite */
5871 copy = (vm_map_copy_t)
5872 zalloc(vm_map_copy_zone);
5873 vm_map_copy_first_entry(copy) =
5874 vm_map_copy_last_entry(copy) =
5875 vm_map_copy_to_entry(copy);
5876 copy->type = VM_MAP_COPY_ENTRY_LIST;
5877 copy->offset = new_offset;
5878
5879 total_size -= copy_size;
5880 copy_size = 0;
5881 /* put back remainder of copy in container */
5882 if(next_copy != NULL) {
5883 copy->cpy_hdr.nentries = remaining_entries;
5884 copy->cpy_hdr.links.next = next_copy;
5885 copy->cpy_hdr.links.prev = previous_prev;
5886 copy->size = total_size;
5887 next_copy->vme_prev =
5888 vm_map_copy_to_entry(copy);
5889 next_copy = NULL;
5890 }
5891 base_addr = local_end;
5892 vm_map_lock(dst_map);
5893 if(!vm_map_lookup_entry(dst_map,
5894 local_end, &tmp_entry)) {
5895 vm_map_unlock(dst_map);
5896 return(KERN_INVALID_ADDRESS);
5897 }
5898 entry = tmp_entry;
5899 continue;
5900 }
5901 if (dst_end <= entry->vme_end) {
5902 copy_size = dst_end - base_addr;
5903 break;
5904 }
5905
5906 if ((next == vm_map_to_entry(dst_map)) ||
5907 (next->vme_start != entry->vme_end)) {
5908 vm_map_unlock(dst_map);
5909 return(KERN_INVALID_ADDRESS);
5910 }
5911
5912 entry = next;
5913 }/* for */
5914
5915 next_copy = NULL;
5916 nentries = 1;
5917
5918 /* adjust the copy object */
5919 if (total_size > copy_size) {
5920 vm_map_size_t local_size = 0;
5921 vm_map_size_t entry_size;
5922
5923 new_offset = copy->offset;
5924 copy_entry = vm_map_copy_first_entry(copy);
5925 while(copy_entry != vm_map_copy_to_entry(copy)) {
5926 entry_size = copy_entry->vme_end -
5927 copy_entry->vme_start;
5928 if((local_size < copy_size) &&
5929 ((local_size + entry_size)
5930 >= copy_size)) {
5931 vm_map_copy_clip_end(copy, copy_entry,
5932 copy_entry->vme_start +
5933 (copy_size - local_size));
5934 entry_size = copy_entry->vme_end -
5935 copy_entry->vme_start;
5936 local_size += entry_size;
5937 new_offset += entry_size;
5938 }
5939 if(local_size >= copy_size) {
5940 next_copy = copy_entry->vme_next;
5941 copy_entry->vme_next =
5942 vm_map_copy_to_entry(copy);
5943 previous_prev =
5944 copy->cpy_hdr.links.prev;
5945 copy->cpy_hdr.links.prev = copy_entry;
5946 copy->size = copy_size;
5947 remaining_entries =
5948 copy->cpy_hdr.nentries;
5949 remaining_entries -= nentries;
5950 copy->cpy_hdr.nentries = nentries;
5951 break;
5952 } else {
5953 local_size += entry_size;
5954 new_offset += entry_size;
5955 nentries++;
5956 }
5957 copy_entry = copy_entry->vme_next;
5958 }
5959 }
5960
5961 if (aligned) {
5962 pmap_t local_pmap;
5963
5964 if(pmap)
5965 local_pmap = pmap;
5966 else
5967 local_pmap = dst_map->pmap;
5968
5969 if ((kr = vm_map_copy_overwrite_aligned(
5970 dst_map, tmp_entry, copy,
5971 base_addr, local_pmap)) != KERN_SUCCESS) {
5972 if(next_copy != NULL) {
5973 copy->cpy_hdr.nentries +=
5974 remaining_entries;
5975 copy->cpy_hdr.links.prev->vme_next =
5976 next_copy;
5977 copy->cpy_hdr.links.prev =
5978 previous_prev;
5979 copy->size += copy_size;
5980 }
5981 return kr;
5982 }
5983 vm_map_unlock(dst_map);
5984 } else {
5985 /*
5986 * Performance gain:
5987 *
5988 * if the copy and dst address are misaligned but have the same
5989 * offset within the page, we can copy the misaligned parts
5990 * unaligned and copy the rest aligned. If they are
5991 * aligned but the length is unaligned, we simply need to copy
5992 * the end bit unaligned. We'll need to split the misaligned
5993 * bits of the region in this case!
5994 */
5995 /* ALWAYS UNLOCKS THE dst_map MAP */
5996 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
5997 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5998 if(next_copy != NULL) {
5999 copy->cpy_hdr.nentries +=
6000 remaining_entries;
6001 copy->cpy_hdr.links.prev->vme_next =
6002 next_copy;
6003 copy->cpy_hdr.links.prev =
6004 previous_prev;
6005 copy->size += copy_size;
6006 }
6007 return kr;
6008 }
6009 }
6010 total_size -= copy_size;
6011 if(total_size == 0)
6012 break;
6013 base_addr += copy_size;
6014 copy_size = 0;
6015 copy->offset = new_offset;
6016 if(next_copy != NULL) {
6017 copy->cpy_hdr.nentries = remaining_entries;
6018 copy->cpy_hdr.links.next = next_copy;
6019 copy->cpy_hdr.links.prev = previous_prev;
6020 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6021 copy->size = total_size;
6022 }
6023 vm_map_lock(dst_map);
6024 while(TRUE) {
6025 if (!vm_map_lookup_entry(dst_map,
6026 base_addr, &tmp_entry)) {
6027 vm_map_unlock(dst_map);
6028 return(KERN_INVALID_ADDRESS);
6029 }
6030 if (tmp_entry->in_transition) {
6031 entry->needs_wakeup = TRUE;
6032 vm_map_entry_wait(dst_map, THREAD_UNINT);
6033 } else {
6034 break;
6035 }
6036 }
6037 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6038
6039 entry = tmp_entry;
6040 } /* while */
6041
6042 /*
6043 * Throw away the vm_map_copy object
6044 */
6045 if (discard_on_success)
6046 vm_map_copy_discard(copy);
6047
6048 return(KERN_SUCCESS);
6049 }/* vm_map_copy_overwrite_nested */
6050
6051 kern_return_t
6052 vm_map_copy_overwrite(
6053 vm_map_t dst_map,
6054 vm_map_offset_t dst_addr,
6055 vm_map_copy_t copy,
6056 boolean_t interruptible)
6057 {
6058 vm_map_size_t head_size, tail_size;
6059 vm_map_copy_t head_copy, tail_copy;
6060 vm_map_offset_t head_addr, tail_addr;
6061 vm_map_entry_t entry;
6062 kern_return_t kr;
6063
6064 head_size = 0;
6065 tail_size = 0;
6066 head_copy = NULL;
6067 tail_copy = NULL;
6068 head_addr = 0;
6069 tail_addr = 0;
6070
6071 if (interruptible ||
6072 copy == VM_MAP_COPY_NULL ||
6073 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6074 /*
6075 * We can't split the "copy" map if we're interruptible
6076 * or if we don't have a "copy" map...
6077 */
6078 blunt_copy:
6079 return vm_map_copy_overwrite_nested(dst_map,
6080 dst_addr,
6081 copy,
6082 interruptible,
6083 (pmap_t) NULL,
6084 TRUE);
6085 }
6086
6087 if (copy->size < 3 * PAGE_SIZE) {
6088 /*
6089 * Too small to bother with optimizing...
6090 */
6091 goto blunt_copy;
6092 }
6093
6094 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6095 /*
6096 * Incompatible mis-alignment of source and destination...
6097 */
6098 goto blunt_copy;
6099 }
6100
6101 /*
6102 * Proper alignment or identical mis-alignment at the beginning.
6103 * Let's try and do a small unaligned copy first (if needed)
6104 * and then an aligned copy for the rest.
6105 */
6106 if (!page_aligned(dst_addr)) {
6107 head_addr = dst_addr;
6108 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6109 }
6110 if (!page_aligned(copy->offset + copy->size)) {
6111 /*
6112 * Mis-alignment at the end.
6113 * Do an aligned copy up to the last page and
6114 * then an unaligned copy for the remaining bytes.
6115 */
6116 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6117 tail_addr = dst_addr + copy->size - tail_size;
6118 }
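/*
 * Worked example (illustrative only, 4K pages): for copy->offset =
 * 0x1200 and copy->size = 0x5000 with a matching dst_addr offset,
 *	head_size = PAGE_SIZE - 0x200 = 0xe00	(up to the page boundary)
 *	tail_size = (0x1200 + 0x5000) & PAGE_MASK = 0x200
 * which leaves 0x4000, i.e. four whole pages, for the aligned
 * middle copy.
 */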
6119
6120 if (head_size + tail_size == copy->size) {
6121 /*
6122 * It's all unaligned, no optimization possible...
6123 */
6124 goto blunt_copy;
6125 }
6126
6127 /*
6128 * Can't optimize if there are any submaps in the
6129 * destination due to the way we free the "copy" map
6130 * progressively in vm_map_copy_overwrite_nested()
6131 * in that case.
6132 */
6133 vm_map_lock_read(dst_map);
6134 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6135 vm_map_unlock_read(dst_map);
6136 goto blunt_copy;
6137 }
6138 for (;
6139 (entry != vm_map_copy_to_entry(copy) &&
6140 entry->vme_start < dst_addr + copy->size);
6141 entry = entry->vme_next) {
6142 if (entry->is_sub_map) {
6143 vm_map_unlock_read(dst_map);
6144 goto blunt_copy;
6145 }
6146 }
6147 vm_map_unlock_read(dst_map);
6148
6149 if (head_size) {
6150 /*
6151 * Unaligned copy of the first "head_size" bytes, to reach
6152 * a page boundary.
6153 */
6154
6155 /*
6156 * Extract "head_copy" out of "copy".
6157 */
6158 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6159 vm_map_copy_first_entry(head_copy) =
6160 vm_map_copy_to_entry(head_copy);
6161 vm_map_copy_last_entry(head_copy) =
6162 vm_map_copy_to_entry(head_copy);
6163 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6164 head_copy->cpy_hdr.nentries = 0;
6165 head_copy->cpy_hdr.entries_pageable =
6166 copy->cpy_hdr.entries_pageable;
6167 vm_map_store_init(&head_copy->cpy_hdr);
6168
6169 head_copy->offset = copy->offset;
6170 head_copy->size = head_size;
6171
6172 copy->offset += head_size;
6173 copy->size -= head_size;
6174
6175 entry = vm_map_copy_first_entry(copy);
6176 vm_map_copy_clip_end(copy, entry, copy->offset);
6177 vm_map_copy_entry_unlink(copy, entry);
6178 vm_map_copy_entry_link(head_copy,
6179 vm_map_copy_to_entry(head_copy),
6180 entry);
6181
6182 /*
6183 * Do the unaligned copy.
6184 */
6185 kr = vm_map_copy_overwrite_nested(dst_map,
6186 head_addr,
6187 head_copy,
6188 interruptible,
6189 (pmap_t) NULL,
6190 FALSE);
6191 if (kr != KERN_SUCCESS)
6192 goto done;
6193 }
6194
6195 if (tail_size) {
6196 /*
6197 * Extract "tail_copy" out of "copy".
6198 */
6199 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6200 vm_map_copy_first_entry(tail_copy) =
6201 vm_map_copy_to_entry(tail_copy);
6202 vm_map_copy_last_entry(tail_copy) =
6203 vm_map_copy_to_entry(tail_copy);
6204 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6205 tail_copy->cpy_hdr.nentries = 0;
6206 tail_copy->cpy_hdr.entries_pageable =
6207 copy->cpy_hdr.entries_pageable;
6208 vm_map_store_init(&tail_copy->cpy_hdr);
6209
6210 tail_copy->offset = copy->offset + copy->size - tail_size;
6211 tail_copy->size = tail_size;
6212
6213 copy->size -= tail_size;
6214
6215 entry = vm_map_copy_last_entry(copy);
6216 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6217 entry = vm_map_copy_last_entry(copy);
6218 vm_map_copy_entry_unlink(copy, entry);
6219 vm_map_copy_entry_link(tail_copy,
6220 vm_map_copy_last_entry(tail_copy),
6221 entry);
6222 }
6223
6224 /*
6225 * Copy most (or possibly all) of the data.
6226 */
6227 kr = vm_map_copy_overwrite_nested(dst_map,
6228 dst_addr + head_size,
6229 copy,
6230 interruptible,
6231 (pmap_t) NULL,
6232 FALSE);
6233 if (kr != KERN_SUCCESS) {
6234 goto done;
6235 }
6236
6237 if (tail_size) {
6238 kr = vm_map_copy_overwrite_nested(dst_map,
6239 tail_addr,
6240 tail_copy,
6241 interruptible,
6242 (pmap_t) NULL,
6243 FALSE);
6244 }
6245
6246done:
6247 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6248 if (kr == KERN_SUCCESS) {
6249 /*
6250 * Discard all the copy maps.
6251 */
6252 if (head_copy) {
6253 vm_map_copy_discard(head_copy);
6254 head_copy = NULL;
6255 }
6256 vm_map_copy_discard(copy);
6257 if (tail_copy) {
6258 vm_map_copy_discard(tail_copy);
6259 tail_copy = NULL;
6260 }
6261 } else {
6262 /*
6263 * Re-assemble the original copy map.
6264 */
6265 if (head_copy) {
6266 entry = vm_map_copy_first_entry(head_copy);
6267 vm_map_copy_entry_unlink(head_copy, entry);
6268 vm_map_copy_entry_link(copy,
6269 vm_map_copy_to_entry(copy),
6270 entry);
6271 copy->offset -= head_size;
6272 copy->size += head_size;
6273 vm_map_copy_discard(head_copy);
6274 head_copy = NULL;
6275 }
6276 if (tail_copy) {
6277 entry = vm_map_copy_last_entry(tail_copy);
6278 vm_map_copy_entry_unlink(tail_copy, entry);
6279 vm_map_copy_entry_link(copy,
6280 vm_map_copy_last_entry(copy),
6281 entry);
6282 copy->size += tail_size;
6283 vm_map_copy_discard(tail_copy);
6284 tail_copy = NULL;
6285 }
6286 }
6287 return kr;
6288}
6289
6290
6291 /*
6292 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6293 *
6294 * Description:
6295 * Physically copy unaligned data
6296 *
6297 * Implementation:
6298 * Unaligned parts of pages have to be physically copied. We use
6299 * a modified form of vm_fault_copy (which understands non-aligned
6300 * page offsets and sizes) to do the copy. We attempt to copy as
6301 * much memory in one go as possible; however, vm_fault_copy copies
6302 * within 1 memory object so we have to find the smaller of "amount left"
6303 * "source object data size" and "target object data size". With
6304 * unaligned data we don't need to split regions, therefore the source
6305 * (copy) object should be one map entry, the target range may be split
6306 * over multiple map entries however. In any event we are pessimistic
6307 * about these assumptions.
6308 *
6309 * Assumptions:
6310 * dst_map is locked on entry and is returned locked on success,
6311 * unlocked on error.
6312 */
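/*
 * Sizing used by each pass of the loop below (illustrative paraphrase):
 * the amount handed to vm_fault_copy() is the smallest of
 *	- what is left to copy (amount_left),
 *	- the room left in the current destination entry (dst_size),
 *	- the room left in the current source copy entry (src_size),
 * so a single vm_fault_copy() call never crosses a source or a
 * destination map entry boundary.
 */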
6313
6314 static kern_return_t
6315 vm_map_copy_overwrite_unaligned(
6316 vm_map_t dst_map,
6317 vm_map_entry_t entry,
6318 vm_map_copy_t copy,
6319 vm_map_offset_t start)
6320 {
6321 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6322 vm_map_version_t version;
6323 vm_object_t dst_object;
6324 vm_object_offset_t dst_offset;
6325 vm_object_offset_t src_offset;
6326 vm_object_offset_t entry_offset;
6327 vm_map_offset_t entry_end;
6328 vm_map_size_t src_size,
6329 dst_size,
6330 copy_size,
6331 amount_left;
6332 kern_return_t kr = KERN_SUCCESS;
6333
6334 vm_map_lock_write_to_read(dst_map);
6335
6336 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6337 amount_left = copy->size;
6338 /*
6339 * the copy is unaligned, so we never clipped this entry; we need
6340 * the offset into the vm_object, not just the data.
6341 */
6342 while (amount_left > 0) {
6343
6344 if (entry == vm_map_to_entry(dst_map)) {
6345 vm_map_unlock_read(dst_map);
6346 return KERN_INVALID_ADDRESS;
6347 }
6348
6349 /* "start" must be within the current map entry */
6350 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6351
6352 dst_offset = start - entry->vme_start;
6353
6354 dst_size = entry->vme_end - start;
6355
6356 src_size = copy_entry->vme_end -
6357 (copy_entry->vme_start + src_offset);
6358
6359 if (dst_size < src_size) {
6360/*
6361 * we can only copy dst_size bytes before
6362 * we have to get the next destination entry
6363 */
6364 copy_size = dst_size;
6365 } else {
6366/*
6367 * we can only copy src_size bytes before
6368 * we have to get the next source copy entry
6369 */
6370 copy_size = src_size;
6371 }
6372
6373 if (copy_size > amount_left) {
6374 copy_size = amount_left;
6375 }
6376 /*
6377 * Entry needs copy: create a shadow object for the
6378 * copy-on-write region.
6379 */
6380 if (entry->needs_copy &&
6381 ((entry->protection & VM_PROT_WRITE) != 0))
6382 {
6383 if (vm_map_lock_read_to_write(dst_map)) {
6384 vm_map_lock_read(dst_map);
6385 goto RetryLookup;
6386 }
6387 vm_object_shadow(&entry->object.vm_object,
6388 &entry->offset,
6389 (vm_map_size_t)(entry->vme_end
6390 - entry->vme_start));
6391 entry->needs_copy = FALSE;
6392 vm_map_lock_write_to_read(dst_map);
6393 }
6394 dst_object = entry->object.vm_object;
6395/*
6396 * unlike with the virtual (aligned) copy we're going
6397 * to fault on it therefore we need a target object.
6398 */
6399 if (dst_object == VM_OBJECT_NULL) {
6400 if (vm_map_lock_read_to_write(dst_map)) {
6401 vm_map_lock_read(dst_map);
6402 goto RetryLookup;
6403 }
6404 dst_object = vm_object_allocate((vm_map_size_t)
6405 entry->vme_end - entry->vme_start);
6406 entry->object.vm_object = dst_object;
6407 entry->offset = 0;
6408 vm_map_lock_write_to_read(dst_map);
6409 }
6410/*
6411 * Take an object reference and unlock map. The "entry" may
6412 * disappear or change when the map is unlocked.
6413 */
6414 vm_object_reference(dst_object);
6415 version.main_timestamp = dst_map->timestamp;
6416 entry_offset = entry->offset;
6417 entry_end = entry->vme_end;
6418 vm_map_unlock_read(dst_map);
6419/*
6420 * Copy as much as possible in one pass
6421 */
6422 kr = vm_fault_copy(
6423 copy_entry->object.vm_object,
6424 copy_entry->offset + src_offset,
6425 &copy_size,
6426 dst_object,
6427 entry_offset + dst_offset,
6428 dst_map,
6429 &version,
6430 THREAD_UNINT );
6431
6432 start += copy_size;
6433 src_offset += copy_size;
6434 amount_left -= copy_size;
6435/*
6436 * Release the object reference
6437 */
6438 vm_object_deallocate(dst_object);
6439/*
6440 * If a hard error occurred, return it now
6441 */
6442 if (kr != KERN_SUCCESS)
6443 return kr;
6444
6445 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6446 || amount_left == 0)
6447 {
6448/*
6449 * all done with this copy entry, dispose.
6450 */
6451 vm_map_copy_entry_unlink(copy, copy_entry);
6452 vm_object_deallocate(copy_entry->object.vm_object);
6453 vm_map_copy_entry_dispose(copy, copy_entry);
6454
6455 if ((copy_entry = vm_map_copy_first_entry(copy))
6456 == vm_map_copy_to_entry(copy) && amount_left) {
6457/*
6458 * not finished copying but run out of source
6459 */
6460 return KERN_INVALID_ADDRESS;
6461 }
6462 src_offset = 0;
6463 }
6464
6465 if (amount_left == 0)
6466 return KERN_SUCCESS;
6467
6468 vm_map_lock_read(dst_map);
6469 if (version.main_timestamp == dst_map->timestamp) {
6470 if (start == entry_end) {
6471/*
6472 * destination region is split. Use the version
6473 * information to avoid a lookup in the normal
6474 * case.
6475 */
6476 entry = entry->vme_next;
6477/*
6478 * should be contiguous. Fail if we encounter
6479 * a hole in the destination.
6480 */
6481 if (start != entry->vme_start) {
6482 vm_map_unlock_read(dst_map);
6483 return KERN_INVALID_ADDRESS ;
6484 }
6485 }
6486 } else {
6487/*
6488 * Map version check failed.
6489 * we must lookup the entry because somebody
6490 * might have changed the map behind our backs.
6491 */
6492 RetryLookup:
6493 if (!vm_map_lookup_entry(dst_map, start, &entry))
6494 {
6495 vm_map_unlock_read(dst_map);
6496 return KERN_INVALID_ADDRESS ;
6497 }
6498 }
6499 }/* while */
6500
6501 return KERN_SUCCESS;
6502 }/* vm_map_copy_overwrite_unaligned */
6503
6504 /*
6505 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6506 *
6507 * Description:
6508 * Does all the vm_trickery possible for whole pages.
6509 *
6510 * Implementation:
6511 *
6512 * If there are no permanent objects in the destination,
6513 * and the source and destination map entry zones match,
6514 * and the destination map entry is not shared,
6515 * then the map entries can be deleted and replaced
6516 * with those from the copy. The following code is the
6517 * basic idea of what to do, but there are lots of annoying
6518 * little details about getting protection and inheritance
6519 * right. Should add protection, inheritance, and sharing checks
6520 * to the above pass and make sure that no wiring is involved.
6521 */
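/*
 * Rough decision made for each destination entry below (paraphrase):
 *
 *	if the destination is temporary, unshared memory (or needs_copy)
 *		throw the old object away and install the copy entry's
 *		object directly -- an entry swap, no data copy;
 *	else
 *		physically copy the pages with vm_fault_copy().
 */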
6522
6523 static kern_return_t
6524 vm_map_copy_overwrite_aligned(
6525 vm_map_t dst_map,
6526 vm_map_entry_t tmp_entry,
6527 vm_map_copy_t copy,
6528 vm_map_offset_t start,
6529 __unused pmap_t pmap)
6530 {
6531 vm_object_t object;
6532 vm_map_entry_t copy_entry;
6533 vm_map_size_t copy_size;
6534 vm_map_size_t size;
6535 vm_map_entry_t entry;
6536
6537 while ((copy_entry = vm_map_copy_first_entry(copy))
6538 != vm_map_copy_to_entry(copy))
6539 {
6540 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6541
6542 entry = tmp_entry;
6543 assert(!entry->use_pmap); /* unnested when clipped earlier */
6544 if (entry == vm_map_to_entry(dst_map)) {
6545 vm_map_unlock(dst_map);
6546 return KERN_INVALID_ADDRESS;
6547 }
6548 size = (entry->vme_end - entry->vme_start);
6549 /*
6550 * Make sure that no holes popped up in the
6551 * address map, and that the protection is
6552 * still valid, in case the map was unlocked
6553 * earlier.
6554 */
6555
6556 if ((entry->vme_start != start) || ((entry->is_sub_map)
6557 && !entry->needs_copy)) {
6558 vm_map_unlock(dst_map);
6559 return(KERN_INVALID_ADDRESS);
6560 }
6561 assert(entry != vm_map_to_entry(dst_map));
6562
6563 /*
6564 * Check protection again
6565 */
6566
6567 if ( ! (entry->protection & VM_PROT_WRITE)) {
6568 vm_map_unlock(dst_map);
6569 return(KERN_PROTECTION_FAILURE);
6570 }
6571
6572 /*
6573 * Adjust to source size first
6574 */
6575
6576 if (copy_size < size) {
6577 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6578 size = copy_size;
6579 }
6580
6581 /*
6582 * Adjust to destination size
6583 */
6584
6585 if (size < copy_size) {
6586 vm_map_copy_clip_end(copy, copy_entry,
6587 copy_entry->vme_start + size);
6588 copy_size = size;
6589 }
6590
6591 assert((entry->vme_end - entry->vme_start) == size);
6592 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6593 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6594
6595 /*
6596 * If the destination contains temporary unshared memory,
6597 * we can perform the copy by throwing it away and
6598 * installing the source data.
6599 */
6600
6601 object = entry->object.vm_object;
6602 if ((!entry->is_shared &&
6603 ((object == VM_OBJECT_NULL) ||
6604 (object->internal && !object->true_share))) ||
6605 entry->needs_copy) {
6606 vm_object_t old_object = entry->object.vm_object;
6607 vm_object_offset_t old_offset = entry->offset;
6608 vm_object_offset_t offset;
6609
6610 /*
6611 * Ensure that the source and destination aren't
6612 * identical
6613 */
6614 if (old_object == copy_entry->object.vm_object &&
6615 old_offset == copy_entry->offset) {
6616 vm_map_copy_entry_unlink(copy, copy_entry);
6617 vm_map_copy_entry_dispose(copy, copy_entry);
6618
6619 if (old_object != VM_OBJECT_NULL)
6620 vm_object_deallocate(old_object);
6621
6622 start = tmp_entry->vme_end;
6623 tmp_entry = tmp_entry->vme_next;
6624 continue;
6625 }
6626
6627 if (old_object != VM_OBJECT_NULL) {
6628 if(entry->is_sub_map) {
6629 if(entry->use_pmap) {
6630 #ifndef NO_NESTED_PMAP
6631 pmap_unnest(dst_map->pmap,
2d21ac55
A
6632 (addr64_t)entry->vme_start,
6633 entry->vme_end - entry->vme_start);
6634 #endif /* NO_NESTED_PMAP */
6635 if(dst_map->mapped) {
6636 /* clean up parent */
6637 /* map/maps */
2d21ac55
A
6638 vm_map_submap_pmap_clean(
6639 dst_map, entry->vme_start,
6640 entry->vme_end,
6641 entry->object.sub_map,
6642 entry->offset);
9bccf70c
A
6643 }
6644 } else {
6645 vm_map_submap_pmap_clean(
6646 dst_map, entry->vme_start,
6647 entry->vme_end,
6648 entry->object.sub_map,
6649 entry->offset);
6650 }
6651 vm_map_deallocate(
1c79356b 6652 entry->object.sub_map);
9bccf70c
A
6653 } else {
6654 if(dst_map->mapped) {
6655 vm_object_pmap_protect(
6656 entry->object.vm_object,
6657 entry->offset,
6658 entry->vme_end
2d21ac55 6659 - entry->vme_start,
9bccf70c
A
6660 PMAP_NULL,
6661 entry->vme_start,
6662 VM_PROT_NONE);
6663 } else {
2d21ac55
A
6664 pmap_remove(dst_map->pmap,
6665 (addr64_t)(entry->vme_start),
6666 (addr64_t)(entry->vme_end));
9bccf70c 6667 }
1c79356b 6668 vm_object_deallocate(old_object);
9bccf70c 6669 }
1c79356b
A
6670 }
6671
6672 entry->is_sub_map = FALSE;
6673 entry->object = copy_entry->object;
6674 object = entry->object.vm_object;
6675 entry->needs_copy = copy_entry->needs_copy;
6676 entry->wired_count = 0;
6677 entry->user_wired_count = 0;
6678 offset = entry->offset = copy_entry->offset;
6679
6680 vm_map_copy_entry_unlink(copy, copy_entry);
6681 vm_map_copy_entry_dispose(copy, copy_entry);
6682
6683 /*
6684 * We could try to push pages into the pmap at this point, BUT
6685 * this optimization only saved on average 2 us per page if ALL
6686 * the pages in the source were currently mapped
6687 * and ALL the pages in the dest were touched; if fewer than
6688 * 2/3 of the pages were touched, this optimization actually cost more cycles.
6689 * It also puts a lot of pressure on the pmap layer w/r/t mapping structures.
6690 */
6691
1c79356b
A
6692 /*
6693 * Set up for the next iteration. The map
6694 * has not been unlocked, so the next
6695 * address should be at the end of this
6696 * entry, and the next map entry should be
6697 * the one following it.
6698 */
6699
6700 start = tmp_entry->vme_end;
6701 tmp_entry = tmp_entry->vme_next;
6702 } else {
6703 vm_map_version_t version;
6704 vm_object_t dst_object = entry->object.vm_object;
6705 vm_object_offset_t dst_offset = entry->offset;
6706 kern_return_t r;
6707
6708 /*
6709 * Take an object reference, and record
6710 * the map version information so that the
6711 * map can be safely unlocked.
6712 */
6713
6714 vm_object_reference(dst_object);
6715
9bccf70c
A
6716 /* account for unlock bumping up timestamp */
6717 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
6718
6719 vm_map_unlock(dst_map);
6720
6721 /*
6722 * Copy as much as possible in one pass
6723 */
6724
6725 copy_size = size;
6726 r = vm_fault_copy(
2d21ac55
A
6727 copy_entry->object.vm_object,
6728 copy_entry->offset,
6729 &copy_size,
6730 dst_object,
6731 dst_offset,
6732 dst_map,
6733 &version,
6734 THREAD_UNINT );
1c79356b
A
6735
6736 /*
6737 * Release the object reference
6738 */
6739
6740 vm_object_deallocate(dst_object);
6741
6742 /*
6743 * If a hard error occurred, return it now
6744 */
6745
6746 if (r != KERN_SUCCESS)
6747 return(r);
6748
6749 if (copy_size != 0) {
6750 /*
6751 * Dispose of the copied region
6752 */
6753
6754 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 6755 copy_entry->vme_start + copy_size);
1c79356b
A
6756 vm_map_copy_entry_unlink(copy, copy_entry);
6757 vm_object_deallocate(copy_entry->object.vm_object);
6758 vm_map_copy_entry_dispose(copy, copy_entry);
6759 }
6760
6761 /*
6762 * Pick up in the destination map where we left off.
6763 *
6764 * Use the version information to avoid a lookup
6765 * in the normal case.
6766 */
6767
6768 start += copy_size;
6769 vm_map_lock(dst_map);
9bccf70c 6770 if (version.main_timestamp == dst_map->timestamp) {
1c79356b
A
6771 /* We can safely use saved tmp_entry value */
6772
6773 vm_map_clip_end(dst_map, tmp_entry, start);
6774 tmp_entry = tmp_entry->vme_next;
6775 } else {
6776 /* Must do lookup of tmp_entry */
6777
6778 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6779 vm_map_unlock(dst_map);
6780 return(KERN_INVALID_ADDRESS);
6781 }
6782 vm_map_clip_start(dst_map, tmp_entry, start);
6783 }
6784 }
6785 }/* while */
6786
6787 return(KERN_SUCCESS);
6788 }/* vm_map_copy_overwrite_aligned */
6789
6790 /*
6791 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6792 *
6793 * Description:
6794 * Copy in data to a kernel buffer from space in the
6795 * source map. The original space may be optionally
6796 * deallocated.
6797 *
6798 * If successful, returns a new copy object.
6799 */
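/*
 * Layout of the single kalloc() block built below (illustrative):
 *
 *	+---------------------+--------------------------+
 *	| struct vm_map_copy  | "len" bytes of user data |
 *	+---------------------+--------------------------+
 *	^ copy                 ^ copy->cpy_kdata == (void *) (copy + 1)
 */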
6800 static kern_return_t
6801 vm_map_copyin_kernel_buffer(
6802 vm_map_t src_map,
91447636
A
6803 vm_map_offset_t src_addr,
6804 vm_map_size_t len,
1c79356b
A
6805 boolean_t src_destroy,
6806 vm_map_copy_t *copy_result)
6807{
91447636 6808 kern_return_t kr;
1c79356b 6809 vm_map_copy_t copy;
b0d623f7
A
6810 vm_size_t kalloc_size;
6811
6812 if ((vm_size_t) len != len) {
6813 /* "len" is too big and doesn't fit in a "vm_size_t" */
6814 return KERN_RESOURCE_SHORTAGE;
6815 }
6816 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6817 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
1c79356b
A
6818
6819 copy = (vm_map_copy_t) kalloc(kalloc_size);
6820 if (copy == VM_MAP_COPY_NULL) {
6821 return KERN_RESOURCE_SHORTAGE;
6822 }
6823 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6824 copy->size = len;
6825 copy->offset = 0;
91447636 6826 copy->cpy_kdata = (void *) (copy + 1);
1c79356b
A
6827 copy->cpy_kalloc_size = kalloc_size;
6828
b0d623f7 6829 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
91447636
A
6830 if (kr != KERN_SUCCESS) {
6831 kfree(copy, kalloc_size);
6832 return kr;
1c79356b
A
6833 }
6834 if (src_destroy) {
91447636 6835 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
2d21ac55
A
6836 vm_map_round_page(src_addr + len),
6837 VM_MAP_REMOVE_INTERRUPTIBLE |
6838 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6839 (src_map == kernel_map) ?
6840 VM_MAP_REMOVE_KUNWIRE : 0);
1c79356b
A
6841 }
6842 *copy_result = copy;
6843 return KERN_SUCCESS;
6844}
6845
6846 /*
6847 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6848 *
6849 * Description:
6850 * Copy out data from a kernel buffer into space in the
6851 * destination map. The space may be optionally dynamically
6852 * allocated.
6853 *
6854 * If successful, consumes the copy object.
6855 * Otherwise, the caller is responsible for it.
6856 */
91447636
A
6857static int vm_map_copyout_kernel_buffer_failures = 0;
6858static kern_return_t
1c79356b 6859vm_map_copyout_kernel_buffer(
91447636
A
6860 vm_map_t map,
6861 vm_map_address_t *addr, /* IN/OUT */
6862 vm_map_copy_t copy,
6863 boolean_t overwrite)
1c79356b
A
6864{
6865 kern_return_t kr = KERN_SUCCESS;
91447636 6866 thread_t thread = current_thread();
1c79356b
A
6867
6868 if (!overwrite) {
6869
6870 /*
6871 * Allocate space in the target map for the data
6872 */
6873 *addr = 0;
6874 kr = vm_map_enter(map,
6875 addr,
91447636
A
6876 vm_map_round_page(copy->size),
6877 (vm_map_offset_t) 0,
6878 VM_FLAGS_ANYWHERE,
1c79356b
A
6879 VM_OBJECT_NULL,
6880 (vm_object_offset_t) 0,
6881 FALSE,
6882 VM_PROT_DEFAULT,
6883 VM_PROT_ALL,
6884 VM_INHERIT_DEFAULT);
6885 if (kr != KERN_SUCCESS)
91447636 6886 return kr;
1c79356b
A
6887 }
6888
6889 /*
6890 * Copyout the data from the kernel buffer to the target map.
6891 */
91447636 6892 if (thread->map == map) {
1c79356b
A
6893
6894 /*
6895 * If the target map is the current map, just do
6896 * the copy.
6897 */
b0d623f7
A
6898 assert((vm_size_t) copy->size == copy->size);
6899 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
91447636 6900 kr = KERN_INVALID_ADDRESS;
1c79356b
A
6901 }
6902 }
6903 else {
6904 vm_map_t oldmap;
6905
6906 /*
6907 * If the target map is another map, assume the
6908 * target's address space identity for the duration
6909 * of the copy.
6910 */
6911 vm_map_reference(map);
6912 oldmap = vm_map_switch(map);
6913
b0d623f7
A
6914 assert((vm_size_t) copy->size == copy->size);
6915 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
91447636
A
6916 vm_map_copyout_kernel_buffer_failures++;
6917 kr = KERN_INVALID_ADDRESS;
1c79356b
A
6918 }
6919
6920 (void) vm_map_switch(oldmap);
6921 vm_map_deallocate(map);
6922 }
6923
91447636
A
6924 if (kr != KERN_SUCCESS) {
6925 /* the copy failed, clean up */
6926 if (!overwrite) {
6927 /*
6928 * Deallocate the space we allocated in the target map.
6929 */
6930 (void) vm_map_remove(map,
6931 vm_map_trunc_page(*addr),
6932 vm_map_round_page(*addr +
6933 vm_map_round_page(copy->size)),
6934 VM_MAP_NO_FLAGS);
6935 *addr = 0;
6936 }
6937 } else {
6938 /* copy was successful, discard the copy structure */
6939 kfree(copy, copy->cpy_kalloc_size);
6940 }
1c79356b 6941
91447636 6942 return kr;
1c79356b
A
6943}
6944
6945 /*
6946 * Macro: vm_map_copy_insert
6947 *
6948 * Description:
6949 * Link a copy chain ("copy") into a map at the
6950 * specified location (after "where").
6951 * Side effects:
6952 * The copy chain is destroyed.
6953 * Warning:
6954 * The arguments are evaluated multiple times.
6955 */
6956 #define vm_map_copy_insert(map, where, copy) \
6957 MACRO_BEGIN \
6958 vm_map_store_copy_insert(map, where, copy); \
6959 zfree(vm_map_copy_zone, copy); \
6960 MACRO_END
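/*
 * Note (paraphrase of the macro above): vm_map_store_copy_insert()
 * links the copy's entries directly into the destination map, so only
 * the now-empty copy header itself is returned to vm_map_copy_zone.
 */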
6961
6962 /*
6963 * Routine: vm_map_copyout
6964 *
6965 * Description:
6966 * Copy out a copy chain ("copy") into newly-allocated
6967 * space in the destination map.
6968 *
6969 * If successful, consumes the copy object.
6970 * Otherwise, the caller is responsible for it.
6971 */
6972 kern_return_t
6973 vm_map_copyout(
6974 vm_map_t dst_map,
6975 vm_map_address_t *dst_addr, /* OUT */
6976 vm_map_copy_t copy)
6977 {
6978 vm_map_size_t size;
6979 vm_map_size_t adjustment;
6980 vm_map_offset_t start;
6981 vm_object_offset_t vm_copy_start;
6982 vm_map_entry_t last;
6983 register
6984 vm_map_entry_t entry;
6985
6986 /*
6987 * Check for null copy object.
6988 */
6989
6990 if (copy == VM_MAP_COPY_NULL) {
6991 *dst_addr = 0;
6992 return(KERN_SUCCESS);
6993 }
6994
6995 /*
6996 * Check for special copy object, created
6997 * by vm_map_copyin_object.
6998 */
6999
7000 if (copy->type == VM_MAP_COPY_OBJECT) {
7001 vm_object_t object = copy->cpy_object;
7002 kern_return_t kr;
7003 vm_object_offset_t offset;
7004
91447636
A
7005 offset = vm_object_trunc_page(copy->offset);
7006 size = vm_map_round_page(copy->size +
2d21ac55 7007 (vm_map_size_t)(copy->offset - offset));
1c79356b
A
7008 *dst_addr = 0;
7009 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 7010 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
1c79356b
A
7011 object, offset, FALSE,
7012 VM_PROT_DEFAULT, VM_PROT_ALL,
7013 VM_INHERIT_DEFAULT);
7014 if (kr != KERN_SUCCESS)
7015 return(kr);
7016 /* Account for non-pagealigned copy object */
91447636
A
7017 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7018 zfree(vm_map_copy_zone, copy);
1c79356b
A
7019 return(KERN_SUCCESS);
7020 }
7021
7022 /*
7023 * Check for special kernel buffer allocated
7024 * by new_ipc_kmsg_copyin.
7025 */
7026
7027 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7028 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7029 copy, FALSE));
7030 }
7031
1c79356b
A
7032 /*
7033 * Find space for the data
7034 */
7035
91447636
A
7036 vm_copy_start = vm_object_trunc_page(copy->offset);
7037 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
2d21ac55 7038 - vm_copy_start;
1c79356b 7039
7040 StartAgain: ;
7041
7042 vm_map_lock(dst_map);
6d2010ae
A
7043 if( dst_map->disable_vmentry_reuse == TRUE) {
7044 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7045 last = entry;
7046 } else {
7047 assert(first_free_is_valid(dst_map));
7048 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
1c79356b 7049 vm_map_min(dst_map) : last->vme_end;
6d2010ae 7050 }
1c79356b
A
7051
7052 while (TRUE) {
7053 vm_map_entry_t next = last->vme_next;
91447636 7054 vm_map_offset_t end = start + size;
1c79356b
A
7055
7056 if ((end > dst_map->max_offset) || (end < start)) {
7057 if (dst_map->wait_for_space) {
7058 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7059 assert_wait((event_t) dst_map,
7060 THREAD_INTERRUPTIBLE);
7061 vm_map_unlock(dst_map);
91447636 7062 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
7063 goto StartAgain;
7064 }
7065 }
7066 vm_map_unlock(dst_map);
7067 return(KERN_NO_SPACE);
7068 }
7069
7070 if ((next == vm_map_to_entry(dst_map)) ||
7071 (next->vme_start >= end))
7072 break;
7073
7074 last = next;
7075 start = last->vme_end;
7076 }
7077
7078 /*
7079 * Since we're going to just drop the map
7080 * entries from the copy into the destination
7081 * map, they must come from the same pool.
7082 */
7083
7084 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
7085 /*
7086 * Mismatches occur when dealing with the default
7087 * pager.
7088 */
7089 zone_t old_zone;
7090 vm_map_entry_t next, new;
7091
7092 /*
7093 * Find the zone that the copies were allocated from
7094 */
7095 old_zone = (copy->cpy_hdr.entries_pageable)
1c79356b
A
7096 ? vm_map_entry_zone
7097 : vm_map_kentry_zone;
2d21ac55
A
7098 entry = vm_map_copy_first_entry(copy);
7099
7100 /*
7101 * Reinitialize the copy so that vm_map_copy_entry_link
7102 * will work.
7103 */
6d2010ae 7104 vm_map_store_copy_reset(copy, entry);
2d21ac55 7105 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
7106
7107 /*
7108 * Copy each entry.
7109 */
7110 while (entry != vm_map_copy_to_entry(copy)) {
7111 new = vm_map_copy_entry_create(copy);
7112 vm_map_entry_copy_full(new, entry);
7113 new->use_pmap = FALSE; /* clr address space specifics */
7114 vm_map_copy_entry_link(copy,
7115 vm_map_copy_last_entry(copy),
7116 new);
7117 next = entry->vme_next;
7118 zfree(old_zone, entry);
7119 entry = next;
7120 }
1c79356b
A
7121 }
7122
7123 /*
7124 * Adjust the addresses in the copy chain, and
7125 * reset the region attributes.
7126 */
7127
7128 adjustment = start - vm_copy_start;
7129 for (entry = vm_map_copy_first_entry(copy);
7130 entry != vm_map_copy_to_entry(copy);
7131 entry = entry->vme_next) {
7132 entry->vme_start += adjustment;
7133 entry->vme_end += adjustment;
7134
7135 entry->inheritance = VM_INHERIT_DEFAULT;
7136 entry->protection = VM_PROT_DEFAULT;
7137 entry->max_protection = VM_PROT_ALL;
7138 entry->behavior = VM_BEHAVIOR_DEFAULT;
7139
7140 /*
7141 * If the entry is now wired,
7142 * map the pages into the destination map.
7143 */
7144 if (entry->wired_count != 0) {
2d21ac55
A
7145 register vm_map_offset_t va;
7146 vm_object_offset_t offset;
7147 register vm_object_t object;
7148 vm_prot_t prot;
7149 int type_of_fault;
1c79356b 7150
2d21ac55
A
7151 object = entry->object.vm_object;
7152 offset = entry->offset;
7153 va = entry->vme_start;
1c79356b 7154
2d21ac55
A
7155 pmap_pageable(dst_map->pmap,
7156 entry->vme_start,
7157 entry->vme_end,
7158 TRUE);
1c79356b 7159
2d21ac55
A
7160 while (va < entry->vme_end) {
7161 register vm_page_t m;
1c79356b 7162
2d21ac55
A
7163 /*
7164 * Look up the page in the object.
7165 * Assert that the page will be found in the
7166 * top object:
7167 * either
7168 * the object was newly created by
7169 * vm_object_copy_slowly, and has
7170 * copies of all of the pages from
7171 * the source object
7172 * or
7173 * the object was moved from the old
7174 * map entry; because the old map
7175 * entry was wired, all of the pages
7176 * were in the top-level object.
7177 * (XXX not true if we wire pages for
7178 * reading)
7179 */
7180 vm_object_lock(object);
91447636 7181
2d21ac55 7182 m = vm_page_lookup(object, offset);
b0d623f7 7183 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
2d21ac55
A
7184 m->absent)
7185 panic("vm_map_copyout: wiring %p", m);
1c79356b 7186
2d21ac55
A
7187 /*
7188 * ENCRYPTED SWAP:
7189 * The page is assumed to be wired here, so it
7190 * shouldn't be encrypted. Otherwise, we
7191 * couldn't enter it in the page table, since
7192 * we don't want the user to see the encrypted
7193 * data.
7194 */
7195 ASSERT_PAGE_DECRYPTED(m);
1c79356b 7196
2d21ac55 7197 prot = entry->protection;
1c79356b 7198
2d21ac55
A
7199 if (override_nx(dst_map, entry->alias) && prot)
7200 prot |= VM_PROT_EXECUTE;
1c79356b 7201
2d21ac55 7202 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 7203
6d2010ae
A
7204 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7205 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2d21ac55 7206 &type_of_fault);
1c79356b 7207
2d21ac55 7208 vm_object_unlock(object);
1c79356b 7209
2d21ac55
A
7210 offset += PAGE_SIZE_64;
7211 va += PAGE_SIZE;
1c79356b
A
7212 }
7213 }
7214 }
7215
7216 /*
7217 * Correct the page alignment for the result
7218 */
7219
7220 *dst_addr = start + (copy->offset - vm_copy_start);
7221
7222 /*
7223 * Update the hints and the map size
7224 */
7225
0c530ab8 7226 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
1c79356b
A
7227
7228 dst_map->size += size;
7229
7230 /*
7231 * Link in the copy
7232 */
7233
7234 vm_map_copy_insert(dst_map, last, copy);
7235
7236 vm_map_unlock(dst_map);
7237
7238 /*
7239 * XXX If wiring_required, call vm_map_pageable
7240 */
7241
7242 return(KERN_SUCCESS);
7243}
7244
7245 /*
7246 * Routine: vm_map_copyin
7247 *
7248 * Description:
7249 * see vm_map_copyin_common. Exported via Unsupported.exports.
7250 *
7251 */
7252
7253#undef vm_map_copyin
7254
7255 kern_return_t
7256 vm_map_copyin(
7257 vm_map_t src_map,
7258 vm_map_address_t src_addr,
7259 vm_map_size_t len,
7260 boolean_t src_destroy,
7261 vm_map_copy_t *copy_result) /* OUT */
7262{
7263 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7264 FALSE, copy_result, FALSE));
7265}
7266
7267 /*
7268 * Routine: vm_map_copyin_common
7269 *
7270 * Description:
1c79356b
A
7271 * Copy the specified region (src_addr, len) from the
7272 * source address space (src_map), possibly removing
7273 * the region from the source address space (src_destroy).
7274 *
7275 * Returns:
7276 * A vm_map_copy_t object (copy_result), suitable for
7277 * insertion into another address space (using vm_map_copyout),
7278 * copying over another address space region (using
7279 * vm_map_copy_overwrite). If the copy is unused, it
7280 * should be destroyed (using vm_map_copy_discard).
7281 *
7282 * In/out conditions:
7283 * The source map should not be locked on entry.
7284 */
7285
7286typedef struct submap_map {
7287 vm_map_t parent_map;
91447636
A
7288 vm_map_offset_t base_start;
7289 vm_map_offset_t base_end;
2d21ac55 7290 vm_map_size_t base_len;
1c79356b
A
7291 struct submap_map *next;
7292} submap_map_t;
7293
7294 kern_return_t
7295 vm_map_copyin_common(
7296 vm_map_t src_map,
91447636
A
7297 vm_map_address_t src_addr,
7298 vm_map_size_t len,
1c79356b 7299 boolean_t src_destroy,
91447636 7300 __unused boolean_t src_volatile,
1c79356b
A
7301 vm_map_copy_t *copy_result, /* OUT */
7302 boolean_t use_maxprot)
7303 {
7304 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7305 * in multi-level lookup, this
7306 * entry contains the actual
7307 * vm_object/offset.
7308 */
7309 register
7310 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7311
91447636 7312 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
7313 * where copy is taking place now
7314 */
91447636 7315 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 7316 * copied */
2d21ac55 7317 vm_map_offset_t src_base;
91447636 7318 vm_map_t base_map = src_map;
1c79356b
A
7319 boolean_t map_share=FALSE;
7320 submap_map_t *parent_maps = NULL;
7321
7322 register
7323 vm_map_copy_t copy; /* Resulting copy */
91447636 7324 vm_map_address_t copy_addr;
1c79356b
A
7325
7326 /*
7327 * Check for copies of zero bytes.
7328 */
7329
7330 if (len == 0) {
7331 *copy_result = VM_MAP_COPY_NULL;
7332 return(KERN_SUCCESS);
7333 }
7334
4a249263
A
7335 /*
7336 * Check that the end address doesn't overflow
7337 */
7338 src_end = src_addr + len;
7339 if (src_end < src_addr)
7340 return KERN_INVALID_ADDRESS;
7341
1c79356b
A
7342 /*
7343 * If the copy is sufficiently small, use a kernel buffer instead
7344 * of making a virtual copy. The theory being that the cost of
7345 * setting up VM (and taking C-O-W faults) dominates the copy costs
7346 * for small regions.
7347 */
7348 if ((len < msg_ool_size_small) && !use_maxprot)
2d21ac55
A
7349 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7350 src_destroy, copy_result);
1c79356b
A
7351
7352 /*
4a249263 7353 * Compute (page aligned) start and end of region
1c79356b 7354 */
91447636
A
7355 src_start = vm_map_trunc_page(src_addr);
7356 src_end = vm_map_round_page(src_end);
1c79356b 7357
b0d623f7 7358 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 7359
1c79356b
A
7360 /*
7361 * Allocate a header element for the list.
7362 *
7363 * Use the start and end in the header to
7364 * remember the endpoints prior to rounding.
7365 */
7366
7367 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7368 vm_map_copy_first_entry(copy) =
2d21ac55 7369 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
1c79356b
A
7370 copy->type = VM_MAP_COPY_ENTRY_LIST;
7371 copy->cpy_hdr.nentries = 0;
7372 copy->cpy_hdr.entries_pageable = TRUE;
7373
6d2010ae
A
7374 vm_map_store_init( &(copy->cpy_hdr) );
7375
1c79356b
A
7376 copy->offset = src_addr;
7377 copy->size = len;
7378
7379 new_entry = vm_map_copy_entry_create(copy);
7380
7381#define RETURN(x) \
7382 MACRO_BEGIN \
7383 vm_map_unlock(src_map); \
9bccf70c
A
7384 if(src_map != base_map) \
7385 vm_map_deallocate(src_map); \
1c79356b
A
7386 if (new_entry != VM_MAP_ENTRY_NULL) \
7387 vm_map_copy_entry_dispose(copy,new_entry); \
7388 vm_map_copy_discard(copy); \
7389 { \
91447636 7390 submap_map_t *_ptr; \
1c79356b 7391 \
91447636 7392 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 7393 parent_maps=parent_maps->next; \
91447636
A
7394 if (_ptr->parent_map != base_map) \
7395 vm_map_deallocate(_ptr->parent_map); \
7396 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
7397 } \
7398 } \
7399 MACRO_RETURN(x); \
7400 MACRO_END
7401
7402 /*
7403 * Find the beginning of the region.
7404 */
7405
7406 vm_map_lock(src_map);
7407
7408 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7409 RETURN(KERN_INVALID_ADDRESS);
7410 if(!tmp_entry->is_sub_map) {
7411 vm_map_clip_start(src_map, tmp_entry, src_start);
7412 }
7413 /* set for later submap fix-up */
7414 copy_addr = src_start;
7415
7416 /*
7417 * Go through entries until we get to the end.
7418 */
7419
7420 while (TRUE) {
7421 register
7422 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 7423 vm_map_size_t src_size; /* Size of source
1c79356b
A
7424 * map entry (in both
7425 * maps)
7426 */
7427
7428 register
7429 vm_object_t src_object; /* Object to copy */
7430 vm_object_offset_t src_offset;
7431
7432 boolean_t src_needs_copy; /* Should source map
7433 * be made read-only
7434 * for copy-on-write?
7435 */
7436
7437 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7438
7439 boolean_t was_wired; /* Was source wired? */
7440 vm_map_version_t version; /* Version before locks
7441 * dropped to make copy
7442 */
7443 kern_return_t result; /* Return value from
7444 * copy_strategically.
7445 */
7446 while(tmp_entry->is_sub_map) {
91447636 7447 vm_map_size_t submap_len;
1c79356b
A
7448 submap_map_t *ptr;
7449
7450 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7451 ptr->next = parent_maps;
7452 parent_maps = ptr;
7453 ptr->parent_map = src_map;
7454 ptr->base_start = src_start;
7455 ptr->base_end = src_end;
7456 submap_len = tmp_entry->vme_end - src_start;
7457 if(submap_len > (src_end-src_start))
7458 submap_len = src_end-src_start;
2d21ac55 7459 ptr->base_len = submap_len;
1c79356b
A
7460
7461 src_start -= tmp_entry->vme_start;
7462 src_start += tmp_entry->offset;
7463 src_end = src_start + submap_len;
7464 src_map = tmp_entry->object.sub_map;
7465 vm_map_lock(src_map);
7466 /* keep an outstanding reference for all maps in */
7467 /* the parent tree except the base map */
7468 vm_map_reference(src_map);
1c79356b
A
7469 vm_map_unlock(ptr->parent_map);
7470 if (!vm_map_lookup_entry(
2d21ac55 7471 src_map, src_start, &tmp_entry))
1c79356b
A
7472 RETURN(KERN_INVALID_ADDRESS);
7473 map_share = TRUE;
7474 if(!tmp_entry->is_sub_map)
2d21ac55 7475 vm_map_clip_start(src_map, tmp_entry, src_start);
1c79356b
A
7476 src_entry = tmp_entry;
7477 }
2d21ac55
A
7478 /* we are now in the lowest level submap... */
7479
0b4e3aa0 7480 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
55e303ae
A
7481 (tmp_entry->object.vm_object->phys_contiguous)) {
7482 /* This is not supported for now. In the future */
7483 /* we will need to detect the phys_contig */
7484 /* condition and then upgrade copy_slowly */
7485 /* to do a physical copy from the device-memory- */
7486 /* based object. We can piggy-back off of */
7487 /* the was_wired boolean to set up the */
7488 /* proper handling. */
0b4e3aa0
A
7489 RETURN(KERN_PROTECTION_FAILURE);
7490 }
1c79356b
A
7491 /*
7492 * Create a new address map entry to hold the result.
7493 * Fill in the fields from the appropriate source entries.
7494 * We must unlock the source map to do this if we need
7495 * to allocate a map entry.
7496 */
7497 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
7498 version.main_timestamp = src_map->timestamp;
7499 vm_map_unlock(src_map);
1c79356b 7500
2d21ac55 7501 new_entry = vm_map_copy_entry_create(copy);
1c79356b 7502
2d21ac55
A
7503 vm_map_lock(src_map);
7504 if ((version.main_timestamp + 1) != src_map->timestamp) {
7505 if (!vm_map_lookup_entry(src_map, src_start,
7506 &tmp_entry)) {
7507 RETURN(KERN_INVALID_ADDRESS);
7508 }
7509 if (!tmp_entry->is_sub_map)
7510 vm_map_clip_start(src_map, tmp_entry, src_start);
7511 continue; /* restart w/ new tmp_entry */
1c79356b 7512 }
1c79356b
A
7513 }
7514
7515 /*
7516 * Verify that the region can be read.
7517 */
7518 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
2d21ac55 7519 !use_maxprot) ||
1c79356b
A
7520 (src_entry->max_protection & VM_PROT_READ) == 0)
7521 RETURN(KERN_PROTECTION_FAILURE);
7522
7523 /*
7524 * Clip against the endpoints of the entire region.
7525 */
7526
7527 vm_map_clip_end(src_map, src_entry, src_end);
7528
7529 src_size = src_entry->vme_end - src_start;
7530 src_object = src_entry->object.vm_object;
7531 src_offset = src_entry->offset;
7532 was_wired = (src_entry->wired_count != 0);
7533
7534 vm_map_entry_copy(new_entry, src_entry);
7535 new_entry->use_pmap = FALSE; /* clr address space specifics */
7536
7537 /*
7538 * Attempt non-blocking copy-on-write optimizations.
7539 */
7540
7541 if (src_destroy &&
7542 (src_object == VM_OBJECT_NULL ||
2d21ac55
A
7543 (src_object->internal && !src_object->true_share
7544 && !map_share))) {
7545 /*
7546 * If we are destroying the source, and the object
7547 * is internal, we can move the object reference
7548 * from the source to the copy. The copy is
7549 * copy-on-write only if the source is.
7550 * We make another reference to the object, because
7551 * destroying the source entry will deallocate it.
7552 */
7553 vm_object_reference(src_object);
1c79356b 7554
2d21ac55
A
7555 /*
 7556		 * Copy is always unwired. vm_map_entry_copy()
 7557		 * sets its wired count to zero.
7558 */
1c79356b 7559
2d21ac55 7560 goto CopySuccessful;
1c79356b
A
7561 }
7562
7563
2d21ac55 7564 RestartCopy:
1c79356b
A
7565 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7566 src_object, new_entry, new_entry->object.vm_object,
7567 was_wired, 0);
55e303ae 7568 if ((src_object == VM_OBJECT_NULL ||
2d21ac55
A
7569 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7570 vm_object_copy_quickly(
7571 &new_entry->object.vm_object,
7572 src_offset,
7573 src_size,
7574 &src_needs_copy,
7575 &new_entry_needs_copy)) {
1c79356b
A
7576
7577 new_entry->needs_copy = new_entry_needs_copy;
7578
7579 /*
7580 * Handle copy-on-write obligations
7581 */
7582
7583 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
7584 vm_prot_t prot;
7585
7586 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
7587
7588 if (override_nx(src_map, src_entry->alias) && prot)
0c530ab8 7589 prot |= VM_PROT_EXECUTE;
2d21ac55 7590
55e303ae
A
7591 vm_object_pmap_protect(
7592 src_object,
7593 src_offset,
7594 src_size,
7595 (src_entry->is_shared ?
2d21ac55
A
7596 PMAP_NULL
7597 : src_map->pmap),
55e303ae 7598 src_entry->vme_start,
0c530ab8
A
7599 prot);
7600
55e303ae 7601 tmp_entry->needs_copy = TRUE;
1c79356b
A
7602 }
7603
7604 /*
7605 * The map has never been unlocked, so it's safe
7606 * to move to the next entry rather than doing
7607 * another lookup.
7608 */
7609
7610 goto CopySuccessful;
7611 }
7612
1c79356b
A
7613 /*
7614 * Take an object reference, so that we may
7615 * release the map lock(s).
7616 */
7617
7618 assert(src_object != VM_OBJECT_NULL);
7619 vm_object_reference(src_object);
7620
7621 /*
7622 * Record the timestamp for later verification.
7623 * Unlock the map.
7624 */
7625
7626 version.main_timestamp = src_map->timestamp;
9bccf70c 7627 vm_map_unlock(src_map); /* Increments timestamp once! */
1c79356b
A
7628
7629 /*
7630 * Perform the copy
7631 */
7632
7633 if (was_wired) {
55e303ae 7634 CopySlowly:
1c79356b
A
7635 vm_object_lock(src_object);
7636 result = vm_object_copy_slowly(
2d21ac55
A
7637 src_object,
7638 src_offset,
7639 src_size,
7640 THREAD_UNINT,
7641 &new_entry->object.vm_object);
1c79356b
A
7642 new_entry->offset = 0;
7643 new_entry->needs_copy = FALSE;
55e303ae
A
7644
7645 }
7646 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
2d21ac55 7647 (tmp_entry->is_shared || map_share)) {
55e303ae
A
7648 vm_object_t new_object;
7649
2d21ac55 7650 vm_object_lock_shared(src_object);
55e303ae 7651 new_object = vm_object_copy_delayed(
2d21ac55
A
7652 src_object,
7653 src_offset,
7654 src_size,
7655 TRUE);
55e303ae
A
7656 if (new_object == VM_OBJECT_NULL)
7657 goto CopySlowly;
7658
7659 new_entry->object.vm_object = new_object;
7660 new_entry->needs_copy = TRUE;
7661 result = KERN_SUCCESS;
7662
1c79356b
A
7663 } else {
7664 result = vm_object_copy_strategically(src_object,
2d21ac55
A
7665 src_offset,
7666 src_size,
7667 &new_entry->object.vm_object,
7668 &new_entry->offset,
7669 &new_entry_needs_copy);
1c79356b
A
7670
7671 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
7672 }
7673
7674 if (result != KERN_SUCCESS &&
7675 result != KERN_MEMORY_RESTART_COPY) {
7676 vm_map_lock(src_map);
7677 RETURN(result);
7678 }
7679
7680 /*
7681 * Throw away the extra reference
7682 */
7683
7684 vm_object_deallocate(src_object);
7685
7686 /*
7687 * Verify that the map has not substantially
7688 * changed while the copy was being made.
7689 */
7690
9bccf70c 7691 vm_map_lock(src_map);
1c79356b
A
7692
7693 if ((version.main_timestamp + 1) == src_map->timestamp)
7694 goto VerificationSuccessful;
7695
7696 /*
7697 * Simple version comparison failed.
7698 *
7699 * Retry the lookup and verify that the
7700 * same object/offset are still present.
7701 *
7702 * [Note: a memory manager that colludes with
7703 * the calling task can detect that we have
7704 * cheated. While the map was unlocked, the
7705 * mapping could have been changed and restored.]
7706 */
7707
7708 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7709 RETURN(KERN_INVALID_ADDRESS);
7710 }
7711
7712 src_entry = tmp_entry;
7713 vm_map_clip_start(src_map, src_entry, src_start);
7714
91447636
A
7715 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7716 !use_maxprot) ||
7717 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
7718 goto VerificationFailed;
7719
7720 if (src_entry->vme_end < new_entry->vme_end)
7721 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7722
7723 if ((src_entry->object.vm_object != src_object) ||
7724 (src_entry->offset != src_offset) ) {
7725
7726 /*
7727 * Verification failed.
7728 *
7729 * Start over with this top-level entry.
7730 */
7731
2d21ac55 7732 VerificationFailed: ;
1c79356b
A
7733
7734 vm_object_deallocate(new_entry->object.vm_object);
7735 tmp_entry = src_entry;
7736 continue;
7737 }
7738
7739 /*
7740 * Verification succeeded.
7741 */
7742
2d21ac55 7743 VerificationSuccessful: ;
1c79356b
A
7744
7745 if (result == KERN_MEMORY_RESTART_COPY)
7746 goto RestartCopy;
7747
7748 /*
7749 * Copy succeeded.
7750 */
7751
2d21ac55 7752 CopySuccessful: ;
1c79356b
A
7753
7754 /*
7755 * Link in the new copy entry.
7756 */
7757
7758 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7759 new_entry);
7760
7761 /*
7762 * Determine whether the entire region
7763 * has been copied.
7764 */
2d21ac55 7765 src_base = src_start;
1c79356b
A
7766 src_start = new_entry->vme_end;
7767 new_entry = VM_MAP_ENTRY_NULL;
7768 while ((src_start >= src_end) && (src_end != 0)) {
7769 if (src_map != base_map) {
7770 submap_map_t *ptr;
7771
7772 ptr = parent_maps;
7773 assert(ptr != NULL);
7774 parent_maps = parent_maps->next;
2d21ac55
A
7775
7776 /* fix up the damage we did in that submap */
7777 vm_map_simplify_range(src_map,
7778 src_base,
7779 src_end);
7780
1c79356b 7781 vm_map_unlock(src_map);
9bccf70c
A
7782 vm_map_deallocate(src_map);
7783 vm_map_lock(ptr->parent_map);
1c79356b 7784 src_map = ptr->parent_map;
2d21ac55
A
7785 src_base = ptr->base_start;
7786 src_start = ptr->base_start + ptr->base_len;
1c79356b
A
7787 src_end = ptr->base_end;
7788 if ((src_end > src_start) &&
2d21ac55
A
7789 !vm_map_lookup_entry(
7790 src_map, src_start, &tmp_entry))
1c79356b 7791 RETURN(KERN_INVALID_ADDRESS);
91447636 7792 kfree(ptr, sizeof(submap_map_t));
1c79356b
A
7793 if(parent_maps == NULL)
7794 map_share = FALSE;
7795 src_entry = tmp_entry->vme_prev;
7796 } else
7797 break;
7798 }
7799 if ((src_start >= src_end) && (src_end != 0))
7800 break;
7801
7802 /*
7803 * Verify that there are no gaps in the region
7804 */
7805
7806 tmp_entry = src_entry->vme_next;
7807 if ((tmp_entry->vme_start != src_start) ||
2d21ac55 7808 (tmp_entry == vm_map_to_entry(src_map)))
1c79356b
A
7809 RETURN(KERN_INVALID_ADDRESS);
7810 }
7811
7812 /*
7813 * If the source should be destroyed, do it now, since the
7814 * copy was successful.
7815 */
7816 if (src_destroy) {
7817 (void) vm_map_delete(src_map,
91447636 7818 vm_map_trunc_page(src_addr),
1c79356b
A
7819 src_end,
7820 (src_map == kernel_map) ?
2d21ac55
A
7821 VM_MAP_REMOVE_KUNWIRE :
7822 VM_MAP_NO_FLAGS,
91447636 7823 VM_MAP_NULL);
2d21ac55
A
7824 } else {
7825 /* fix up the damage we did in the base map */
7826 vm_map_simplify_range(src_map,
7827 vm_map_trunc_page(src_addr),
7828 vm_map_round_page(src_end));
1c79356b
A
7829 }
7830
7831 vm_map_unlock(src_map);
7832
7833 /* Fix-up start and end points in copy. This is necessary */
7834 /* when the various entries in the copy object were picked */
7835 /* up from different sub-maps */
7836
7837 tmp_entry = vm_map_copy_first_entry(copy);
7838 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7839 tmp_entry->vme_end = copy_addr +
7840 (tmp_entry->vme_end - tmp_entry->vme_start);
7841 tmp_entry->vme_start = copy_addr;
7842 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7843 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7844 }
7845
7846 *copy_result = copy;
7847 return(KERN_SUCCESS);
7848
7849#undef RETURN
7850}
7851
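
/*
 * Illustrative sketch, not part of vm_map.c: how a kernel-internal
 * caller typically pairs vm_map_copyin() -- which funnels into
 * vm_map_copyin_common() above -- with vm_map_copyout().  The
 * function and its parameters are hypothetical; only the vm_map_*
 * calls are real.
 */
#if 0	/* example only */
static kern_return_t
example_move_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)		/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source range; FALSE = leave the source mapped */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* splice the captured entries into the destination map */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* on failure the copy object is still ours to release */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif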
7852/*
7853 * vm_map_copyin_object:
7854 *
7855 * Create a copy object from an object.
7856 * Our caller donates an object reference.
7857 */
7858
7859kern_return_t
7860vm_map_copyin_object(
7861 vm_object_t object,
7862 vm_object_offset_t offset, /* offset of region in object */
7863 vm_object_size_t size, /* size of region in object */
7864 vm_map_copy_t *copy_result) /* OUT */
7865{
7866 vm_map_copy_t copy; /* Resulting copy */
7867
7868 /*
7869 * We drop the object into a special copy object
7870 * that contains the object directly.
7871 */
7872
7873 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7874 copy->type = VM_MAP_COPY_OBJECT;
7875 copy->cpy_object = object;
1c79356b
A
7876 copy->offset = offset;
7877 copy->size = size;
7878
7879 *copy_result = copy;
7880 return(KERN_SUCCESS);
7881}
7882
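
/*
 * Illustrative sketch, not part of vm_map.c: vm_map_copyin_object()
 * consumes the reference its caller donates, so a caller that wants
 * to keep using the object takes an extra reference first.  The
 * function, object and size below are hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_wrap_object(
	vm_object_t		object,		/* caller already holds a ref */
	vm_object_size_t	size,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	/* donate a reference to the copy object */
	vm_object_reference(object);
	return vm_map_copyin_object(object, 0, size, copy_result);
	/* *copy_result now owns the donated reference; release it
	 * later with vm_map_copy_discard() if it is never used. */
}
#endif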
91447636 7883static void
1c79356b
A
7884vm_map_fork_share(
7885 vm_map_t old_map,
7886 vm_map_entry_t old_entry,
7887 vm_map_t new_map)
7888{
7889 vm_object_t object;
7890 vm_map_entry_t new_entry;
1c79356b
A
7891
7892 /*
7893 * New sharing code. New map entry
7894 * references original object. Internal
7895 * objects use asynchronous copy algorithm for
7896 * future copies. First make sure we have
7897 * the right object. If we need a shadow,
7898 * or someone else already has one, then
7899 * make a new shadow and share it.
7900 */
7901
7902 object = old_entry->object.vm_object;
7903 if (old_entry->is_sub_map) {
7904 assert(old_entry->wired_count == 0);
0c530ab8 7905#ifndef NO_NESTED_PMAP
1c79356b 7906 if(old_entry->use_pmap) {
91447636
A
7907 kern_return_t result;
7908
1c79356b 7909 result = pmap_nest(new_map->pmap,
2d21ac55
A
7910 (old_entry->object.sub_map)->pmap,
7911 (addr64_t)old_entry->vme_start,
7912 (addr64_t)old_entry->vme_start,
7913 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
7914 if(result)
7915 panic("vm_map_fork_share: pmap_nest failed!");
7916 }
0c530ab8 7917#endif /* NO_NESTED_PMAP */
1c79356b 7918 } else if (object == VM_OBJECT_NULL) {
91447636 7919 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
2d21ac55 7920 old_entry->vme_start));
1c79356b
A
7921 old_entry->offset = 0;
7922 old_entry->object.vm_object = object;
7923 assert(!old_entry->needs_copy);
7924 } else if (object->copy_strategy !=
2d21ac55 7925 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
7926
7927 /*
7928 * We are already using an asymmetric
7929 * copy, and therefore we already have
7930 * the right object.
7931 */
7932
7933 assert(! old_entry->needs_copy);
7934 }
7935 else if (old_entry->needs_copy || /* case 1 */
7936 object->shadowed || /* case 2 */
7937 (!object->true_share && /* case 3 */
2d21ac55 7938 !old_entry->is_shared &&
6d2010ae 7939 (object->vo_size >
2d21ac55
A
7940 (vm_map_size_t)(old_entry->vme_end -
7941 old_entry->vme_start)))) {
1c79356b
A
7942
7943 /*
7944 * We need to create a shadow.
7945 * There are three cases here.
7946 * In the first case, we need to
7947 * complete a deferred symmetrical
7948 * copy that we participated in.
7949 * In the second and third cases,
7950 * we need to create the shadow so
7951 * that changes that we make to the
7952 * object do not interfere with
7953 * any symmetrical copies which
 7954	 * have occurred (case 2) or which
7955 * might occur (case 3).
7956 *
7957 * The first case is when we had
7958 * deferred shadow object creation
7959 * via the entry->needs_copy mechanism.
7960 * This mechanism only works when
7961 * only one entry points to the source
7962 * object, and we are about to create
7963 * a second entry pointing to the
7964 * same object. The problem is that
7965 * there is no way of mapping from
7966 * an object to the entries pointing
7967 * to it. (Deferred shadow creation
 7968	 * works with one entry because it occurs
7969 * at fault time, and we walk from the
7970 * entry to the object when handling
7971 * the fault.)
7972 *
7973 * The second case is when the object
7974 * to be shared has already been copied
7975 * with a symmetric copy, but we point
7976 * directly to the object without
7977 * needs_copy set in our entry. (This
7978 * can happen because different ranges
7979 * of an object can be pointed to by
7980 * different entries. In particular,
7981 * a single entry pointing to an object
7982 * can be split by a call to vm_inherit,
7983 * which, combined with task_create, can
7984 * result in the different entries
7985 * having different needs_copy values.)
7986 * The shadowed flag in the object allows
7987 * us to detect this case. The problem
7988 * with this case is that if this object
7989 * has or will have shadows, then we
7990 * must not perform an asymmetric copy
7991 * of this object, since such a copy
7992 * allows the object to be changed, which
7993 * will break the previous symmetrical
7994 * copies (which rely upon the object
7995 * not changing). In a sense, the shadowed
7996 * flag says "don't change this object".
7997 * We fix this by creating a shadow
7998 * object for this object, and sharing
7999 * that. This works because we are free
8000 * to change the shadow object (and thus
8001 * to use an asymmetric copy strategy);
8002 * this is also semantically correct,
8003 * since this object is temporary, and
8004 * therefore a copy of the object is
8005 * as good as the object itself. (This
8006 * is not true for permanent objects,
8007 * since the pager needs to see changes,
8008 * which won't happen if the changes
8009 * are made to a copy.)
8010 *
8011 * The third case is when the object
8012 * to be shared has parts sticking
8013 * outside of the entry we're working
8014 * with, and thus may in the future
8015 * be subject to a symmetrical copy.
8016 * (This is a preemptive version of
8017 * case 2.)
8018 */
1c79356b
A
8019 vm_object_shadow(&old_entry->object.vm_object,
8020 &old_entry->offset,
91447636 8021 (vm_map_size_t) (old_entry->vme_end -
2d21ac55 8022 old_entry->vme_start));
1c79356b
A
8023
8024 /*
8025 * If we're making a shadow for other than
8026 * copy on write reasons, then we have
8027 * to remove write permission.
8028 */
8029
1c79356b
A
8030 if (!old_entry->needs_copy &&
8031 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
8032 vm_prot_t prot;
8033
8034 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
8035
8036 if (override_nx(old_map, old_entry->alias) && prot)
0c530ab8 8037 prot |= VM_PROT_EXECUTE;
2d21ac55 8038
0c530ab8 8039 if (old_map->mapped) {
9bccf70c
A
8040 vm_object_pmap_protect(
8041 old_entry->object.vm_object,
8042 old_entry->offset,
8043 (old_entry->vme_end -
2d21ac55 8044 old_entry->vme_start),
9bccf70c
A
8045 PMAP_NULL,
8046 old_entry->vme_start,
0c530ab8 8047 prot);
1c79356b 8048 } else {
9bccf70c 8049 pmap_protect(old_map->pmap,
2d21ac55
A
8050 old_entry->vme_start,
8051 old_entry->vme_end,
8052 prot);
1c79356b
A
8053 }
8054 }
8055
8056 old_entry->needs_copy = FALSE;
8057 object = old_entry->object.vm_object;
8058 }
6d2010ae 8059
1c79356b
A
8060
8061 /*
8062 * If object was using a symmetric copy strategy,
8063 * change its copy strategy to the default
8064 * asymmetric copy strategy, which is copy_delay
8065 * in the non-norma case and copy_call in the
8066 * norma case. Bump the reference count for the
8067 * new entry.
8068 */
8069
8070 if(old_entry->is_sub_map) {
8071 vm_map_lock(old_entry->object.sub_map);
8072 vm_map_reference(old_entry->object.sub_map);
8073 vm_map_unlock(old_entry->object.sub_map);
8074 } else {
8075 vm_object_lock(object);
2d21ac55 8076 vm_object_reference_locked(object);
1c79356b
A
8077 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8078 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8079 }
8080 vm_object_unlock(object);
8081 }
8082
8083 /*
8084 * Clone the entry, using object ref from above.
8085 * Mark both entries as shared.
8086 */
8087
8088 new_entry = vm_map_entry_create(new_map);
8089 vm_map_entry_copy(new_entry, old_entry);
8090 old_entry->is_shared = TRUE;
8091 new_entry->is_shared = TRUE;
8092
8093 /*
8094 * Insert the entry into the new map -- we
8095 * know we're inserting at the end of the new
8096 * map.
8097 */
8098
6d2010ae 8099 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
1c79356b
A
8100
8101 /*
8102 * Update the physical map
8103 */
8104
8105 if (old_entry->is_sub_map) {
8106 /* Bill Angell pmap support goes here */
8107 } else {
8108 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
2d21ac55
A
8109 old_entry->vme_end - old_entry->vme_start,
8110 old_entry->vme_start);
1c79356b
A
8111 }
8112}
8113
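
/*
 * Illustrative sketch, not part of vm_map.c: vm_map_fork_share() is
 * reached at fork time for entries whose inheritance is
 * VM_INHERIT_SHARE.  From user space that is typically arranged as
 * below (the address and size are hypothetical; this is user-level
 * code, not kernel code).
 */
#if 0	/* example only -- user-space usage */
#include <mach/mach.h>

static kern_return_t
example_share_on_fork(vm_address_t addr, vm_size_t size)
{
	/* after fork(), parent and child will share these pages */
	return vm_inherit(mach_task_self(), addr, size, VM_INHERIT_SHARE);
}
#endif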
91447636 8114static boolean_t
1c79356b
A
8115vm_map_fork_copy(
8116 vm_map_t old_map,
8117 vm_map_entry_t *old_entry_p,
8118 vm_map_t new_map)
8119{
8120 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
8121 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8122 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
8123 vm_map_copy_t copy;
8124 vm_map_entry_t last = vm_map_last_entry(new_map);
8125
8126 vm_map_unlock(old_map);
8127 /*
8128 * Use maxprot version of copyin because we
8129 * care about whether this memory can ever
8130 * be accessed, not just whether it's accessible
8131 * right now.
8132 */
8133 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8134 != KERN_SUCCESS) {
8135 /*
8136 * The map might have changed while it
8137 * was unlocked, check it again. Skip
8138 * any blank space or permanently
8139 * unreadable region.
8140 */
8141 vm_map_lock(old_map);
8142 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 8143 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
8144 last = last->vme_next;
8145 }
8146 *old_entry_p = last;
8147
8148 /*
8149 * XXX For some error returns, want to
8150 * XXX skip to the next element. Note
8151 * that INVALID_ADDRESS and
8152 * PROTECTION_FAILURE are handled above.
8153 */
8154
8155 return FALSE;
8156 }
8157
8158 /*
8159 * Insert the copy into the new map
8160 */
8161
8162 vm_map_copy_insert(new_map, last, copy);
8163
8164 /*
8165 * Pick up the traversal at the end of
8166 * the copied region.
8167 */
8168
8169 vm_map_lock(old_map);
8170 start += entry_size;
8171 if (! vm_map_lookup_entry(old_map, start, &last)) {
8172 last = last->vme_next;
8173 } else {
2d21ac55
A
8174 if (last->vme_start == start) {
8175 /*
8176 * No need to clip here and we don't
8177 * want to cause any unnecessary
8178 * unnesting...
8179 */
8180 } else {
8181 vm_map_clip_start(old_map, last, start);
8182 }
1c79356b
A
8183 }
8184 *old_entry_p = last;
8185
8186 return TRUE;
8187}
8188
8189/*
8190 * vm_map_fork:
8191 *
8192 * Create and return a new map based on the old
8193 * map, according to the inheritance values on the
8194 * regions in that map.
8195 *
8196 * The source map must not be locked.
8197 */
8198vm_map_t
8199vm_map_fork(
8200 vm_map_t old_map)
8201{
2d21ac55 8202 pmap_t new_pmap;
1c79356b
A
8203 vm_map_t new_map;
8204 vm_map_entry_t old_entry;
91447636 8205 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
8206 vm_map_entry_t new_entry;
8207 boolean_t src_needs_copy;
8208 boolean_t new_entry_needs_copy;
8209
2d21ac55 8210 new_pmap = pmap_create((vm_map_size_t) 0,
b0d623f7
A
8211#if defined(__i386__) || defined(__x86_64__)
8212 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8213#else
8214 0
8215#endif
8216 );
8217#if defined(__i386__)
2d21ac55
A
8218 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8219 pmap_set_4GB_pagezero(new_pmap);
2d21ac55
A
8220#endif
8221
1c79356b
A
8222 vm_map_reference_swap(old_map);
8223 vm_map_lock(old_map);
8224
8225 new_map = vm_map_create(new_pmap,
2d21ac55
A
8226 old_map->min_offset,
8227 old_map->max_offset,
8228 old_map->hdr.entries_pageable);
1c79356b 8229 for (
2d21ac55
A
8230 old_entry = vm_map_first_entry(old_map);
8231 old_entry != vm_map_to_entry(old_map);
8232 ) {
1c79356b
A
8233
8234 entry_size = old_entry->vme_end - old_entry->vme_start;
8235
8236 switch (old_entry->inheritance) {
8237 case VM_INHERIT_NONE:
8238 break;
8239
8240 case VM_INHERIT_SHARE:
8241 vm_map_fork_share(old_map, old_entry, new_map);
8242 new_size += entry_size;
8243 break;
8244
8245 case VM_INHERIT_COPY:
8246
8247 /*
8248 * Inline the copy_quickly case;
8249 * upon failure, fall back on call
8250 * to vm_map_fork_copy.
8251 */
8252
8253 if(old_entry->is_sub_map)
8254 break;
9bccf70c 8255 if ((old_entry->wired_count != 0) ||
2d21ac55
A
8256 ((old_entry->object.vm_object != NULL) &&
8257 (old_entry->object.vm_object->true_share))) {
1c79356b
A
8258 goto slow_vm_map_fork_copy;
8259 }
8260
8261 new_entry = vm_map_entry_create(new_map);
8262 vm_map_entry_copy(new_entry, old_entry);
8263 /* clear address space specifics */
8264 new_entry->use_pmap = FALSE;
8265
8266 if (! vm_object_copy_quickly(
2d21ac55
A
8267 &new_entry->object.vm_object,
8268 old_entry->offset,
8269 (old_entry->vme_end -
8270 old_entry->vme_start),
8271 &src_needs_copy,
8272 &new_entry_needs_copy)) {
1c79356b
A
8273 vm_map_entry_dispose(new_map, new_entry);
8274 goto slow_vm_map_fork_copy;
8275 }
8276
8277 /*
8278 * Handle copy-on-write obligations
8279 */
8280
8281 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
8282 vm_prot_t prot;
8283
8284 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
8285
8286 if (override_nx(old_map, old_entry->alias) && prot)
0c530ab8 8287 prot |= VM_PROT_EXECUTE;
2d21ac55 8288
1c79356b
A
8289 vm_object_pmap_protect(
8290 old_entry->object.vm_object,
8291 old_entry->offset,
8292 (old_entry->vme_end -
2d21ac55 8293 old_entry->vme_start),
1c79356b 8294 ((old_entry->is_shared
2d21ac55
A
8295 || old_map->mapped)
8296 ? PMAP_NULL :
8297 old_map->pmap),
1c79356b 8298 old_entry->vme_start,
0c530ab8 8299 prot);
1c79356b
A
8300
8301 old_entry->needs_copy = TRUE;
8302 }
8303 new_entry->needs_copy = new_entry_needs_copy;
8304
8305 /*
8306 * Insert the entry at the end
8307 * of the map.
8308 */
8309
6d2010ae 8310 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
1c79356b
A
8311 new_entry);
8312 new_size += entry_size;
8313 break;
8314
8315 slow_vm_map_fork_copy:
8316 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8317 new_size += entry_size;
8318 }
8319 continue;
8320 }
8321 old_entry = old_entry->vme_next;
8322 }
8323
8324 new_map->size = new_size;
8325 vm_map_unlock(old_map);
8326 vm_map_deallocate(old_map);
8327
8328 return(new_map);
8329}
8330
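
/*
 * Illustrative sketch, not part of vm_map.c: vm_map_fork() is the
 * address-space half of fork()/task creation.  The caller and names
 * below are hypothetical; only the vm_map_* calls are real.
 */
#if 0	/* example only */
static vm_map_t
example_fork_address_space(vm_map_t parent_map)
{
	vm_map_t child_map;

	/*
	 * vm_map_fork() takes its own reference on parent_map
	 * (vm_map_reference_swap) and drops it before returning, so
	 * the caller's reference is untouched.  Each entry is handled
	 * according to its inheritance: NONE is skipped, SHARE goes
	 * through vm_map_fork_share(), COPY through the quick-copy
	 * path or vm_map_fork_copy().
	 */
	child_map = vm_map_fork(parent_map);
	return child_map;
}
#endif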
2d21ac55
A
8331/*
8332 * vm_map_exec:
8333 *
8334 * Setup the "new_map" with the proper execution environment according
8335 * to the type of executable (platform, 64bit, chroot environment).
8336 * Map the comm page and shared region, etc...
8337 */
8338kern_return_t
8339vm_map_exec(
8340 vm_map_t new_map,
8341 task_t task,
8342 void *fsroot,
8343 cpu_type_t cpu)
8344{
8345 SHARED_REGION_TRACE_DEBUG(
8346 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8347 current_task(), new_map, task, fsroot, cpu));
8348 (void) vm_commpage_enter(new_map, task);
8349 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8350 SHARED_REGION_TRACE_DEBUG(
8351 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8352 current_task(), new_map, task, fsroot, cpu));
8353 return KERN_SUCCESS;
8354}
1c79356b
A
8355
8356/*
8357 * vm_map_lookup_locked:
8358 *
8359 * Finds the VM object, offset, and
8360 * protection for a given virtual address in the
8361 * specified map, assuming a page fault of the
8362 * type specified.
8363 *
8364 * Returns the (object, offset, protection) for
8365 * this address, whether it is wired down, and whether
8366 * this map has the only reference to the data in question.
8367 * In order to later verify this lookup, a "version"
8368 * is returned.
8369 *
8370 * The map MUST be locked by the caller and WILL be
8371 * locked on exit. In order to guarantee the
8372 * existence of the returned object, it is returned
8373 * locked.
8374 *
8375 * If a lookup is requested with "write protection"
8376 * specified, the map may be changed to perform virtual
8377 * copying operations, although the data referenced will
8378 * remain the same.
8379 */
8380kern_return_t
8381vm_map_lookup_locked(
8382 vm_map_t *var_map, /* IN/OUT */
2d21ac55 8383 vm_map_offset_t vaddr,
91447636 8384 vm_prot_t fault_type,
2d21ac55 8385 int object_lock_type,
1c79356b
A
8386 vm_map_version_t *out_version, /* OUT */
8387 vm_object_t *object, /* OUT */
8388 vm_object_offset_t *offset, /* OUT */
8389 vm_prot_t *out_prot, /* OUT */
8390 boolean_t *wired, /* OUT */
2d21ac55 8391 vm_object_fault_info_t fault_info, /* OUT */
91447636 8392 vm_map_t *real_map)
1c79356b
A
8393{
8394 vm_map_entry_t entry;
8395 register vm_map_t map = *var_map;
8396 vm_map_t old_map = *var_map;
8397 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
8398 vm_map_offset_t cow_parent_vaddr = 0;
8399 vm_map_offset_t old_start = 0;
8400 vm_map_offset_t old_end = 0;
1c79356b 8401 register vm_prot_t prot;
6d2010ae
A
8402 boolean_t mask_protections;
8403 vm_prot_t original_fault_type;
8404
8405 /*
8406 * VM_PROT_MASK means that the caller wants us to use "fault_type"
8407 * as a mask against the mapping's actual protections, not as an
8408 * absolute value.
8409 */
8410 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8411 fault_type &= ~VM_PROT_IS_MASK;
8412 original_fault_type = fault_type;
1c79356b 8413
91447636 8414 *real_map = map;
6d2010ae
A
8415
8416RetryLookup:
8417 fault_type = original_fault_type;
1c79356b
A
8418
8419 /*
8420 * If the map has an interesting hint, try it before calling
8421 * full blown lookup routine.
8422 */
1c79356b 8423 entry = map->hint;
1c79356b
A
8424
8425 if ((entry == vm_map_to_entry(map)) ||
8426 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8427 vm_map_entry_t tmp_entry;
8428
8429 /*
8430 * Entry was either not a valid hint, or the vaddr
8431 * was not contained in the entry, so do a full lookup.
8432 */
8433 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8434 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8435 vm_map_unlock(cow_sub_map_parent);
91447636 8436 if((*real_map != map)
2d21ac55 8437 && (*real_map != cow_sub_map_parent))
91447636 8438 vm_map_unlock(*real_map);
1c79356b
A
8439 return KERN_INVALID_ADDRESS;
8440 }
8441
8442 entry = tmp_entry;
8443 }
8444 if(map == old_map) {
8445 old_start = entry->vme_start;
8446 old_end = entry->vme_end;
8447 }
8448
8449 /*
8450 * Handle submaps. Drop lock on upper map, submap is
8451 * returned locked.
8452 */
8453
8454submap_recurse:
8455 if (entry->is_sub_map) {
91447636
A
8456 vm_map_offset_t local_vaddr;
8457 vm_map_offset_t end_delta;
8458 vm_map_offset_t start_delta;
1c79356b
A
8459 vm_map_entry_t submap_entry;
8460 boolean_t mapped_needs_copy=FALSE;
8461
8462 local_vaddr = vaddr;
8463
2d21ac55 8464 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
91447636
A
8465 /* if real_map equals map we unlock below */
8466 if ((*real_map != map) &&
2d21ac55 8467 (*real_map != cow_sub_map_parent))
91447636
A
8468 vm_map_unlock(*real_map);
8469 *real_map = entry->object.sub_map;
1c79356b
A
8470 }
8471
2d21ac55 8472 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
1c79356b
A
8473 if (!mapped_needs_copy) {
8474 if (vm_map_lock_read_to_write(map)) {
8475 vm_map_lock_read(map);
2d21ac55 8476 /* XXX FBDP: entry still valid ? */
91447636
A
8477 if(*real_map == entry->object.sub_map)
8478 *real_map = map;
1c79356b
A
8479 goto RetryLookup;
8480 }
8481 vm_map_lock_read(entry->object.sub_map);
8482 cow_sub_map_parent = map;
8483 /* reset base to map before cow object */
8484 /* this is the map which will accept */
8485 /* the new cow object */
8486 old_start = entry->vme_start;
8487 old_end = entry->vme_end;
8488 cow_parent_vaddr = vaddr;
8489 mapped_needs_copy = TRUE;
8490 } else {
8491 vm_map_lock_read(entry->object.sub_map);
8492 if((cow_sub_map_parent != map) &&
2d21ac55 8493 (*real_map != map))
1c79356b
A
8494 vm_map_unlock(map);
8495 }
8496 } else {
8497 vm_map_lock_read(entry->object.sub_map);
 8498			/* leave the map locked if it is a target */
 8499			/* cow sub_map above; otherwise, just */
 8500			/* follow the maps down to the object. */
 8501			/* Here we unlock knowing we are not */
 8502			/* revisiting the map. */
91447636 8503 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
8504 vm_map_unlock_read(map);
8505 }
8506
2d21ac55 8507 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
1c79356b
A
8508 *var_map = map = entry->object.sub_map;
8509
8510 /* calculate the offset in the submap for vaddr */
8511 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8512
2d21ac55 8513 RetrySubMap:
1c79356b
A
8514 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8515 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8516 vm_map_unlock(cow_sub_map_parent);
8517 }
91447636 8518 if((*real_map != map)
2d21ac55 8519 && (*real_map != cow_sub_map_parent)) {
91447636 8520 vm_map_unlock(*real_map);
1c79356b 8521 }
91447636 8522 *real_map = map;
1c79356b
A
8523 return KERN_INVALID_ADDRESS;
8524 }
2d21ac55 8525
1c79356b
A
8526 /* find the attenuated shadow of the underlying object */
8527 /* on our target map */
8528
 8529		/* In English: the submap object may extend beyond the */
 8530		/* region mapped by the entry, or may only fill a portion */
 8531		/* of it. For our purposes, we only care if the object */
 8532		/* doesn't fill it. In that case the area which will */
 8533		/* ultimately be clipped in the top map will only need */
 8534		/* to be as big as the portion of the underlying entry */
 8535		/* which is mapped. */
8536 start_delta = submap_entry->vme_start > entry->offset ?
2d21ac55 8537 submap_entry->vme_start - entry->offset : 0;
1c79356b
A
8538
8539 end_delta =
2d21ac55 8540 (entry->offset + start_delta + (old_end - old_start)) <=
1c79356b 8541 submap_entry->vme_end ?
2d21ac55
A
8542 0 : (entry->offset +
8543 (old_end - old_start))
8544 - submap_entry->vme_end;
1c79356b
A
8545
8546 old_start += start_delta;
8547 old_end -= end_delta;
8548
8549 if(submap_entry->is_sub_map) {
8550 entry = submap_entry;
8551 vaddr = local_vaddr;
8552 goto submap_recurse;
8553 }
8554
8555 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8556
2d21ac55
A
8557 vm_object_t sub_object, copy_object;
8558 vm_object_offset_t copy_offset;
91447636
A
8559 vm_map_offset_t local_start;
8560 vm_map_offset_t local_end;
0b4e3aa0 8561 boolean_t copied_slowly = FALSE;
1c79356b
A
8562
8563 if (vm_map_lock_read_to_write(map)) {
8564 vm_map_lock_read(map);
8565 old_start -= start_delta;
8566 old_end += end_delta;
8567 goto RetrySubMap;
8568 }
0b4e3aa0
A
8569
8570
2d21ac55
A
8571 sub_object = submap_entry->object.vm_object;
8572 if (sub_object == VM_OBJECT_NULL) {
8573 sub_object =
1c79356b 8574 vm_object_allocate(
91447636 8575 (vm_map_size_t)
2d21ac55
A
8576 (submap_entry->vme_end -
8577 submap_entry->vme_start));
8578 submap_entry->object.vm_object = sub_object;
91447636 8579 submap_entry->offset = 0;
1c79356b
A
8580 }
8581 local_start = local_vaddr -
2d21ac55 8582 (cow_parent_vaddr - old_start);
1c79356b 8583 local_end = local_vaddr +
2d21ac55 8584 (old_end - cow_parent_vaddr);
1c79356b
A
8585 vm_map_clip_start(map, submap_entry, local_start);
8586 vm_map_clip_end(map, submap_entry, local_end);
2d21ac55
A
8587 /* unnesting was done in vm_map_clip_start/end() */
8588 assert(!submap_entry->use_pmap);
1c79356b
A
8589
 8590			/* This is the COW case; let's connect */
8591 /* an entry in our space to the underlying */
8592 /* object in the submap, bypassing the */
8593 /* submap. */
0b4e3aa0
A
8594
8595
2d21ac55 8596 if(submap_entry->wired_count != 0 ||
4a3eedf9
A
8597 (sub_object->copy_strategy ==
8598 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
8599 vm_object_lock(sub_object);
8600 vm_object_copy_slowly(sub_object,
8601 submap_entry->offset,
8602 (submap_entry->vme_end -
8603 submap_entry->vme_start),
8604 FALSE,
8605 &copy_object);
8606 copied_slowly = TRUE;
0b4e3aa0 8607 } else {
2d21ac55 8608
0b4e3aa0 8609 /* set up shadow object */
2d21ac55 8610 copy_object = sub_object;
0b4e3aa0 8611 vm_object_reference(copy_object);
2d21ac55 8612 sub_object->shadowed = TRUE;
0b4e3aa0 8613 submap_entry->needs_copy = TRUE;
0c530ab8
A
8614
8615 prot = submap_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
8616
8617 if (override_nx(map, submap_entry->alias) && prot)
0c530ab8 8618 prot |= VM_PROT_EXECUTE;
2d21ac55 8619
0b4e3aa0 8620 vm_object_pmap_protect(
2d21ac55 8621 sub_object,
1c79356b
A
8622 submap_entry->offset,
8623 submap_entry->vme_end -
2d21ac55 8624 submap_entry->vme_start,
9bccf70c 8625 (submap_entry->is_shared
2d21ac55
A
8626 || map->mapped) ?
8627 PMAP_NULL : map->pmap,
1c79356b 8628 submap_entry->vme_start,
0c530ab8 8629 prot);
0b4e3aa0 8630 }
1c79356b 8631
2d21ac55
A
8632 /*
8633 * Adjust the fault offset to the submap entry.
8634 */
8635 copy_offset = (local_vaddr -
8636 submap_entry->vme_start +
8637 submap_entry->offset);
1c79356b
A
8638
 8639			/* This works differently than the */
 8640			/* normal submap case. We go back */
 8641			/* to the parent of the cow map and */
 8642			/* clip out the target portion of */
 8643			/* the sub_map, substituting the */
 8644			/* new copy object. */
8645
8646 vm_map_unlock(map);
8647 local_start = old_start;
8648 local_end = old_end;
8649 map = cow_sub_map_parent;
8650 *var_map = cow_sub_map_parent;
8651 vaddr = cow_parent_vaddr;
8652 cow_sub_map_parent = NULL;
8653
2d21ac55
A
8654 if(!vm_map_lookup_entry(map,
8655 vaddr, &entry)) {
8656 vm_object_deallocate(
8657 copy_object);
8658 vm_map_lock_write_to_read(map);
8659 return KERN_INVALID_ADDRESS;
8660 }
8661
8662 /* clip out the portion of space */
8663 /* mapped by the sub map which */
8664 /* corresponds to the underlying */
8665 /* object */
8666
8667 /*
8668 * Clip (and unnest) the smallest nested chunk
8669 * possible around the faulting address...
8670 */
8671 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8672 local_end = local_start + pmap_nesting_size_min;
8673 /*
8674 * ... but don't go beyond the "old_start" to "old_end"
8675 * range, to avoid spanning over another VM region
8676 * with a possibly different VM object and/or offset.
8677 */
8678 if (local_start < old_start) {
8679 local_start = old_start;
8680 }
8681 if (local_end > old_end) {
8682 local_end = old_end;
8683 }
8684 /*
8685 * Adjust copy_offset to the start of the range.
8686 */
8687 copy_offset -= (vaddr - local_start);
8688
1c79356b
A
8689 vm_map_clip_start(map, entry, local_start);
8690 vm_map_clip_end(map, entry, local_end);
2d21ac55
A
8691 /* unnesting was done in vm_map_clip_start/end() */
8692 assert(!entry->use_pmap);
1c79356b
A
8693
8694 /* substitute copy object for */
8695 /* shared map entry */
8696 vm_map_deallocate(entry->object.sub_map);
8697 entry->is_sub_map = FALSE;
1c79356b 8698 entry->object.vm_object = copy_object;
1c79356b 8699
2d21ac55
A
8700 /* propagate the submap entry's protections */
8701 entry->protection |= submap_entry->protection;
8702 entry->max_protection |= submap_entry->max_protection;
8703
0b4e3aa0 8704 if(copied_slowly) {
4a3eedf9 8705 entry->offset = local_start - old_start;
0b4e3aa0
A
8706 entry->needs_copy = FALSE;
8707 entry->is_shared = FALSE;
8708 } else {
2d21ac55 8709 entry->offset = copy_offset;
0b4e3aa0
A
8710 entry->needs_copy = TRUE;
8711 if(entry->inheritance == VM_INHERIT_SHARE)
8712 entry->inheritance = VM_INHERIT_COPY;
8713 if (map != old_map)
8714 entry->is_shared = TRUE;
8715 }
1c79356b 8716 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 8717 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
8718
8719 vm_map_lock_write_to_read(map);
8720 } else {
8721 if((cow_sub_map_parent)
2d21ac55
A
8722 && (cow_sub_map_parent != *real_map)
8723 && (cow_sub_map_parent != map)) {
1c79356b
A
8724 vm_map_unlock(cow_sub_map_parent);
8725 }
8726 entry = submap_entry;
8727 vaddr = local_vaddr;
8728 }
8729 }
8730
8731 /*
8732 * Check whether this task is allowed to have
8733 * this page.
8734 */
2d21ac55 8735
6601e61a 8736 prot = entry->protection;
0c530ab8 8737
2d21ac55 8738 if (override_nx(map, entry->alias) && prot) {
0c530ab8 8739 /*
2d21ac55 8740 * HACK -- if not a stack, then allow execution
0c530ab8
A
8741 */
8742 prot |= VM_PROT_EXECUTE;
2d21ac55
A
8743 }
8744
6d2010ae
A
8745 if (mask_protections) {
8746 fault_type &= prot;
8747 if (fault_type == VM_PROT_NONE) {
8748 goto protection_failure;
8749 }
8750 }
1c79356b 8751 if ((fault_type & (prot)) != fault_type) {
6d2010ae 8752 protection_failure:
2d21ac55
A
8753 if (*real_map != map) {
8754 vm_map_unlock(*real_map);
0c530ab8
A
8755 }
8756 *real_map = map;
8757
8758 if ((fault_type & VM_PROT_EXECUTE) && prot)
2d21ac55 8759 log_stack_execution_failure((addr64_t)vaddr, prot);
0c530ab8 8760
2d21ac55 8761 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 8762 return KERN_PROTECTION_FAILURE;
1c79356b
A
8763 }
8764
8765 /*
8766 * If this page is not pageable, we have to get
8767 * it for all possible accesses.
8768 */
8769
91447636
A
8770 *wired = (entry->wired_count != 0);
8771 if (*wired)
0c530ab8 8772 fault_type = prot;
1c79356b
A
8773
8774 /*
8775 * If the entry was copy-on-write, we either ...
8776 */
8777
8778 if (entry->needs_copy) {
8779 /*
8780 * If we want to write the page, we may as well
8781 * handle that now since we've got the map locked.
8782 *
8783 * If we don't need to write the page, we just
8784 * demote the permissions allowed.
8785 */
8786
91447636 8787 if ((fault_type & VM_PROT_WRITE) || *wired) {
1c79356b
A
8788 /*
8789 * Make a new object, and place it in the
8790 * object chain. Note that no new references
8791 * have appeared -- one just moved from the
8792 * map to the new object.
8793 */
8794
8795 if (vm_map_lock_read_to_write(map)) {
8796 vm_map_lock_read(map);
8797 goto RetryLookup;
8798 }
8799 vm_object_shadow(&entry->object.vm_object,
8800 &entry->offset,
91447636 8801 (vm_map_size_t) (entry->vme_end -
2d21ac55 8802 entry->vme_start));
1c79356b
A
8803
8804 entry->object.vm_object->shadowed = TRUE;
8805 entry->needs_copy = FALSE;
8806 vm_map_lock_write_to_read(map);
8807 }
8808 else {
8809 /*
8810 * We're attempting to read a copy-on-write
8811 * page -- don't allow writes.
8812 */
8813
8814 prot &= (~VM_PROT_WRITE);
8815 }
8816 }
8817
8818 /*
8819 * Create an object if necessary.
8820 */
8821 if (entry->object.vm_object == VM_OBJECT_NULL) {
8822
8823 if (vm_map_lock_read_to_write(map)) {
8824 vm_map_lock_read(map);
8825 goto RetryLookup;
8826 }
8827
8828 entry->object.vm_object = vm_object_allocate(
91447636 8829 (vm_map_size_t)(entry->vme_end - entry->vme_start));
1c79356b
A
8830 entry->offset = 0;
8831 vm_map_lock_write_to_read(map);
8832 }
8833
8834 /*
8835 * Return the object/offset from this entry. If the entry
8836 * was copy-on-write or empty, it has been fixed up. Also
8837 * return the protection.
8838 */
8839
8840 *offset = (vaddr - entry->vme_start) + entry->offset;
8841 *object = entry->object.vm_object;
8842 *out_prot = prot;
2d21ac55
A
8843
8844 if (fault_info) {
8845 fault_info->interruptible = THREAD_UNINT; /* for now... */
8846 /* ... the caller will change "interruptible" if needed */
8847 fault_info->cluster_size = 0;
8848 fault_info->user_tag = entry->alias;
8849 fault_info->behavior = entry->behavior;
8850 fault_info->lo_offset = entry->offset;
8851 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8852 fault_info->no_cache = entry->no_cache;
b0d623f7 8853 fault_info->stealth = FALSE;
6d2010ae
A
8854 fault_info->io_sync = FALSE;
8855 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
0b4c1975 8856 fault_info->mark_zf_absent = FALSE;
2d21ac55 8857 }
1c79356b
A
8858
8859 /*
8860 * Lock the object to prevent it from disappearing
8861 */
2d21ac55
A
8862 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8863 vm_object_lock(*object);
8864 else
8865 vm_object_lock_shared(*object);
8866
1c79356b
A
8867 /*
8868 * Save the version number
8869 */
8870
8871 out_version->main_timestamp = map->timestamp;
8872
8873 return KERN_SUCCESS;
8874}
8875
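
/*
 * Illustrative sketch, not part of vm_map.c: the calling convention
 * for vm_map_lookup_locked(), as used by the fault path.  The caller
 * read-locks the map; on return the map (possibly updated to a
 * submap) is still locked and, on success, *object comes back locked.
 * The function and local names below are hypothetical; only the
 * vm_map_*/vm_object_* calls are real.
 */
#if 0	/* example only */
static kern_return_t
example_lookup(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_t		real_map;
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE,
				  &version, &object, &offset, &prot, &wired,
				  NULL,			/* no fault_info wanted */
				  &real_map);
	if (kr == KERN_SUCCESS) {
		/* ... operate on the locked object ... */
		vm_object_unlock(object);
		if (real_map != map)
			vm_map_unlock(real_map);
	}
	/* per the contract above, the map is still locked on exit */
	vm_map_unlock_read(map);
	return kr;
}
#endif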
8876
8877/*
8878 * vm_map_verify:
8879 *
8880 * Verifies that the map in question has not changed
8881 * since the given version. If successful, the map
8882 * will not change until vm_map_verify_done() is called.
8883 */
8884boolean_t
8885vm_map_verify(
8886 register vm_map_t map,
8887 register vm_map_version_t *version) /* REF */
8888{
8889 boolean_t result;
8890
8891 vm_map_lock_read(map);
8892 result = (map->timestamp == version->main_timestamp);
8893
8894 if (!result)
8895 vm_map_unlock_read(map);
8896
8897 return(result);
8898}
8899
8900/*
8901 * vm_map_verify_done:
8902 *
8903 * Releases locks acquired by a vm_map_verify.
8904 *
8905 * This is now a macro in vm/vm_map.h. It does a
8906 * vm_map_unlock_read on the map.
8907 */
8908
8909
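
/*
 * Illustrative sketch, not part of vm_map.c: the version-check
 * pattern.  A version captured during an earlier lookup is re-checked
 * with vm_map_verify() after the map had to be unlocked for blocking
 * work; if the check fails, the caller redoes the lookup.  The
 * function name is hypothetical; the vm_map_* calls are real.
 */
#if 0	/* example only */
static void
example_verify_pattern(vm_map_t map, vm_map_version_t *version)
{
	/*
	 * "version" was filled in by an earlier vm_map_lookup_locked()
	 * and the map has since been unlocked to do blocking work.
	 */
	if (!vm_map_verify(map, version)) {
		/* the map changed while unlocked: the caller must redo
		 * the lookup; vm_map_verify() left the map unlocked. */
		return;
	}
	/* the map is read-locked and unchanged; finish the operation,
	 * then release the lock taken by vm_map_verify(): */
	vm_map_verify_done(map, version);	/* vm_map_unlock_read(map) */
}
#endif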
91447636
A
8910/*
8911 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8912 * Goes away after regular vm_region_recurse function migrates to
8913 * 64 bits
8914 * vm_region_recurse: A form of vm_region which follows the
8915 * submaps in a target map
8916 *
8917 */
8918
8919kern_return_t
8920vm_map_region_recurse_64(
8921 vm_map_t map,
8922 vm_map_offset_t *address, /* IN/OUT */
8923 vm_map_size_t *size, /* OUT */
8924 natural_t *nesting_depth, /* IN/OUT */
8925 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8926 mach_msg_type_number_t *count) /* IN/OUT */
8927{
8928 vm_region_extended_info_data_t extended;
8929 vm_map_entry_t tmp_entry;
8930 vm_map_offset_t user_address;
8931 unsigned int user_max_depth;
8932
8933 /*
8934 * "curr_entry" is the VM map entry preceding or including the
8935 * address we're looking for.
8936 * "curr_map" is the map or sub-map containing "curr_entry".
6d2010ae
A
8937 * "curr_address" is the equivalent of the top map's "user_address"
8938 * in the current map.
91447636
A
8939 * "curr_offset" is the cumulated offset of "curr_map" in the
8940 * target task's address space.
8941 * "curr_depth" is the depth of "curr_map" in the chain of
8942 * sub-maps.
6d2010ae
A
8943 *
8944 * "curr_max_below" and "curr_max_above" limit the range (around
8945 * "curr_address") we should take into account in the current (sub)map.
8946 * They limit the range to what's visible through the map entries
8947 * we've traversed from the top map to the current map.
8948
91447636
A
8949 */
8950 vm_map_entry_t curr_entry;
6d2010ae 8951 vm_map_address_t curr_address;
91447636
A
8952 vm_map_offset_t curr_offset;
8953 vm_map_t curr_map;
8954 unsigned int curr_depth;
6d2010ae
A
8955 vm_map_offset_t curr_max_below, curr_max_above;
8956 vm_map_offset_t curr_skip;
91447636
A
8957
8958 /*
8959 * "next_" is the same as "curr_" but for the VM region immediately
8960 * after the address we're looking for. We need to keep track of this
8961 * too because we want to return info about that region if the
8962 * address we're looking for is not mapped.
8963 */
8964 vm_map_entry_t next_entry;
8965 vm_map_offset_t next_offset;
6d2010ae 8966 vm_map_offset_t next_address;
91447636
A
8967 vm_map_t next_map;
8968 unsigned int next_depth;
6d2010ae
A
8969 vm_map_offset_t next_max_below, next_max_above;
8970 vm_map_offset_t next_skip;
91447636 8971
2d21ac55
A
8972 boolean_t look_for_pages;
8973 vm_region_submap_short_info_64_t short_info;
8974
91447636
A
8975 if (map == VM_MAP_NULL) {
8976 /* no address space to work on */
8977 return KERN_INVALID_ARGUMENT;
8978 }
8979
8980 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
2d21ac55
A
8981 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8982 /*
8983 * "info" structure is not big enough and
8984 * would overflow
8985 */
8986 return KERN_INVALID_ARGUMENT;
8987 } else {
8988 look_for_pages = FALSE;
8989 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8990 short_info = (vm_region_submap_short_info_64_t) submap_info;
8991 submap_info = NULL;
8992 }
8993 } else {
8994 look_for_pages = TRUE;
8995 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8996 short_info = NULL;
91447636
A
8997 }
8998
91447636
A
8999
9000 user_address = *address;
9001 user_max_depth = *nesting_depth;
9002
9003 curr_entry = NULL;
9004 curr_map = map;
6d2010ae 9005 curr_address = user_address;
91447636 9006 curr_offset = 0;
6d2010ae 9007 curr_skip = 0;
91447636 9008 curr_depth = 0;
6d2010ae
A
9009 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9010 curr_max_below = curr_address;
91447636
A
9011
9012 next_entry = NULL;
9013 next_map = NULL;
6d2010ae 9014 next_address = 0;
91447636 9015 next_offset = 0;
6d2010ae 9016 next_skip = 0;
91447636 9017 next_depth = 0;
6d2010ae
A
9018 next_max_above = (vm_map_offset_t) -1;
9019 next_max_below = (vm_map_offset_t) -1;
91447636
A
9020
9021 if (not_in_kdp) {
9022 vm_map_lock_read(curr_map);
9023 }
9024
9025 for (;;) {
9026 if (vm_map_lookup_entry(curr_map,
6d2010ae 9027 curr_address,
91447636
A
9028 &tmp_entry)) {
9029 /* tmp_entry contains the address we're looking for */
9030 curr_entry = tmp_entry;
9031 } else {
6d2010ae 9032 vm_map_offset_t skip;
91447636
A
9033 /*
9034 * The address is not mapped. "tmp_entry" is the
9035 * map entry preceding the address. We want the next
9036 * one, if it exists.
9037 */
9038 curr_entry = tmp_entry->vme_next;
6d2010ae 9039
91447636 9040 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae
A
9041 (curr_entry->vme_start >=
9042 curr_address + curr_max_above)) {
91447636
A
9043 /* no next entry at this level: stop looking */
9044 if (not_in_kdp) {
9045 vm_map_unlock_read(curr_map);
9046 }
9047 curr_entry = NULL;
9048 curr_map = NULL;
9049 curr_offset = 0;
9050 curr_depth = 0;
6d2010ae
A
9051 curr_max_above = 0;
9052 curr_max_below = 0;
91447636
A
9053 break;
9054 }
6d2010ae
A
9055
9056 /* adjust current address and offset */
9057 skip = curr_entry->vme_start - curr_address;
9058 curr_address = curr_entry->vme_start;
9059 curr_skip = skip;
9060 curr_offset += skip;
9061 curr_max_above -= skip;
9062 curr_max_below = 0;
91447636
A
9063 }
9064
9065 /*
9066 * Is the next entry at this level closer to the address (or
9067 * deeper in the submap chain) than the one we had
9068 * so far ?
9069 */
9070 tmp_entry = curr_entry->vme_next;
9071 if (tmp_entry == vm_map_to_entry(curr_map)) {
9072 /* no next entry at this level */
6d2010ae
A
9073 } else if (tmp_entry->vme_start >=
9074 curr_address + curr_max_above) {
91447636
A
9075 /*
9076 * tmp_entry is beyond the scope of what we mapped of
9077 * this submap in the upper level: ignore it.
9078 */
9079 } else if ((next_entry == NULL) ||
9080 (tmp_entry->vme_start + curr_offset <=
9081 next_entry->vme_start + next_offset)) {
9082 /*
9083 * We didn't have a "next_entry" or this one is
9084 * closer to the address we're looking for:
9085 * use this "tmp_entry" as the new "next_entry".
9086 */
9087 if (next_entry != NULL) {
9088 /* unlock the last "next_map" */
9089 if (next_map != curr_map && not_in_kdp) {
9090 vm_map_unlock_read(next_map);
9091 }
9092 }
9093 next_entry = tmp_entry;
9094 next_map = curr_map;
91447636 9095 next_depth = curr_depth;
6d2010ae
A
9096 next_address = next_entry->vme_start;
9097 next_skip = curr_skip;
9098 next_offset = curr_offset;
9099 next_offset += (next_address - curr_address);
9100 next_max_above = MIN(next_max_above, curr_max_above);
9101 next_max_above = MIN(next_max_above,
9102 next_entry->vme_end - next_address);
9103 next_max_below = MIN(next_max_below, curr_max_below);
9104 next_max_below = MIN(next_max_below,
9105 next_address - next_entry->vme_start);
91447636
A
9106 }
9107
6d2010ae
A
9108 /*
9109 * "curr_max_{above,below}" allow us to keep track of the
9110 * portion of the submap that is actually mapped at this level:
9111 * the rest of that submap is irrelevant to us, since it's not
9112 * mapped here.
9113 * The relevant portion of the map starts at
9114 * "curr_entry->offset" up to the size of "curr_entry".
9115 */
9116 curr_max_above = MIN(curr_max_above,
9117 curr_entry->vme_end - curr_address);
9118 curr_max_below = MIN(curr_max_below,
9119 curr_address - curr_entry->vme_start);
9120
91447636
A
9121 if (!curr_entry->is_sub_map ||
9122 curr_depth >= user_max_depth) {
9123 /*
9124 * We hit a leaf map or we reached the maximum depth
9125 * we could, so stop looking. Keep the current map
9126 * locked.
9127 */
9128 break;
9129 }
9130
9131 /*
9132 * Get down to the next submap level.
9133 */
9134
9135 /*
9136 * Lock the next level and unlock the current level,
9137 * unless we need to keep it locked to access the "next_entry"
9138 * later.
9139 */
9140 if (not_in_kdp) {
9141 vm_map_lock_read(curr_entry->object.sub_map);
9142 }
9143 if (curr_map == next_map) {
9144 /* keep "next_map" locked in case we need it */
9145 } else {
9146 /* release this map */
b0d623f7
A
9147 if (not_in_kdp)
9148 vm_map_unlock_read(curr_map);
91447636
A
9149 }
9150
9151 /*
9152 * Adjust the offset. "curr_entry" maps the submap
9153 * at relative address "curr_entry->vme_start" in the
9154 * curr_map but skips the first "curr_entry->offset"
9155 * bytes of the submap.
9156 * "curr_offset" always represents the offset of a virtual
9157 * address in the curr_map relative to the absolute address
9158 * space (i.e. the top-level VM map).
9159 */
9160 curr_offset +=
6d2010ae
A
9161 (curr_entry->offset - curr_entry->vme_start);
9162 curr_address = user_address + curr_offset;
91447636
A
9163 /* switch to the submap */
9164 curr_map = curr_entry->object.sub_map;
9165 curr_depth++;
91447636
A
9166 curr_entry = NULL;
9167 }
9168
9169 if (curr_entry == NULL) {
9170 /* no VM region contains the address... */
9171 if (next_entry == NULL) {
9172 /* ... and no VM region follows it either */
9173 return KERN_INVALID_ADDRESS;
9174 }
9175 /* ... gather info about the next VM region */
9176 curr_entry = next_entry;
9177 curr_map = next_map; /* still locked ... */
6d2010ae
A
9178 curr_address = next_address;
9179 curr_skip = next_skip;
91447636
A
9180 curr_offset = next_offset;
9181 curr_depth = next_depth;
6d2010ae
A
9182 curr_max_above = next_max_above;
9183 curr_max_below = next_max_below;
9184 if (curr_map == map) {
9185 user_address = curr_address;
9186 }
91447636
A
9187 } else {
9188 /* we won't need "next_entry" after all */
9189 if (next_entry != NULL) {
9190 /* release "next_map" */
9191 if (next_map != curr_map && not_in_kdp) {
9192 vm_map_unlock_read(next_map);
9193 }
9194 }
9195 }
9196 next_entry = NULL;
9197 next_map = NULL;
9198 next_offset = 0;
6d2010ae 9199 next_skip = 0;
91447636 9200 next_depth = 0;
6d2010ae
A
9201 next_max_below = -1;
9202 next_max_above = -1;
91447636
A
9203
9204 *nesting_depth = curr_depth;
6d2010ae
A
9205 *size = curr_max_above + curr_max_below;
9206 *address = user_address + curr_skip - curr_max_below;
91447636 9207
b0d623f7
A
9208// LP64todo: all the current tools are 32bit, obviously never worked for 64b
9209// so probably should be a real 32b ID vs. ptr.
9210// Current users just check for equality
9211#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9212
2d21ac55
A
9213 if (look_for_pages) {
9214 submap_info->user_tag = curr_entry->alias;
9215 submap_info->offset = curr_entry->offset;
9216 submap_info->protection = curr_entry->protection;
9217 submap_info->inheritance = curr_entry->inheritance;
9218 submap_info->max_protection = curr_entry->max_protection;
9219 submap_info->behavior = curr_entry->behavior;
9220 submap_info->user_wired_count = curr_entry->user_wired_count;
9221 submap_info->is_submap = curr_entry->is_sub_map;
b0d623f7 9222 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
2d21ac55
A
9223 } else {
9224 short_info->user_tag = curr_entry->alias;
9225 short_info->offset = curr_entry->offset;
9226 short_info->protection = curr_entry->protection;
9227 short_info->inheritance = curr_entry->inheritance;
9228 short_info->max_protection = curr_entry->max_protection;
9229 short_info->behavior = curr_entry->behavior;
9230 short_info->user_wired_count = curr_entry->user_wired_count;
9231 short_info->is_submap = curr_entry->is_sub_map;
b0d623f7 9232 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
2d21ac55 9233 }
91447636
A
9234
9235 extended.pages_resident = 0;
9236 extended.pages_swapped_out = 0;
9237 extended.pages_shared_now_private = 0;
9238 extended.pages_dirtied = 0;
9239 extended.external_pager = 0;
9240 extended.shadow_depth = 0;
9241
9242 if (not_in_kdp) {
9243 if (!curr_entry->is_sub_map) {
6d2010ae
A
9244 vm_map_offset_t range_start, range_end;
9245 range_start = MAX((curr_address - curr_max_below),
9246 curr_entry->vme_start);
9247 range_end = MIN((curr_address + curr_max_above),
9248 curr_entry->vme_end);
91447636 9249 vm_map_region_walk(curr_map,
6d2010ae 9250 range_start,
91447636 9251 curr_entry,
6d2010ae
A
9252 (curr_entry->offset +
9253 (range_start -
9254 curr_entry->vme_start)),
9255 range_end - range_start,
2d21ac55
A
9256 &extended,
9257 look_for_pages);
91447636
A
9258 if (extended.external_pager &&
9259 extended.ref_count == 2 &&
9260 extended.share_mode == SM_SHARED) {
2d21ac55 9261 extended.share_mode = SM_PRIVATE;
91447636 9262 }
91447636
A
9263 } else {
9264 if (curr_entry->use_pmap) {
2d21ac55 9265 extended.share_mode = SM_TRUESHARED;
91447636 9266 } else {
2d21ac55 9267 extended.share_mode = SM_PRIVATE;
91447636 9268 }
2d21ac55 9269 extended.ref_count =
91447636
A
9270 curr_entry->object.sub_map->ref_count;
9271 }
9272 }
9273
2d21ac55
A
9274 if (look_for_pages) {
9275 submap_info->pages_resident = extended.pages_resident;
9276 submap_info->pages_swapped_out = extended.pages_swapped_out;
9277 submap_info->pages_shared_now_private =
9278 extended.pages_shared_now_private;
9279 submap_info->pages_dirtied = extended.pages_dirtied;
9280 submap_info->external_pager = extended.external_pager;
9281 submap_info->shadow_depth = extended.shadow_depth;
9282 submap_info->share_mode = extended.share_mode;
9283 submap_info->ref_count = extended.ref_count;
9284 } else {
9285 short_info->external_pager = extended.external_pager;
9286 short_info->shadow_depth = extended.shadow_depth;
9287 short_info->share_mode = extended.share_mode;
9288 short_info->ref_count = extended.ref_count;
9289 }
91447636
A
9290
9291 if (not_in_kdp) {
9292 vm_map_unlock_read(curr_map);
9293 }
9294
9295 return KERN_SUCCESS;
9296}
9297
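
/*
 * Illustrative sketch, not part of vm_map.c: vm_map_region_recurse_64
 * backs the mach_vm_region_recurse() call that tools use to walk a
 * task's regions.  This is user-level code; the function name is
 * hypothetical, the Mach calls and types are real.
 */
#if 0	/* example only -- user-space usage */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static void
example_walk_regions(void)
{
	mach_vm_address_t		addr = 0;
	mach_vm_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;
	kern_return_t			kr;

	for (;;) {
		depth = 0;	/* 0 = don't descend into submaps;    */
				/* pass a larger value to follow them */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		kr = mach_vm_region_recurse(mach_task_self(),
					    &addr, &size, &depth,
					    (vm_region_recurse_info_t)&info,
					    &count);
		if (kr != KERN_SUCCESS)
			break;	/* KERN_INVALID_ADDRESS past the last region */
		/* ... inspect info.protection, info.user_tag, ... */
		addr += size;	/* advance to the next region */
	}
}
#endif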
1c79356b
A
9298/*
9299 * vm_region:
9300 *
9301 * User call to obtain information about a region in
9302 * a task's address map. Currently, only one flavor is
9303 * supported.
9304 *
9305 * XXX The reserved and behavior fields cannot be filled
9306 * in until the vm merge from the IK is completed, and
9307 * vm_reserve is implemented.
1c79356b
A
9308 */
9309
9310kern_return_t
91447636 9311vm_map_region(
1c79356b 9312 vm_map_t map,
91447636
A
9313 vm_map_offset_t *address, /* IN/OUT */
9314 vm_map_size_t *size, /* OUT */
1c79356b
A
9315 vm_region_flavor_t flavor, /* IN */
9316 vm_region_info_t info, /* OUT */
91447636
A
9317 mach_msg_type_number_t *count, /* IN/OUT */
9318 mach_port_t *object_name) /* OUT */
1c79356b
A
9319{
9320 vm_map_entry_t tmp_entry;
1c79356b 9321 vm_map_entry_t entry;
91447636 9322 vm_map_offset_t start;
1c79356b
A
9323
9324 if (map == VM_MAP_NULL)
9325 return(KERN_INVALID_ARGUMENT);
9326
9327 switch (flavor) {
91447636 9328
1c79356b 9329 case VM_REGION_BASIC_INFO:
2d21ac55 9330 /* legacy for old 32-bit objects info */
1c79356b 9331 {
2d21ac55 9332 vm_region_basic_info_t basic;
91447636 9333
2d21ac55
A
9334 if (*count < VM_REGION_BASIC_INFO_COUNT)
9335 return(KERN_INVALID_ARGUMENT);
1c79356b 9336
2d21ac55
A
9337 basic = (vm_region_basic_info_t) info;
9338 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 9339
2d21ac55 9340 vm_map_lock_read(map);
1c79356b 9341
2d21ac55
A
9342 start = *address;
9343 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9344 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9345 vm_map_unlock_read(map);
9346 return(KERN_INVALID_ADDRESS);
9347 }
9348 } else {
9349 entry = tmp_entry;
1c79356b 9350 }
1c79356b 9351
2d21ac55 9352 start = entry->vme_start;
1c79356b 9353
2d21ac55
A
9354 basic->offset = (uint32_t)entry->offset;
9355 basic->protection = entry->protection;
9356 basic->inheritance = entry->inheritance;
9357 basic->max_protection = entry->max_protection;
9358 basic->behavior = entry->behavior;
9359 basic->user_wired_count = entry->user_wired_count;
9360 basic->reserved = entry->is_sub_map;
9361 *address = start;
9362 *size = (entry->vme_end - start);
91447636 9363
2d21ac55
A
9364 if (object_name) *object_name = IP_NULL;
9365 if (entry->is_sub_map) {
9366 basic->shared = FALSE;
9367 } else {
9368 basic->shared = entry->is_shared;
9369 }
91447636 9370
2d21ac55
A
9371 vm_map_unlock_read(map);
9372 return(KERN_SUCCESS);
91447636
A
9373 }
9374
9375 case VM_REGION_BASIC_INFO_64:
9376 {
2d21ac55 9377 vm_region_basic_info_64_t basic;
91447636 9378
2d21ac55
A
9379 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9380 return(KERN_INVALID_ARGUMENT);
9381
9382 basic = (vm_region_basic_info_64_t) info;
9383 *count = VM_REGION_BASIC_INFO_COUNT_64;
9384
9385 vm_map_lock_read(map);
9386
9387 start = *address;
9388 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9389 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9390 vm_map_unlock_read(map);
9391 return(KERN_INVALID_ADDRESS);
9392 }
9393 } else {
9394 entry = tmp_entry;
9395 }
91447636 9396
2d21ac55 9397 start = entry->vme_start;
91447636 9398
2d21ac55
A
9399 basic->offset = entry->offset;
9400 basic->protection = entry->protection;
9401 basic->inheritance = entry->inheritance;
9402 basic->max_protection = entry->max_protection;
9403 basic->behavior = entry->behavior;
9404 basic->user_wired_count = entry->user_wired_count;
9405 basic->reserved = entry->is_sub_map;
9406 *address = start;
9407 *size = (entry->vme_end - start);
91447636 9408
2d21ac55
A
9409 if (object_name) *object_name = IP_NULL;
9410 if (entry->is_sub_map) {
9411 basic->shared = FALSE;
9412 } else {
9413 basic->shared = entry->is_shared;
91447636 9414 }
2d21ac55
A
9415
9416 vm_map_unlock_read(map);
9417 return(KERN_SUCCESS);
1c79356b
A
9418 }
9419 case VM_REGION_EXTENDED_INFO:
9420 {
2d21ac55 9421 vm_region_extended_info_t extended;
1c79356b 9422
2d21ac55
A
9423 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9424 return(KERN_INVALID_ARGUMENT);
1c79356b 9425
2d21ac55
A
9426 extended = (vm_region_extended_info_t) info;
9427 *count = VM_REGION_EXTENDED_INFO_COUNT;
1c79356b 9428
2d21ac55 9429 vm_map_lock_read(map);
1c79356b 9430
2d21ac55
A
9431 start = *address;
9432 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9433 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9434 vm_map_unlock_read(map);
9435 return(KERN_INVALID_ADDRESS);
9436 }
9437 } else {
9438 entry = tmp_entry;
1c79356b 9439 }
2d21ac55 9440 start = entry->vme_start;
1c79356b 9441
2d21ac55
A
9442 extended->protection = entry->protection;
9443 extended->user_tag = entry->alias;
9444 extended->pages_resident = 0;
9445 extended->pages_swapped_out = 0;
9446 extended->pages_shared_now_private = 0;
9447 extended->pages_dirtied = 0;
9448 extended->external_pager = 0;
9449 extended->shadow_depth = 0;
1c79356b 9450
2d21ac55 9451 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
1c79356b 9452
2d21ac55
A
9453 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9454 extended->share_mode = SM_PRIVATE;
1c79356b 9455
2d21ac55
A
9456 if (object_name)
9457 *object_name = IP_NULL;
9458 *address = start;
9459 *size = (entry->vme_end - start);
1c79356b 9460
2d21ac55
A
9461 vm_map_unlock_read(map);
9462 return(KERN_SUCCESS);
1c79356b
A
9463 }
9464 case VM_REGION_TOP_INFO:
9465 {
2d21ac55 9466 vm_region_top_info_t top;
1c79356b 9467
2d21ac55
A
9468 if (*count < VM_REGION_TOP_INFO_COUNT)
9469 return(KERN_INVALID_ARGUMENT);
1c79356b 9470
2d21ac55
A
9471 top = (vm_region_top_info_t) info;
9472 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 9473
2d21ac55 9474 vm_map_lock_read(map);
1c79356b 9475
2d21ac55
A
9476 start = *address;
9477 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9478 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9479 vm_map_unlock_read(map);
9480 return(KERN_INVALID_ADDRESS);
9481 }
9482 } else {
9483 entry = tmp_entry;
1c79356b 9484
2d21ac55
A
9485 }
9486 start = entry->vme_start;
1c79356b 9487
2d21ac55
A
9488 top->private_pages_resident = 0;
9489 top->shared_pages_resident = 0;
1c79356b 9490
2d21ac55 9491 vm_map_region_top_walk(entry, top);
1c79356b 9492
2d21ac55
A
9493 if (object_name)
9494 *object_name = IP_NULL;
9495 *address = start;
9496 *size = (entry->vme_end - start);
1c79356b 9497
2d21ac55
A
9498 vm_map_unlock_read(map);
9499 return(KERN_SUCCESS);
1c79356b
A
9500 }
9501 default:
2d21ac55 9502 return(KERN_INVALID_ARGUMENT);
1c79356b
A
9503 }
9504}
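/*
 * A minimal user-space sketch (not part of the original file) of how the
 * VM_REGION_BASIC_INFO_64 flavor handled above is typically consumed: the
 * mach_vm_region() call reaches vm_map_region() through the kernel's MIG
 * glue.  Error handling is elided and the starting address is arbitrary.
 */
#if 0	/* illustration only -- user-space code, never compiled into the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_first_region(void)
{
	mach_vm_address_t		address = 0;
	mach_vm_size_t			size = 0;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name = MACH_PORT_NULL;

	if (mach_vm_region(mach_task_self(), &address, &size,
			   VM_REGION_BASIC_INFO_64,
			   (vm_region_info_t)&info,
			   &count, &object_name) == KERN_SUCCESS) {
		printf("region 0x%llx-0x%llx prot 0x%x/0x%x shared=%d\n",
		       (unsigned long long)address,
		       (unsigned long long)(address + size),
		       (unsigned)info.protection,
		       (unsigned)info.max_protection,
		       (int)info.shared);
	}
}
#endif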
9505
b0d623f7
A
9506#define OBJ_RESIDENT_COUNT(obj, entry_size) \
9507 MIN((entry_size), \
9508 ((obj)->all_reusable ? \
9509 (obj)->wired_page_count : \
9510 (obj)->resident_page_count - (obj)->reusable_page_count))
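/*
 * OBJ_RESIDENT_COUNT() charges at most "entry_size" pages to an entry:
 * for an all-reusable object only the wired pages are counted, otherwise
 * the resident pages minus the reusable ones.  The callers below pass
 * "entry_size" expressed in pages.
 */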
2d21ac55 9511
0c530ab8 9512void
91447636
A
9513vm_map_region_top_walk(
9514 vm_map_entry_t entry,
9515 vm_region_top_info_t top)
1c79356b 9516{
1c79356b 9517
91447636 9518 if (entry->object.vm_object == 0 || entry->is_sub_map) {
2d21ac55
A
9519 top->share_mode = SM_EMPTY;
9520 top->ref_count = 0;
9521 top->obj_id = 0;
9522 return;
1c79356b 9523 }
2d21ac55 9524
91447636 9525 {
2d21ac55
A
9526 struct vm_object *obj, *tmp_obj;
9527 int ref_count;
9528 uint32_t entry_size;
1c79356b 9529
b0d623f7 9530 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 9531
2d21ac55 9532 obj = entry->object.vm_object;
1c79356b 9533
2d21ac55
A
9534 vm_object_lock(obj);
9535
9536 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9537 ref_count--;
9538
b0d623f7 9539 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55
A
9540 if (obj->shadow) {
9541 if (ref_count == 1)
b0d623f7
A
9542 top->private_pages_resident =
9543 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55 9544 else
b0d623f7
A
9545 top->shared_pages_resident =
9546 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
9547 top->ref_count = ref_count;
9548 top->share_mode = SM_COW;
91447636 9549
2d21ac55
A
9550 while ((tmp_obj = obj->shadow)) {
9551 vm_object_lock(tmp_obj);
9552 vm_object_unlock(obj);
9553 obj = tmp_obj;
1c79356b 9554
2d21ac55
A
9555 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9556 ref_count--;
1c79356b 9557
b0d623f7
A
9558 assert(obj->reusable_page_count <= obj->resident_page_count);
9559 top->shared_pages_resident +=
9560 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
9561 top->ref_count += ref_count - 1;
9562 }
1c79356b 9563 } else {
6d2010ae
A
9564 if (entry->superpage_size) {
9565 top->share_mode = SM_LARGE_PAGE;
9566 top->shared_pages_resident = 0;
9567 top->private_pages_resident = entry_size;
9568 } else if (entry->needs_copy) {
2d21ac55 9569 top->share_mode = SM_COW;
b0d623f7
A
9570 top->shared_pages_resident =
9571 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
9572 } else {
9573 if (ref_count == 1 ||
9574 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9575 top->share_mode = SM_PRIVATE;
b0d623f7
A
9576 top->private_pages_resident =
9577 OBJ_RESIDENT_COUNT(obj,
9578 entry_size);
2d21ac55
A
9579 } else {
9580 top->share_mode = SM_SHARED;
b0d623f7
A
9581 top->shared_pages_resident =
9582 OBJ_RESIDENT_COUNT(obj,
9583 entry_size);
2d21ac55
A
9584 }
9585 }
9586 top->ref_count = ref_count;
1c79356b 9587 }
b0d623f7
A
9588 /* XXX K64: obj_id will be truncated */
9589 top->obj_id = (unsigned int) (uintptr_t)obj;
1c79356b 9590
2d21ac55 9591 vm_object_unlock(obj);
1c79356b 9592 }
91447636
A
9593}
9594
0c530ab8 9595void
91447636
A
9596vm_map_region_walk(
9597 vm_map_t map,
2d21ac55
A
9598 vm_map_offset_t va,
9599 vm_map_entry_t entry,
91447636
A
9600 vm_object_offset_t offset,
9601 vm_object_size_t range,
2d21ac55
A
9602 vm_region_extended_info_t extended,
9603 boolean_t look_for_pages)
91447636
A
9604{
9605 register struct vm_object *obj, *tmp_obj;
9606 register vm_map_offset_t last_offset;
9607 register int i;
9608 register int ref_count;
9609 struct vm_object *shadow_object;
9610 int shadow_depth;
9611
9612 if ((entry->object.vm_object == 0) ||
2d21ac55 9613 (entry->is_sub_map) ||
6d2010ae
A
9614 (entry->object.vm_object->phys_contiguous &&
9615 !entry->superpage_size)) {
2d21ac55
A
9616 extended->share_mode = SM_EMPTY;
9617 extended->ref_count = 0;
9618 return;
1c79356b 9619 }
6d2010ae
A
9620
9621 if (entry->superpage_size) {
9622 extended->shadow_depth = 0;
9623 extended->share_mode = SM_LARGE_PAGE;
9624 extended->ref_count = 1;
9625 extended->external_pager = 0;
9626 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9627 extended->shadow_depth = 0;
9628 return;
9629 }
9630
91447636 9631 {
2d21ac55
A
9632 obj = entry->object.vm_object;
9633
9634 vm_object_lock(obj);
9635
9636 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9637 ref_count--;
9638
9639 if (look_for_pages) {
9640 for (last_offset = offset + range;
9641 offset < last_offset;
9642 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9643 vm_map_region_look_for_page(map, va, obj,
9644 offset, ref_count,
9645 0, extended);
b0d623f7
A
9646 } else {
9647 shadow_object = obj->shadow;
9648 shadow_depth = 0;
9649
9650 if ( !(obj->pager_trusted) && !(obj->internal))
9651 extended->external_pager = 1;
9652
9653 if (shadow_object != VM_OBJECT_NULL) {
9654 vm_object_lock(shadow_object);
9655 for (;
9656 shadow_object != VM_OBJECT_NULL;
9657 shadow_depth++) {
9658 vm_object_t next_shadow;
9659
9660 if ( !(shadow_object->pager_trusted) &&
9661 !(shadow_object->internal))
9662 extended->external_pager = 1;
9663
9664 next_shadow = shadow_object->shadow;
9665 if (next_shadow) {
9666 vm_object_lock(next_shadow);
9667 }
9668 vm_object_unlock(shadow_object);
9669 shadow_object = next_shadow;
2d21ac55 9670 }
2d21ac55 9671 }
b0d623f7 9672 extended->shadow_depth = shadow_depth;
2d21ac55 9673 }
2d21ac55
A
9674
9675 if (extended->shadow_depth || entry->needs_copy)
9676 extended->share_mode = SM_COW;
91447636 9677 else {
2d21ac55
A
9678 if (ref_count == 1)
9679 extended->share_mode = SM_PRIVATE;
9680 else {
9681 if (obj->true_share)
9682 extended->share_mode = SM_TRUESHARED;
9683 else
9684 extended->share_mode = SM_SHARED;
9685 }
91447636 9686 }
2d21ac55 9687 extended->ref_count = ref_count - extended->shadow_depth;
91447636 9688
2d21ac55
A
9689 for (i = 0; i < extended->shadow_depth; i++) {
9690 if ((tmp_obj = obj->shadow) == 0)
9691 break;
9692 vm_object_lock(tmp_obj);
9693 vm_object_unlock(obj);
1c79356b 9694
2d21ac55
A
9695 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9696 ref_count--;
1c79356b 9697
2d21ac55
A
9698 extended->ref_count += ref_count;
9699 obj = tmp_obj;
9700 }
9701 vm_object_unlock(obj);
1c79356b 9702
2d21ac55
A
9703 if (extended->share_mode == SM_SHARED) {
9704 register vm_map_entry_t cur;
9705 register vm_map_entry_t last;
9706 int my_refs;
91447636 9707
2d21ac55
A
9708 obj = entry->object.vm_object;
9709 last = vm_map_to_entry(map);
9710 my_refs = 0;
91447636 9711
2d21ac55
A
9712 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9713 ref_count--;
9714 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9715 my_refs += vm_map_region_count_obj_refs(cur, obj);
91447636 9716
2d21ac55
A
9717 if (my_refs == ref_count)
9718 extended->share_mode = SM_PRIVATE_ALIASED;
9719 else if (my_refs > 1)
9720 extended->share_mode = SM_SHARED_ALIASED;
9721 }
91447636 9722 }
1c79356b
A
9723}
9724
1c79356b 9725
91447636
A
9726/* object is locked on entry and locked on return */
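/*
 * The shadow-chain walk below locks each shadow before dropping the lock
 * of the object it just examined, so the caller's object stays locked for
 * the entire walk and at most one intermediate shadow is locked on top of
 * it at any given time.
 */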
9727
9728
9729static void
9730vm_map_region_look_for_page(
9731 __unused vm_map_t map,
2d21ac55
A
9732 __unused vm_map_offset_t va,
9733 vm_object_t object,
9734 vm_object_offset_t offset,
91447636
A
9735 int max_refcnt,
9736 int depth,
9737 vm_region_extended_info_t extended)
1c79356b 9738{
2d21ac55
A
9739 register vm_page_t p;
9740 register vm_object_t shadow;
9741 register int ref_count;
9742 vm_object_t caller_object;
9743#if MACH_PAGEMAP
9744 kern_return_t kr;
9745#endif
91447636
A
9746 shadow = object->shadow;
9747 caller_object = object;
1c79356b 9748
91447636
A
9749
9750 while (TRUE) {
1c79356b 9751
91447636 9752 if ( !(object->pager_trusted) && !(object->internal))
2d21ac55 9753 extended->external_pager = 1;
1c79356b 9754
91447636
A
9755 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9756 if (shadow && (max_refcnt == 1))
9757 extended->pages_shared_now_private++;
1c79356b 9758
91447636
A
9759 if (!p->fictitious &&
9760 (p->dirty || pmap_is_modified(p->phys_page)))
9761 extended->pages_dirtied++;
1c79356b 9762
91447636
A
9763 extended->pages_resident++;
9764
9765 if(object != caller_object)
2d21ac55 9766 vm_object_unlock(object);
91447636
A
9767
9768 return;
1c79356b 9769 }
2d21ac55 9770#if MACH_PAGEMAP
91447636
A
9771 if (object->existence_map) {
9772 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
1c79356b 9773
91447636 9774 extended->pages_swapped_out++;
1c79356b 9775
91447636 9776 if(object != caller_object)
2d21ac55 9777 vm_object_unlock(object);
1c79356b 9778
91447636
A
9779 return;
9780 }
2d21ac55
A
9781 } else if (object->internal &&
9782 object->alive &&
9783 !object->terminating &&
9784 object->pager_ready) {
9785
9786 memory_object_t pager;
9787
9788 vm_object_paging_begin(object);
9789 pager = object->pager;
9790 vm_object_unlock(object);
9791
9792 kr = memory_object_data_request(
9793 pager,
9794 offset + object->paging_offset,
9795 0, /* just poke the pager */
9796 VM_PROT_READ,
9797 NULL);
9798
9799 vm_object_lock(object);
9800 vm_object_paging_end(object);
9801
9802 if (kr == KERN_SUCCESS) {
9803 /* the pager has that page */
9804 extended->pages_swapped_out++;
9805 if (object != caller_object)
9806 vm_object_unlock(object);
9807 return;
9808 }
1c79356b 9809 }
2d21ac55
A
9810#endif /* MACH_PAGEMAP */
9811
91447636 9812 if (shadow) {
2d21ac55 9813 vm_object_lock(shadow);
1c79356b 9814
91447636
A
9815 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9816 ref_count--;
1c79356b 9817
91447636
A
9818 if (++depth > extended->shadow_depth)
9819 extended->shadow_depth = depth;
1c79356b 9820
91447636
A
9821 if (ref_count > max_refcnt)
9822 max_refcnt = ref_count;
9823
9824 if(object != caller_object)
2d21ac55 9825 vm_object_unlock(object);
91447636 9826
6d2010ae 9827 offset = offset + object->vo_shadow_offset;
91447636
A
9828 object = shadow;
9829 shadow = object->shadow;
9830 continue;
1c79356b 9831 }
91447636 9832 if(object != caller_object)
2d21ac55 9833 vm_object_unlock(object);
91447636
A
9834 break;
9835 }
9836}
1c79356b 9837
91447636
A
9838static int
9839vm_map_region_count_obj_refs(
9840 vm_map_entry_t entry,
9841 vm_object_t object)
9842{
9843 register int ref_count;
9844 register vm_object_t chk_obj;
9845 register vm_object_t tmp_obj;
1c79356b 9846
91447636 9847 if (entry->object.vm_object == 0)
2d21ac55 9848 return(0);
1c79356b 9849
91447636 9850 if (entry->is_sub_map)
2d21ac55 9851 return(0);
91447636 9852 else {
2d21ac55 9853 ref_count = 0;
1c79356b 9854
2d21ac55
A
9855 chk_obj = entry->object.vm_object;
9856 vm_object_lock(chk_obj);
1c79356b 9857
2d21ac55
A
9858 while (chk_obj) {
9859 if (chk_obj == object)
9860 ref_count++;
9861 tmp_obj = chk_obj->shadow;
9862 if (tmp_obj)
9863 vm_object_lock(tmp_obj);
9864 vm_object_unlock(chk_obj);
1c79356b 9865
2d21ac55
A
9866 chk_obj = tmp_obj;
9867 }
1c79356b 9868 }
91447636 9869 return(ref_count);
1c79356b
A
9870}
9871
9872
9873/*
91447636
A
9874 * Routine: vm_map_simplify
9875 *
9876 * Description:
9877 * Attempt to simplify the map representation in
9878 * the vicinity of the given starting address.
9879 * Note:
9880 * This routine is intended primarily to keep the
9881 * kernel maps more compact -- they generally don't
9882 * benefit from the "expand a map entry" technology
9883 * at allocation time because the adjacent entry
9884 * is often wired down.
1c79356b 9885 */
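/*
 * Example of the coalescing done by vm_map_simplify_entry() below: two
 * adjacent entries mapping consecutive offsets of the same object with
 * identical attributes,
 *
 *	[ prev: 0x1000-0x2000, obj X, off 0 ][ this: 0x2000-0x3000, obj X, off 0x1000 ]
 *
 * become a single entry
 *
 *	[ this: 0x1000-0x3000, obj X, off 0 ]
 *
 * by unlinking "prev", extending "this" backwards and dropping the extra
 * object reference that "prev" held.  (Addresses are illustrative only.)
 */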
91447636
A
9886void
9887vm_map_simplify_entry(
9888 vm_map_t map,
9889 vm_map_entry_t this_entry)
1c79356b 9890{
91447636 9891 vm_map_entry_t prev_entry;
1c79356b 9892
91447636 9893 counter(c_vm_map_simplify_entry_called++);
1c79356b 9894
91447636 9895 prev_entry = this_entry->vme_prev;
1c79356b 9896
91447636 9897 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 9898 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 9899
91447636 9900 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 9901
2d21ac55 9902 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
1c79356b 9903
91447636
A
9904 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9905 ((prev_entry->offset + (prev_entry->vme_end -
9906 prev_entry->vme_start))
9907 == this_entry->offset) &&
1c79356b 9908
91447636
A
9909 (prev_entry->inheritance == this_entry->inheritance) &&
9910 (prev_entry->protection == this_entry->protection) &&
9911 (prev_entry->max_protection == this_entry->max_protection) &&
9912 (prev_entry->behavior == this_entry->behavior) &&
9913 (prev_entry->alias == this_entry->alias) &&
b0d623f7 9914 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
2d21ac55 9915 (prev_entry->no_cache == this_entry->no_cache) &&
91447636
A
9916 (prev_entry->wired_count == this_entry->wired_count) &&
9917 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 9918
91447636 9919 (prev_entry->needs_copy == this_entry->needs_copy) &&
b0d623f7 9920 (prev_entry->permanent == this_entry->permanent) &&
1c79356b 9921
91447636
A
9922 (prev_entry->use_pmap == FALSE) &&
9923 (this_entry->use_pmap == FALSE) &&
9924 (prev_entry->in_transition == FALSE) &&
9925 (this_entry->in_transition == FALSE) &&
9926 (prev_entry->needs_wakeup == FALSE) &&
9927 (this_entry->needs_wakeup == FALSE) &&
9928 (prev_entry->is_shared == FALSE) &&
9929 (this_entry->is_shared == FALSE)
2d21ac55 9930 ) {
6d2010ae 9931 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
91447636
A
9932 this_entry->vme_start = prev_entry->vme_start;
9933 this_entry->offset = prev_entry->offset;
2d21ac55
A
9934 if (prev_entry->is_sub_map) {
9935 vm_map_deallocate(prev_entry->object.sub_map);
9936 } else {
9937 vm_object_deallocate(prev_entry->object.vm_object);
9938 }
91447636 9939 vm_map_entry_dispose(map, prev_entry);
0c530ab8 9940 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 9941 counter(c_vm_map_simplified++);
1c79356b 9942 }
91447636 9943}
1c79356b 9944
91447636
A
9945void
9946vm_map_simplify(
9947 vm_map_t map,
9948 vm_map_offset_t start)
9949{
9950 vm_map_entry_t this_entry;
1c79356b 9951
91447636
A
9952 vm_map_lock(map);
9953 if (vm_map_lookup_entry(map, start, &this_entry)) {
9954 vm_map_simplify_entry(map, this_entry);
9955 vm_map_simplify_entry(map, this_entry->vme_next);
9956 }
9957 counter(c_vm_map_simplify_called++);
9958 vm_map_unlock(map);
9959}
1c79356b 9960
91447636
A
9961static void
9962vm_map_simplify_range(
9963 vm_map_t map,
9964 vm_map_offset_t start,
9965 vm_map_offset_t end)
9966{
9967 vm_map_entry_t entry;
1c79356b 9968
91447636
A
9969 /*
9970 * The map should be locked (for "write") by the caller.
9971 */
1c79356b 9972
91447636
A
9973 if (start >= end) {
9974 /* invalid address range */
9975 return;
9976 }
1c79356b 9977
2d21ac55
A
9978 start = vm_map_trunc_page(start);
9979 end = vm_map_round_page(end);
9980
91447636
A
9981 if (!vm_map_lookup_entry(map, start, &entry)) {
9982 /* "start" is not mapped and "entry" ends before "start" */
9983 if (entry == vm_map_to_entry(map)) {
9984 /* start with first entry in the map */
9985 entry = vm_map_first_entry(map);
9986 } else {
9987 /* start with next entry */
9988 entry = entry->vme_next;
9989 }
9990 }
9991
9992 while (entry != vm_map_to_entry(map) &&
9993 entry->vme_start <= end) {
9994 /* try and coalesce "entry" with its previous entry */
9995 vm_map_simplify_entry(map, entry);
9996 entry = entry->vme_next;
9997 }
9998}
1c79356b 9999
1c79356b 10000
91447636
A
10001/*
10002 * Routine: vm_map_machine_attribute
10003 * Purpose:
10004 * Provide machine-specific attributes to mappings,
10005 * such as cacheability, for machines that provide
10006 * them. NUMA architectures and machines with big/strange
10007 * caches will use this.
10008 * Note:
10009 * Responsibilities for locking and checking are handled here,
10010 * everything else in the pmap module. If any non-volatile
10011 * information must be kept, the pmap module should handle
10012 * it itself. [This assumes that attributes do not
10013 * need to be inherited, which seems ok to me]
10014 */
10015kern_return_t
10016vm_map_machine_attribute(
10017 vm_map_t map,
10018 vm_map_offset_t start,
10019 vm_map_offset_t end,
10020 vm_machine_attribute_t attribute,
10021 vm_machine_attribute_val_t* value) /* IN/OUT */
10022{
10023 kern_return_t ret;
10024 vm_map_size_t sync_size;
10025 vm_map_entry_t entry;
10026
10027 if (start < vm_map_min(map) || end > vm_map_max(map))
10028 return KERN_INVALID_ADDRESS;
1c79356b 10029
91447636
A
10030 /* Figure how much memory we need to flush (in page increments) */
10031 sync_size = end - start;
1c79356b 10032
91447636
A
10033 vm_map_lock(map);
10034
10035 if (attribute != MATTR_CACHE) {
10036 /* If we don't have to find physical addresses, we */
10037 /* don't have to do an explicit traversal here. */
10038 ret = pmap_attribute(map->pmap, start, end-start,
10039 attribute, value);
10040 vm_map_unlock(map);
10041 return ret;
10042 }
1c79356b 10043
91447636 10044 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 10045
91447636
A
10046 while(sync_size) {
10047 if (vm_map_lookup_entry(map, start, &entry)) {
10048 vm_map_size_t sub_size;
10049 if((entry->vme_end - start) > sync_size) {
10050 sub_size = sync_size;
10051 sync_size = 0;
10052 } else {
10053 sub_size = entry->vme_end - start;
2d21ac55 10054 sync_size -= sub_size;
91447636
A
10055 }
10056 if(entry->is_sub_map) {
10057 vm_map_offset_t sub_start;
10058 vm_map_offset_t sub_end;
1c79356b 10059
91447636 10060 sub_start = (start - entry->vme_start)
2d21ac55 10061 + entry->offset;
91447636
A
10062 sub_end = sub_start + sub_size;
10063 vm_map_machine_attribute(
10064 entry->object.sub_map,
10065 sub_start,
10066 sub_end,
10067 attribute, value);
10068 } else {
10069 if(entry->object.vm_object) {
10070 vm_page_t m;
10071 vm_object_t object;
10072 vm_object_t base_object;
10073 vm_object_t last_object;
10074 vm_object_offset_t offset;
10075 vm_object_offset_t base_offset;
10076 vm_map_size_t range;
10077 range = sub_size;
10078 offset = (start - entry->vme_start)
2d21ac55 10079 + entry->offset;
91447636
A
10080 base_offset = offset;
10081 object = entry->object.vm_object;
10082 base_object = object;
10083 last_object = NULL;
1c79356b 10084
91447636 10085 vm_object_lock(object);
1c79356b 10086
91447636
A
10087 while (range) {
10088 m = vm_page_lookup(
10089 object, offset);
1c79356b 10090
91447636
A
10091 if (m && !m->fictitious) {
10092 ret =
2d21ac55
A
10093 pmap_attribute_cache_sync(
10094 m->phys_page,
10095 PAGE_SIZE,
10096 attribute, value);
91447636
A
10097
10098 } else if (object->shadow) {
6d2010ae 10099 offset = offset + object->vo_shadow_offset;
91447636
A
10100 last_object = object;
10101 object = object->shadow;
10102 vm_object_lock(last_object->shadow);
10103 vm_object_unlock(last_object);
10104 continue;
10105 }
10106 range -= PAGE_SIZE;
1c79356b 10107
91447636
A
10108 if (base_object != object) {
10109 vm_object_unlock(object);
10110 vm_object_lock(base_object);
10111 object = base_object;
10112 }
10113 /* Bump to the next page */
10114 base_offset += PAGE_SIZE;
10115 offset = base_offset;
10116 }
10117 vm_object_unlock(object);
10118 }
10119 }
10120 start += sub_size;
10121 } else {
10122 vm_map_unlock(map);
10123 return KERN_FAILURE;
10124 }
10125
1c79356b 10126 }
e5568f75 10127
91447636 10128 vm_map_unlock(map);
e5568f75 10129
91447636
A
10130 return ret;
10131}
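/*
 * A minimal user-space sketch (not part of the original file), assuming the
 * vm_machine_attribute() MIG routine is exported to user space: requesting a
 * cache flush for a range lands in vm_map_machine_attribute() above with
 * attribute MATTR_CACHE.
 */
#if 0	/* illustration only -- user-space code, never compiled into the kernel */
#include <mach/mach.h>
#include <mach/vm_attributes.h>

static kern_return_t
flush_cache_range(vm_address_t addr, vm_size_t len)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_machine_attribute(mach_task_self(), addr, len,
				    MATTR_CACHE, &value);
}
#endif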
e5568f75 10132
91447636
A
10133/*
10134 * vm_map_behavior_set:
10135 *
10136 * Sets the paging reference behavior of the specified address
10137 * range in the target map. Paging reference behavior affects
10138 * how pagein operations resulting from faults on the map will be
10139 * clustered.
10140 */
10141kern_return_t
10142vm_map_behavior_set(
10143 vm_map_t map,
10144 vm_map_offset_t start,
10145 vm_map_offset_t end,
10146 vm_behavior_t new_behavior)
10147{
10148 register vm_map_entry_t entry;
10149 vm_map_entry_t temp_entry;
e5568f75 10150
91447636 10151 XPR(XPR_VM_MAP,
2d21ac55 10152 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 10153 map, start, end, new_behavior, 0);
e5568f75 10154
6d2010ae
A
10155 if (start > end ||
10156 start < vm_map_min(map) ||
10157 end > vm_map_max(map)) {
10158 return KERN_NO_SPACE;
10159 }
10160
91447636 10161 switch (new_behavior) {
b0d623f7
A
10162
10163 /*
10164 * The behaviors in this first block all set a persistent state on the specified
10165 * memory range. All we have to do here is record the desired behavior
10166 * in the vm_map_entry_t's.
10167 */
10168
91447636
A
10169 case VM_BEHAVIOR_DEFAULT:
10170 case VM_BEHAVIOR_RANDOM:
10171 case VM_BEHAVIOR_SEQUENTIAL:
10172 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
10173 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10174 vm_map_lock(map);
10175
10176 /*
10177 * The entire address range must be valid for the map.
10178 * Note that vm_map_range_check() does a
10179 * vm_map_lookup_entry() internally and returns the
10180 * entry containing the start of the address range if
10181 * the entire range is valid.
10182 */
10183 if (vm_map_range_check(map, start, end, &temp_entry)) {
10184 entry = temp_entry;
10185 vm_map_clip_start(map, entry, start);
10186 }
10187 else {
10188 vm_map_unlock(map);
10189 return(KERN_INVALID_ADDRESS);
10190 }
10191
10192 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10193 vm_map_clip_end(map, entry, end);
10194 assert(!entry->use_pmap);
10195
10196 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10197 entry->zero_wired_pages = TRUE;
10198 } else {
10199 entry->behavior = new_behavior;
10200 }
10201 entry = entry->vme_next;
10202 }
10203
10204 vm_map_unlock(map);
91447636 10205 break;
b0d623f7
A
10206
10207 /*
10208 * The rest of these are different from the above in that they cause
10209 * an immediate action to take place as opposed to setting a behavior that
10210 * affects future actions.
10211 */
10212
91447636 10213 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
10214 return vm_map_willneed(map, start, end);
10215
91447636 10216 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
10217 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10218
10219 case VM_BEHAVIOR_FREE:
10220 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10221
10222 case VM_BEHAVIOR_REUSABLE:
10223 return vm_map_reusable_pages(map, start, end);
10224
10225 case VM_BEHAVIOR_REUSE:
10226 return vm_map_reuse_pages(map, start, end);
10227
10228 case VM_BEHAVIOR_CAN_REUSE:
10229 return vm_map_can_reuse(map, start, end);
10230
1c79356b 10231 default:
91447636 10232 return(KERN_INVALID_ARGUMENT);
1c79356b 10233 }
1c79356b 10234
b0d623f7
A
10235 return(KERN_SUCCESS);
10236}
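/*
 * A minimal user-space sketch (not part of the original file): both the
 * Mach vm_behavior_set() interface and the BSD madvise(2) front end funnel
 * into vm_map_behavior_set() above; the BSD layer translates the MADV_*
 * advice values into the corresponding VM_BEHAVIOR_* values.
 */
#if 0	/* illustration only -- user-space code, never compiled into the kernel */
#include <mach/mach.h>
#include <sys/mman.h>

static void
hint_sequential(void *addr, size_t len)
{
	/* Mach interface: persistent pagein-clustering hint. */
	(void) vm_behavior_set(mach_task_self(),
			       (vm_address_t)addr, (vm_size_t)len,
			       VM_BEHAVIOR_SEQUENTIAL);

	/* Equivalent advice through the BSD front end. */
	(void) madvise(addr, len, MADV_SEQUENTIAL);
}
#endif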
10237
10238
10239/*
10240 * Internals for madvise(MADV_WILLNEED) system call.
10241 *
10242 * The present implementation does a read-ahead if the mapping corresponds
10243 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10244 * and basically ignore the "advice" (which we are always free to do).
10245 */
10246
10247
10248static kern_return_t
10249vm_map_willneed(
10250 vm_map_t map,
10251 vm_map_offset_t start,
10252 vm_map_offset_t end
10253)
10254{
10255 vm_map_entry_t entry;
10256 vm_object_t object;
10257 memory_object_t pager;
10258 struct vm_object_fault_info fault_info;
10259 kern_return_t kr;
10260 vm_object_size_t len;
10261 vm_object_offset_t offset;
1c79356b 10262
91447636 10263 /*
b0d623f7
A
10264 * Fill in static values in fault_info. Several fields get ignored by the code
10265 * we call, but we'll fill them in anyway, since uninitialized fields would be
10266 * a hazard for future backwards compatibility.
91447636 10267 */
b0d623f7
A
10268
10269 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10270 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10271 fault_info.no_cache = FALSE; /* ignored value */
10272 fault_info.stealth = TRUE;
6d2010ae
A
10273 fault_info.io_sync = FALSE;
10274 fault_info.cs_bypass = FALSE;
0b4c1975 10275 fault_info.mark_zf_absent = FALSE;
b0d623f7
A
10276
10277 /*
10278 * The MADV_WILLNEED operation doesn't require any changes to the
10279 * vm_map_entry_t's, so the read lock is sufficient.
10280 */
10281
10282 vm_map_lock_read(map);
10283
10284 /*
10285 * The madvise semantics require that the address range be fully
10286 * allocated with no holes. Otherwise, we're required to return
10287 * an error.
10288 */
10289
6d2010ae
A
10290 if (! vm_map_range_check(map, start, end, &entry)) {
10291 vm_map_unlock_read(map);
10292 return KERN_INVALID_ADDRESS;
10293 }
b0d623f7 10294
6d2010ae
A
10295 /*
10296 * Examine each vm_map_entry_t in the range.
10297 */
10298 for (; entry != vm_map_to_entry(map) && start < end; ) {
10299
b0d623f7 10300 /*
6d2010ae
A
10301 * The first time through, the start address could be anywhere
10302 * within the vm_map_entry we found. So adjust the offset to
10303 * correspond. After that, the offset will always be zero to
10304 * correspond to the beginning of the current vm_map_entry.
b0d623f7 10305 */
6d2010ae 10306 offset = (start - entry->vme_start) + entry->offset;
b0d623f7 10307
6d2010ae
A
10308 /*
10309 * Set the length so we don't go beyond the end of the
10310 * map_entry or beyond the end of the range we were given.
10311 * This range could also span multiple map entries, all of which
10312 * map different files, so make sure we only do the right amount
10313 * of I/O for each object. Note that it's possible for there
10314 * to be multiple map entries all referring to the same object
10315 * but with different page permissions, but it's not worth
10316 * trying to optimize that case.
10317 */
10318 len = MIN(entry->vme_end - start, end - start);
b0d623f7 10319
6d2010ae
A
10320 if ((vm_size_t) len != len) {
10321 /* 32-bit overflow */
10322 len = (vm_size_t) (0 - PAGE_SIZE);
10323 }
10324 fault_info.cluster_size = (vm_size_t) len;
10325 fault_info.lo_offset = offset;
10326 fault_info.hi_offset = offset + len;
10327 fault_info.user_tag = entry->alias;
b0d623f7 10328
6d2010ae
A
10329 /*
10330 * If there's no read permission to this mapping, then just
10331 * skip it.
10332 */
10333 if ((entry->protection & VM_PROT_READ) == 0) {
10334 entry = entry->vme_next;
10335 start = entry->vme_start;
10336 continue;
10337 }
b0d623f7 10338
6d2010ae
A
10339 /*
10340 * Find the file object backing this map entry. If there is
10341 * none, then we simply ignore the "will need" advice for this
10342 * entry and go on to the next one.
10343 */
10344 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10345 entry = entry->vme_next;
10346 start = entry->vme_start;
10347 continue;
10348 }
b0d623f7 10349
6d2010ae
A
10350 /*
10351 * The data_request() could take a long time, so let's
10352 * release the map lock to avoid blocking other threads.
10353 */
10354 vm_map_unlock_read(map);
b0d623f7 10355
6d2010ae
A
10356 vm_object_paging_begin(object);
10357 pager = object->pager;
10358 vm_object_unlock(object);
b0d623f7 10359
6d2010ae
A
10360 /*
10361 * Get the data from the object asynchronously.
10362 *
10363 * Note that memory_object_data_request() places limits on the
10364 * amount of I/O it will do. Regardless of the len we
10365 * specified, it won't do more than MAX_UPL_TRANSFER and it
10366 * silently truncates the len to that size. This isn't
10367 * necessarily bad since madvise shouldn't really be used to
10368 * page in unlimited amounts of data. Other Unix variants
10369 * limit the willneed case as well. If this turns out to be an
10370 * issue for developers, then we can always adjust the policy
10371 * here and still be backwards compatible since this is all
10372 * just "advice".
10373 */
10374 kr = memory_object_data_request(
10375 pager,
10376 offset + object->paging_offset,
10377 0, /* ignored */
10378 VM_PROT_READ,
10379 (memory_object_fault_info_t)&fault_info);
b0d623f7 10380
6d2010ae
A
10381 vm_object_lock(object);
10382 vm_object_paging_end(object);
10383 vm_object_unlock(object);
b0d623f7 10384
6d2010ae
A
10385 /*
10386 * If we couldn't do the I/O for some reason, just give up on
10387 * the madvise. We still return success to the user since
10388 * madvise isn't supposed to fail when the advice can't be
10389 * taken.
10390 */
10391 if (kr != KERN_SUCCESS) {
10392 return KERN_SUCCESS;
10393 }
b0d623f7 10394
6d2010ae
A
10395 start += len;
10396 if (start >= end) {
10397 /* done */
10398 return KERN_SUCCESS;
10399 }
b0d623f7 10400
6d2010ae
A
10401 /* look up next entry */
10402 vm_map_lock_read(map);
10403 if (! vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 10404 /*
6d2010ae 10405 * There's a new hole in the address range.
b0d623f7 10406 */
6d2010ae
A
10407 vm_map_unlock_read(map);
10408 return KERN_INVALID_ADDRESS;
b0d623f7 10409 }
6d2010ae 10410 }
b0d623f7
A
10411
10412 vm_map_unlock_read(map);
6d2010ae 10413 return KERN_SUCCESS;
b0d623f7
A
10414}
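/*
 * A minimal user-space sketch (not part of the original file) of the path
 * that reaches vm_map_willneed() above: madvise(MADV_WILLNEED) on a
 * file-backed mapping asks for the asynchronous read-ahead done here, while
 * anonymous memory is silently ignored.  Cleanup (munmap) is elided.
 */
#if 0	/* illustration only -- user-space code, never compiled into the kernel */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void
prefetch_file(const char *path, size_t len)
{
	int	fd;
	void	*p;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return;

	p = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
	if (p != MAP_FAILED)
		(void) madvise(p, len, MADV_WILLNEED);	/* advisory: may no-op */

	close(fd);
}
#endif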
10415
10416static boolean_t
10417vm_map_entry_is_reusable(
10418 vm_map_entry_t entry)
10419{
10420 vm_object_t object;
10421
10422 if (entry->is_shared ||
10423 entry->is_sub_map ||
10424 entry->in_transition ||
10425 entry->protection != VM_PROT_DEFAULT ||
10426 entry->max_protection != VM_PROT_ALL ||
10427 entry->inheritance != VM_INHERIT_DEFAULT ||
10428 entry->no_cache ||
10429 entry->permanent ||
10430 entry->superpage_size != 0 ||
10431 entry->zero_wired_pages ||
10432 entry->wired_count != 0 ||
10433 entry->user_wired_count != 0) {
10434 return FALSE;
91447636 10435 }
b0d623f7
A
10436
10437 object = entry->object.vm_object;
10438 if (object == VM_OBJECT_NULL) {
10439 return TRUE;
10440 }
10441 if (object->ref_count == 1 &&
10442 object->wired_page_count == 0 &&
10443 object->copy == VM_OBJECT_NULL &&
10444 object->shadow == VM_OBJECT_NULL &&
10445 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10446 object->internal &&
10447 !object->true_share &&
6d2010ae 10448 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
b0d623f7
A
10449 !object->code_signed) {
10450 return TRUE;
1c79356b 10451 }
b0d623f7
A
10452 return FALSE;
10453
10454
10455}
1c79356b 10456
b0d623f7
A
10457static kern_return_t
10458vm_map_reuse_pages(
10459 vm_map_t map,
10460 vm_map_offset_t start,
10461 vm_map_offset_t end)
10462{
10463 vm_map_entry_t entry;
10464 vm_object_t object;
10465 vm_object_offset_t start_offset, end_offset;
10466
10467 /*
10468 * The MADV_REUSE operation doesn't require any changes to the
10469 * vm_map_entry_t's, so the read lock is sufficient.
10470 */
0b4e3aa0 10471
b0d623f7 10472 vm_map_lock_read(map);
1c79356b 10473
b0d623f7
A
10474 /*
10475 * The madvise semantics require that the address range be fully
10476 * allocated with no holes. Otherwise, we're required to return
10477 * an error.
10478 */
10479
10480 if (!vm_map_range_check(map, start, end, &entry)) {
10481 vm_map_unlock_read(map);
10482 vm_page_stats_reusable.reuse_pages_failure++;
10483 return KERN_INVALID_ADDRESS;
1c79356b 10484 }
91447636 10485
b0d623f7
A
10486 /*
10487 * Examine each vm_map_entry_t in the range.
10488 */
10489 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10490 entry = entry->vme_next) {
10491 /*
10492 * Sanity check on the VM map entry.
10493 */
10494 if (! vm_map_entry_is_reusable(entry)) {
10495 vm_map_unlock_read(map);
10496 vm_page_stats_reusable.reuse_pages_failure++;
10497 return KERN_INVALID_ADDRESS;
10498 }
10499
10500 /*
10501 * The first time through, the start address could be anywhere
10502 * within the vm_map_entry we found. So adjust the offset to
10503 * correspond.
10504 */
10505 if (entry->vme_start < start) {
10506 start_offset = start - entry->vme_start;
10507 } else {
10508 start_offset = 0;
10509 }
10510 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10511 start_offset += entry->offset;
10512 end_offset += entry->offset;
10513
10514 object = entry->object.vm_object;
10515 if (object != VM_OBJECT_NULL) {
10516 vm_object_lock(object);
10517 vm_object_reuse_pages(object, start_offset, end_offset,
10518 TRUE);
10519 vm_object_unlock(object);
10520 }
10521
10522 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10523 /*
10524 * XXX
10525 * We do not hold the VM map exclusively here.
10526 * The "alias" field is not that critical, so it's
10527 * safe to update it here, as long as it is the only
10528 * one that can be modified while holding the VM map
10529 * "shared".
10530 */
10531 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10532 }
10533 }
10534
10535 vm_map_unlock_read(map);
10536 vm_page_stats_reusable.reuse_pages_success++;
10537 return KERN_SUCCESS;
1c79356b
A
10538}
10539
1c79356b 10540
b0d623f7
A
10541static kern_return_t
10542vm_map_reusable_pages(
10543 vm_map_t map,
10544 vm_map_offset_t start,
10545 vm_map_offset_t end)
10546{
10547 vm_map_entry_t entry;
10548 vm_object_t object;
10549 vm_object_offset_t start_offset, end_offset;
10550
10551 /*
10552 * The MADV_REUSABLE operation doesn't require any changes to the
10553 * vm_map_entry_t's, so the read lock is sufficient.
10554 */
10555
10556 vm_map_lock_read(map);
10557
10558 /*
10559 * The madvise semantics require that the address range be fully
10560 * allocated with no holes. Otherwise, we're required to return
10561 * an error.
10562 */
10563
10564 if (!vm_map_range_check(map, start, end, &entry)) {
10565 vm_map_unlock_read(map);
10566 vm_page_stats_reusable.reusable_pages_failure++;
10567 return KERN_INVALID_ADDRESS;
10568 }
10569
10570 /*
10571 * Examine each vm_map_entry_t in the range.
10572 */
10573 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10574 entry = entry->vme_next) {
10575 int kill_pages = 0;
10576
10577 /*
10578 * Sanity check on the VM map entry.
10579 */
10580 if (! vm_map_entry_is_reusable(entry)) {
10581 vm_map_unlock_read(map);
10582 vm_page_stats_reusable.reusable_pages_failure++;
10583 return KERN_INVALID_ADDRESS;
10584 }
10585
10586 /*
10587 * The first time through, the start address could be anywhere
10588 * within the vm_map_entry we found. So adjust the offset to
10589 * correspond.
10590 */
10591 if (entry->vme_start < start) {
10592 start_offset = start - entry->vme_start;
10593 } else {
10594 start_offset = 0;
10595 }
10596 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10597 start_offset += entry->offset;
10598 end_offset += entry->offset;
10599
10600 object = entry->object.vm_object;
10601 if (object == VM_OBJECT_NULL)
10602 continue;
10603
10604
10605 vm_object_lock(object);
10606 if (object->ref_count == 1 && !object->shadow)
10607 kill_pages = 1;
10608 else
10609 kill_pages = -1;
10610 if (kill_pages != -1) {
10611 vm_object_deactivate_pages(object,
10612 start_offset,
10613 end_offset - start_offset,
10614 kill_pages,
10615 TRUE /*reusable_pages*/);
10616 } else {
10617 vm_page_stats_reusable.reusable_pages_shared++;
10618 }
10619 vm_object_unlock(object);
10620
10621 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10622 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10623 /*
10624 * XXX
10625 * We do not hold the VM map exclusively here.
10626 * The "alias" field is not that critical, so it's
10627 * safe to update it here, as long as it is the only
10628 * one that can be modified while holding the VM map
10629 * "shared".
10630 */
10631 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10632 }
10633 }
10634
10635 vm_map_unlock_read(map);
10636 vm_page_stats_reusable.reusable_pages_success++;
10637 return KERN_SUCCESS;
10638}
10639
10640
10641static kern_return_t
10642vm_map_can_reuse(
10643 vm_map_t map,
10644 vm_map_offset_t start,
10645 vm_map_offset_t end)
10646{
10647 vm_map_entry_t entry;
10648
10649 /*
10650 * The MADV_REUSABLE operation doesn't require any changes to the
10651 * vm_map_entry_t's, so the read lock is sufficient.
10652 */
10653
10654 vm_map_lock_read(map);
10655
10656 /*
10657 * The madvise semantics require that the address range be fully
10658 * allocated with no holes. Otherwise, we're required to return
10659 * an error.
10660 */
10661
10662 if (!vm_map_range_check(map, start, end, &entry)) {
10663 vm_map_unlock_read(map);
10664 vm_page_stats_reusable.can_reuse_failure++;
10665 return KERN_INVALID_ADDRESS;
10666 }
10667
10668 /*
10669 * Examine each vm_map_entry_t in the range.
10670 */
10671 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10672 entry = entry->vme_next) {
10673 /*
10674 * Sanity check on the VM map entry.
10675 */
10676 if (! vm_map_entry_is_reusable(entry)) {
10677 vm_map_unlock_read(map);
10678 vm_page_stats_reusable.can_reuse_failure++;
10679 return KERN_INVALID_ADDRESS;
10680 }
10681 }
10682
10683 vm_map_unlock_read(map);
10684 vm_page_stats_reusable.can_reuse_success++;
10685 return KERN_SUCCESS;
10686}
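/*
 * A minimal user-space sketch (not part of the original file): the three
 * routines above back the Darwin-specific madvise(2) advice values used by
 * user-level allocators, reached through vm_map_behavior_set():
 *
 *	MADV_FREE_REUSABLE -> VM_BEHAVIOR_REUSABLE  -> vm_map_reusable_pages()
 *	MADV_FREE_REUSE    -> VM_BEHAVIOR_REUSE     -> vm_map_reuse_pages()
 *	MADV_CAN_REUSE     -> VM_BEHAVIOR_CAN_REUSE -> vm_map_can_reuse()
 */
#if 0	/* illustration only -- user-space code, never compiled into the kernel */
#include <sys/mman.h>

static void
recycle_buffer(void *buf, size_t len)
{
	/* Contents no longer needed; let the kernel reclaim the pages lazily. */
	(void) madvise(buf, len, MADV_FREE_REUSABLE);

	/* ... later, right before the buffer is written to again ... */
	(void) madvise(buf, len, MADV_FREE_REUSE);
}
#endif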
10687
10688
10689
91447636
A
10690#include <mach_kdb.h>
10691#if MACH_KDB
10692#include <ddb/db_output.h>
10693#include <vm/vm_print.h>
1c79356b 10694
91447636 10695#define printf db_printf
1c79356b 10696
91447636
A
10697/*
10698 * Forward declarations for internal functions.
10699 */
10700extern void vm_map_links_print(
2d21ac55 10701 struct vm_map_links *links);
0b4e3aa0 10702
91447636 10703extern void vm_map_header_print(
2d21ac55 10704 struct vm_map_header *header);
1c79356b 10705
91447636 10706extern void vm_map_entry_print(
2d21ac55 10707 vm_map_entry_t entry);
0b4e3aa0 10708
91447636 10709extern void vm_follow_entry(
2d21ac55 10710 vm_map_entry_t entry);
0b4e3aa0 10711
91447636 10712extern void vm_follow_map(
2d21ac55 10713 vm_map_t map);
1c79356b 10714
91447636
A
10715/*
10716 * vm_map_links_print: [ debug ]
10717 */
10718void
10719vm_map_links_print(
10720 struct vm_map_links *links)
10721{
10722 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10723 links->prev,
10724 links->next,
10725 (unsigned long long)links->start,
10726 (unsigned long long)links->end);
10727}
1c79356b 10728
91447636
A
10729/*
10730 * vm_map_header_print: [ debug ]
10731 */
10732void
10733vm_map_header_print(
10734 struct vm_map_header *header)
10735{
10736 vm_map_links_print(&header->links);
10737 iprintf("nentries = %08X, %sentries_pageable\n",
10738 header->nentries,
10739 (header->entries_pageable ? "" : "!"));
10740}
1c79356b 10741
91447636
A
10742/*
10743 * vm_follow_entry: [ debug ]
10744 */
10745void
10746vm_follow_entry(
10747 vm_map_entry_t entry)
10748{
10749 int shadows;
1c79356b 10750
91447636 10751 iprintf("map entry %08X\n", entry);
1c79356b 10752
91447636 10753 db_indent += 2;
1c79356b 10754
91447636
A
10755 shadows = vm_follow_object(entry->object.vm_object);
10756 iprintf("Total objects : %d\n",shadows);
0b4e3aa0 10757
91447636
A
10758 db_indent -= 2;
10759}
1c79356b 10760
91447636
A
10761/*
10762 * vm_map_entry_print: [ debug ]
10763 */
1c79356b 10764void
91447636
A
10765vm_map_entry_print(
10766 register vm_map_entry_t entry)
1c79356b 10767{
91447636
A
10768 static const char *inheritance_name[4] =
10769 { "share", "copy", "none", "?"};
10770 static const char *behavior_name[4] =
10771 { "dflt", "rand", "seqtl", "rseqntl" };
0b4e3aa0 10772
91447636 10773 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
0b4e3aa0 10774
91447636 10775 db_indent += 2;
0b4e3aa0 10776
91447636 10777 vm_map_links_print(&entry->links);
0b4e3aa0 10778
91447636
A
10779 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10780 (unsigned long long)entry->vme_start,
10781 (unsigned long long)entry->vme_end,
10782 entry->protection,
10783 entry->max_protection,
10784 inheritance_name[(entry->inheritance & 0x3)]);
0b4e3aa0 10785
91447636
A
10786 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10787 behavior_name[(entry->behavior & 0x3)],
10788 entry->wired_count,
10789 entry->user_wired_count);
10790 iprintf("%sin_transition, %sneeds_wakeup\n",
10791 (entry->in_transition ? "" : "!"),
10792 (entry->needs_wakeup ? "" : "!"));
0b4e3aa0 10793
91447636
A
10794 if (entry->is_sub_map) {
10795 iprintf("submap = %08X - offset = %016llX\n",
2d21ac55
A
10796 entry->object.sub_map,
10797 (unsigned long long)entry->offset);
91447636
A
10798 } else {
10799 iprintf("object = %08X offset = %016llX - ",
10800 entry->object.vm_object,
10801 (unsigned long long)entry->offset);
10802 printf("%sis_shared, %sneeds_copy\n",
10803 (entry->is_shared ? "" : "!"),
10804 (entry->needs_copy ? "" : "!"));
1c79356b 10805 }
1c79356b 10806
91447636
A
10807 db_indent -= 2;
10808}
1c79356b 10809
91447636
A
10810/*
10811 * vm_follow_map: [ debug ]
10812 */
10813void
10814vm_follow_map(
10815 vm_map_t map)
1c79356b 10816{
91447636 10817 register vm_map_entry_t entry;
1c79356b 10818
91447636 10819 iprintf("task map %08X\n", map);
1c79356b 10820
91447636 10821 db_indent += 2;
55e303ae 10822
91447636
A
10823 for (entry = vm_map_first_entry(map);
10824 entry && entry != vm_map_to_entry(map);
10825 entry = entry->vme_next) {
2d21ac55 10826 vm_follow_entry(entry);
1c79356b 10827 }
1c79356b 10828
91447636
A
10829 db_indent -= 2;
10830}
1c79356b
A
10831
10832/*
91447636 10833 * vm_map_print: [ debug ]
1c79356b 10834 */
5353443c 10835void
91447636
A
10836vm_map_print(
10837 db_addr_t inmap)
5353443c 10838{
91447636
A
10839 register vm_map_entry_t entry;
10840 vm_map_t map;
10841#if TASK_SWAPPER
10842 char *swstate;
10843#endif /* TASK_SWAPPER */
5353443c 10844
91447636
A
10845 map = (vm_map_t)(long)
10846 inmap; /* Make sure we have the right type */
5353443c 10847
91447636 10848 iprintf("task map %08X\n", map);
5353443c 10849
91447636 10850 db_indent += 2;
5353443c 10851
91447636 10852 vm_map_header_print(&map->hdr);
5353443c 10853
91447636
A
10854 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10855 map->pmap,
10856 map->size,
10857 map->ref_count,
10858 map->hint,
10859 map->first_free);
1c79356b 10860
91447636
A
10861 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10862 (map->wait_for_space ? "" : "!"),
10863 (map->wiring_required ? "" : "!"),
10864 map->timestamp);
10865
10866#if TASK_SWAPPER
10867 switch (map->sw_state) {
2d21ac55 10868 case MAP_SW_IN:
91447636
A
10869 swstate = "SW_IN";
10870 break;
2d21ac55 10871 case MAP_SW_OUT:
91447636
A
10872 swstate = "SW_OUT";
10873 break;
2d21ac55 10874 default:
91447636
A
10875 swstate = "????";
10876 break;
1c79356b 10877 }
91447636
A
10878 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
10879#endif /* TASK_SWAPPER */
10880
10881 for (entry = vm_map_first_entry(map);
10882 entry && entry != vm_map_to_entry(map);
10883 entry = entry->vme_next) {
10884 vm_map_entry_print(entry);
10885 }
10886
10887 db_indent -= 2;
1c79356b
A
10888}
10889
1c79356b 10890/*
91447636 10891 * Routine: vm_map_copy_print
1c79356b 10892 * Purpose:
91447636 10893 * Pretty-print a copy object for ddb.
1c79356b 10894 */
91447636
A
10895
10896void
10897vm_map_copy_print(
10898 db_addr_t incopy)
1c79356b 10899{
91447636 10900 vm_map_copy_t copy;
9bccf70c 10901 vm_map_entry_t entry;
1c79356b 10902
91447636
A
10903 copy = (vm_map_copy_t)(long)
10904 incopy; /* Make sure we have the right type */
1c79356b 10905
91447636 10906 printf("copy object 0x%x\n", copy);
9bccf70c 10907
91447636 10908 db_indent += 2;
9bccf70c 10909
91447636
A
10910 iprintf("type=%d", copy->type);
10911 switch (copy->type) {
2d21ac55 10912 case VM_MAP_COPY_ENTRY_LIST:
91447636
A
10913 printf("[entry_list]");
10914 break;
9bccf70c 10915
2d21ac55 10916 case VM_MAP_COPY_OBJECT:
91447636 10917 printf("[object]");
1c79356b 10918 break;
91447636 10919
2d21ac55 10920 case VM_MAP_COPY_KERNEL_BUFFER:
91447636 10921 printf("[kernel_buffer]");
9bccf70c 10922 break;
1c79356b 10923
2d21ac55 10924 default:
91447636
A
10925 printf("[bad type]");
10926 break;
1c79356b 10927 }
91447636
A
10928 printf(", offset=0x%llx", (unsigned long long)copy->offset);
10929 printf(", size=0x%x\n", copy->size);
1c79356b 10930
91447636 10931 switch (copy->type) {
2d21ac55 10932 case VM_MAP_COPY_ENTRY_LIST:
91447636
A
10933 vm_map_header_print(&copy->cpy_hdr);
10934 for (entry = vm_map_copy_first_entry(copy);
10935 entry && entry != vm_map_copy_to_entry(copy);
10936 entry = entry->vme_next) {
10937 vm_map_entry_print(entry);
10938 }
10939 break;
1c79356b 10940
2d21ac55 10941 case VM_MAP_COPY_OBJECT:
91447636
A
10942 iprintf("object=0x%x\n", copy->cpy_object);
10943 break;
10944
2d21ac55 10945 case VM_MAP_COPY_KERNEL_BUFFER:
91447636
A
10946 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
10947 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
10948 break;
1c79356b 10949
1c79356b
A
10950 }
10951
91447636 10952 db_indent -=2;
1c79356b
A
10953}
10954
1c79356b 10955/*
91447636
A
10956 * db_vm_map_total_size(map) [ debug ]
10957 *
10958 * return the total virtual size (in bytes) of the map
1c79356b 10959 */
91447636
A
10960vm_map_size_t
10961db_vm_map_total_size(
10962 db_addr_t inmap)
10963{
10964 vm_map_entry_t entry;
10965 vm_map_size_t total;
10966 vm_map_t map;
1c79356b 10967
91447636
A
10968 map = (vm_map_t)(long)
10969 inmap; /* Make sure we have the right type */
1c79356b 10970
91447636
A
10971 total = 0;
10972 for (entry = vm_map_first_entry(map);
10973 entry != vm_map_to_entry(map);
10974 entry = entry->vme_next) {
10975 total += entry->vme_end - entry->vme_start;
10976 }
1c79356b 10977
91447636
A
10978 return total;
10979}
1c79356b 10980
91447636 10981#endif /* MACH_KDB */
1c79356b
A
10982
10983/*
91447636
A
10984 * Routine: vm_map_entry_insert
10985 *
10986 * Description: This routine inserts a new vm_map_entry in a locked map.
1c79356b 10987 */
91447636
A
10988vm_map_entry_t
10989vm_map_entry_insert(
10990 vm_map_t map,
10991 vm_map_entry_t insp_entry,
10992 vm_map_offset_t start,
10993 vm_map_offset_t end,
10994 vm_object_t object,
10995 vm_object_offset_t offset,
10996 boolean_t needs_copy,
10997 boolean_t is_shared,
10998 boolean_t in_transition,
10999 vm_prot_t cur_protection,
11000 vm_prot_t max_protection,
11001 vm_behavior_t behavior,
11002 vm_inherit_t inheritance,
2d21ac55 11003 unsigned wired_count,
b0d623f7
A
11004 boolean_t no_cache,
11005 boolean_t permanent,
11006 unsigned int superpage_size)
1c79356b 11007{
91447636 11008 vm_map_entry_t new_entry;
1c79356b 11009
91447636 11010 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 11011
91447636 11012 new_entry = vm_map_entry_create(map);
1c79356b 11013
91447636
A
11014 new_entry->vme_start = start;
11015 new_entry->vme_end = end;
11016 assert(page_aligned(new_entry->vme_start));
11017 assert(page_aligned(new_entry->vme_end));
1c79356b 11018
91447636
A
11019 new_entry->object.vm_object = object;
11020 new_entry->offset = offset;
11021 new_entry->is_shared = is_shared;
11022 new_entry->is_sub_map = FALSE;
11023 new_entry->needs_copy = needs_copy;
11024 new_entry->in_transition = in_transition;
11025 new_entry->needs_wakeup = FALSE;
11026 new_entry->inheritance = inheritance;
11027 new_entry->protection = cur_protection;
11028 new_entry->max_protection = max_protection;
11029 new_entry->behavior = behavior;
11030 new_entry->wired_count = wired_count;
11031 new_entry->user_wired_count = 0;
11032 new_entry->use_pmap = FALSE;
0c530ab8 11033 new_entry->alias = 0;
b0d623f7 11034 new_entry->zero_wired_pages = FALSE;
2d21ac55 11035 new_entry->no_cache = no_cache;
b0d623f7
A
11036 new_entry->permanent = permanent;
11037 new_entry->superpage_size = superpage_size;
6d2010ae 11038 new_entry->used_for_jit = FALSE;
1c79356b 11039
91447636
A
11040 /*
11041 * Insert the new entry into the list.
11042 */
1c79356b 11043
6d2010ae 11044 vm_map_store_entry_link(map, insp_entry, new_entry);
91447636
A
11045 map->size += end - start;
11046
11047 /*
11048 * Update the free space hint and the lookup hint.
11049 */
11050
0c530ab8 11051 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 11052 return new_entry;
1c79356b
A
11053}
11054
11055/*
91447636
A
11056 * Routine: vm_map_remap_extract
11057 *
11058 * Description: This routine returns a vm_map_entry list from a map.
1c79356b 11059 */
91447636
A
11060static kern_return_t
11061vm_map_remap_extract(
11062 vm_map_t map,
11063 vm_map_offset_t addr,
11064 vm_map_size_t size,
11065 boolean_t copy,
11066 struct vm_map_header *map_header,
11067 vm_prot_t *cur_protection,
11068 vm_prot_t *max_protection,
11069 /* What, no behavior? */
11070 vm_inherit_t inheritance,
11071 boolean_t pageable)
1c79356b 11072{
91447636
A
11073 kern_return_t result;
11074 vm_map_size_t mapped_size;
11075 vm_map_size_t tmp_size;
11076 vm_map_entry_t src_entry; /* result of last map lookup */
11077 vm_map_entry_t new_entry;
11078 vm_object_offset_t offset;
11079 vm_map_offset_t map_address;
11080 vm_map_offset_t src_start; /* start of entry to map */
11081 vm_map_offset_t src_end; /* end of region to be mapped */
11082 vm_object_t object;
11083 vm_map_version_t version;
11084 boolean_t src_needs_copy;
11085 boolean_t new_entry_needs_copy;
1c79356b 11086
91447636
A
11087 assert(map != VM_MAP_NULL);
11088 assert(size != 0 && size == vm_map_round_page(size));
11089 assert(inheritance == VM_INHERIT_NONE ||
11090 inheritance == VM_INHERIT_COPY ||
11091 inheritance == VM_INHERIT_SHARE);
1c79356b 11092
91447636
A
11093 /*
11094 * Compute start and end of region.
11095 */
11096 src_start = vm_map_trunc_page(addr);
11097 src_end = vm_map_round_page(src_start + size);
1c79356b 11098
91447636
A
11099 /*
11100 * Initialize map_header.
11101 */
11102 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11103 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11104 map_header->nentries = 0;
11105 map_header->entries_pageable = pageable;
1c79356b 11106
6d2010ae
A
11107 vm_map_store_init( map_header );
11108
91447636
A
11109 *cur_protection = VM_PROT_ALL;
11110 *max_protection = VM_PROT_ALL;
1c79356b 11111
91447636
A
11112 map_address = 0;
11113 mapped_size = 0;
11114 result = KERN_SUCCESS;
1c79356b 11115
91447636
A
11116 /*
11117 * The specified source virtual space might correspond to
11118 * multiple map entries, need to loop on them.
11119 */
11120 vm_map_lock(map);
11121 while (mapped_size != size) {
11122 vm_map_size_t entry_size;
1c79356b 11123
91447636
A
11124 /*
11125 * Find the beginning of the region.
11126 */
11127 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11128 result = KERN_INVALID_ADDRESS;
11129 break;
11130 }
1c79356b 11131
91447636
A
11132 if (src_start < src_entry->vme_start ||
11133 (mapped_size && src_start != src_entry->vme_start)) {
11134 result = KERN_INVALID_ADDRESS;
11135 break;
11136 }
1c79356b 11137
91447636
A
11138 tmp_size = size - mapped_size;
11139 if (src_end > src_entry->vme_end)
11140 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 11141
91447636 11142 entry_size = (vm_map_size_t)(src_entry->vme_end -
2d21ac55 11143 src_entry->vme_start);
1c79356b 11144
91447636
A
11145 if(src_entry->is_sub_map) {
11146 vm_map_reference(src_entry->object.sub_map);
11147 object = VM_OBJECT_NULL;
11148 } else {
11149 object = src_entry->object.vm_object;
55e303ae 11150
91447636
A
11151 if (object == VM_OBJECT_NULL) {
11152 object = vm_object_allocate(entry_size);
11153 src_entry->offset = 0;
11154 src_entry->object.vm_object = object;
11155 } else if (object->copy_strategy !=
11156 MEMORY_OBJECT_COPY_SYMMETRIC) {
11157 /*
11158 * We are already using an asymmetric
11159 * copy, and therefore we already have
11160 * the right object.
11161 */
11162 assert(!src_entry->needs_copy);
11163 } else if (src_entry->needs_copy || object->shadowed ||
11164 (object->internal && !object->true_share &&
2d21ac55 11165 !src_entry->is_shared &&
6d2010ae 11166 object->vo_size > entry_size)) {
1c79356b 11167
91447636
A
11168 vm_object_shadow(&src_entry->object.vm_object,
11169 &src_entry->offset,
11170 entry_size);
1c79356b 11171
91447636
A
11172 if (!src_entry->needs_copy &&
11173 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
11174 vm_prot_t prot;
11175
11176 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
11177
11178 if (override_nx(map, src_entry->alias) && prot)
0c530ab8 11179 prot |= VM_PROT_EXECUTE;
2d21ac55 11180
91447636 11181 if(map->mapped) {
2d21ac55
A
11182 vm_object_pmap_protect(
11183 src_entry->object.vm_object,
11184 src_entry->offset,
11185 entry_size,
11186 PMAP_NULL,
0c530ab8 11187 src_entry->vme_start,
0c530ab8 11188 prot);
2d21ac55
A
11189 } else {
11190 pmap_protect(vm_map_pmap(map),
11191 src_entry->vme_start,
11192 src_entry->vme_end,
11193 prot);
91447636
A
11194 }
11195 }
1c79356b 11196
91447636
A
11197 object = src_entry->object.vm_object;
11198 src_entry->needs_copy = FALSE;
11199 }
1c79356b 11200
1c79356b 11201
91447636 11202 vm_object_lock(object);
2d21ac55 11203 vm_object_reference_locked(object); /* object ref. for new entry */
91447636 11204 if (object->copy_strategy ==
2d21ac55 11205 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
11206 object->copy_strategy =
11207 MEMORY_OBJECT_COPY_DELAY;
11208 }
11209 vm_object_unlock(object);
11210 }
1c79356b 11211
91447636 11212 offset = src_entry->offset + (src_start - src_entry->vme_start);
1c79356b 11213
91447636
A
11214 new_entry = _vm_map_entry_create(map_header);
11215 vm_map_entry_copy(new_entry, src_entry);
11216 new_entry->use_pmap = FALSE; /* clr address space specifics */
1c79356b 11217
91447636
A
11218 new_entry->vme_start = map_address;
11219 new_entry->vme_end = map_address + tmp_size;
11220 new_entry->inheritance = inheritance;
11221 new_entry->offset = offset;
1c79356b 11222
91447636
A
11223 /*
11224 * The new region has to be copied now if required.
11225 */
11226 RestartCopy:
11227 if (!copy) {
11228 src_entry->is_shared = TRUE;
11229 new_entry->is_shared = TRUE;
11230 if (!(new_entry->is_sub_map))
11231 new_entry->needs_copy = FALSE;
1c79356b 11232
91447636
A
11233 } else if (src_entry->is_sub_map) {
11234 /* make this a COW sub_map if not already */
11235 new_entry->needs_copy = TRUE;
11236 object = VM_OBJECT_NULL;
11237 } else if (src_entry->wired_count == 0 &&
2d21ac55
A
11238 vm_object_copy_quickly(&new_entry->object.vm_object,
11239 new_entry->offset,
11240 (new_entry->vme_end -
11241 new_entry->vme_start),
11242 &src_needs_copy,
11243 &new_entry_needs_copy)) {
55e303ae 11244
91447636
A
11245 new_entry->needs_copy = new_entry_needs_copy;
11246 new_entry->is_shared = FALSE;
1c79356b 11247
91447636
A
11248 /*
11249 * Handle copy_on_write semantics.
11250 */
11251 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
11252 vm_prot_t prot;
11253
11254 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
11255
11256 if (override_nx(map, src_entry->alias) && prot)
0c530ab8 11257 prot |= VM_PROT_EXECUTE;
2d21ac55 11258
91447636
A
11259 vm_object_pmap_protect(object,
11260 offset,
11261 entry_size,
11262 ((src_entry->is_shared
2d21ac55 11263 || map->mapped) ?
91447636
A
11264 PMAP_NULL : map->pmap),
11265 src_entry->vme_start,
0c530ab8 11266 prot);
1c79356b 11267
91447636
A
11268 src_entry->needs_copy = TRUE;
11269 }
11270 /*
11271 * Throw away the old object reference of the new entry.
11272 */
11273 vm_object_deallocate(object);
1c79356b 11274
91447636
A
11275 } else {
11276 new_entry->is_shared = FALSE;
1c79356b 11277
91447636
A
11278 /*
11279 * The map can be safely unlocked since we
11280 * already hold a reference on the object.
11281 *
11282 * Record the timestamp of the map for later
11283 * verification, and unlock the map.
11284 */
11285 version.main_timestamp = map->timestamp;
11286 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 11287
91447636
A
11288 /*
11289 * Perform the copy.
11290 */
11291 if (src_entry->wired_count > 0) {
11292 vm_object_lock(object);
11293 result = vm_object_copy_slowly(
2d21ac55
A
11294 object,
11295 offset,
11296 entry_size,
11297 THREAD_UNINT,
11298 &new_entry->object.vm_object);
1c79356b 11299
91447636
A
11300 new_entry->offset = 0;
11301 new_entry->needs_copy = FALSE;
11302 } else {
11303 result = vm_object_copy_strategically(
2d21ac55
A
11304 object,
11305 offset,
11306 entry_size,
11307 &new_entry->object.vm_object,
11308 &new_entry->offset,
11309 &new_entry_needs_copy);
1c79356b 11310
91447636
A
11311 new_entry->needs_copy = new_entry_needs_copy;
11312 }
1c79356b 11313
91447636
A
11314 /*
11315 * Throw away the old object reference of the new entry.
11316 */
11317 vm_object_deallocate(object);
1c79356b 11318
91447636
A
11319 if (result != KERN_SUCCESS &&
11320 result != KERN_MEMORY_RESTART_COPY) {
11321 _vm_map_entry_dispose(map_header, new_entry);
11322 break;
11323 }
1c79356b 11324
91447636
A
11325 /*
11326 * Verify that the map has not substantially
11327 * changed while the copy was being made.
11328 */
1c79356b 11329
91447636
A
11330 vm_map_lock(map);
11331 if (version.main_timestamp + 1 != map->timestamp) {
11332 /*
11333 * Simple version comparison failed.
11334 *
11335 * Retry the lookup and verify that the
11336 * same object/offset are still present.
11337 */
11338 vm_object_deallocate(new_entry->
11339 object.vm_object);
11340 _vm_map_entry_dispose(map_header, new_entry);
11341 if (result == KERN_MEMORY_RESTART_COPY)
11342 result = KERN_SUCCESS;
11343 continue;
11344 }
1c79356b 11345
91447636
A
11346 if (result == KERN_MEMORY_RESTART_COPY) {
11347 vm_object_reference(object);
11348 goto RestartCopy;
11349 }
11350 }
1c79356b 11351
6d2010ae 11352 _vm_map_store_entry_link(map_header,
91447636 11353 map_header->links.prev, new_entry);
1c79356b 11354
6d2010ae
A
11355 /* Protections for submap mapping are irrelevant here */
11356 if( !src_entry->is_sub_map ) {
11357 *cur_protection &= src_entry->protection;
11358 *max_protection &= src_entry->max_protection;
11359 }
91447636
A
11360 map_address += tmp_size;
11361 mapped_size += tmp_size;
11362 src_start += tmp_size;
1c79356b 11363
91447636 11364 } /* end while */
1c79356b 11365
91447636
A
11366 vm_map_unlock(map);
11367 if (result != KERN_SUCCESS) {
11368 /*
11369 * Free all allocated elements.
11370 */
11371 for (src_entry = map_header->links.next;
11372 src_entry != (struct vm_map_entry *)&map_header->links;
11373 src_entry = new_entry) {
11374 new_entry = src_entry->vme_next;
6d2010ae 11375 _vm_map_store_entry_unlink(map_header, src_entry);
91447636
A
11376 vm_object_deallocate(src_entry->object.vm_object);
11377 _vm_map_entry_dispose(map_header, src_entry);
11378 }
11379 }
11380 return result;
1c79356b
A
11381}
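/*
 * Illustrative sketch (hypothetical helper, never compiled or called;
 * kept under "#if 0"): the record-timestamp / unlock / relock / compare
 * pattern that the slow-copy path of vm_map_remap_extract() above uses
 * to detect concurrent map changes.
 */
#if 0
static void
vm_map_timestamp_check_sketch(vm_map_t map)
{
	vm_map_version_t	version;

	vm_map_lock(map);
	version.main_timestamp = map->timestamp;
	vm_map_unlock(map);	/* unlocking bumps the timestamp once */

	/* ... work that must be done without holding the map lock ... */

	vm_map_lock(map);
	if (version.main_timestamp + 1 != map->timestamp) {
		/* the map changed while unlocked: discard and retry */
	}
	vm_map_unlock(map);
}
#endif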
11382
11383/*
91447636 11384 * Routine: vm_remap
1c79356b 11385 *
91447636
A
11386 * Map portion of a task's address space.
11387 * Mapped region must not overlap more than
11388 * one vm memory object. Protections and
11389 * inheritance attributes remain the same
11390 * as in the original task and are out parameters.
11391 * Source and target tasks can be identical.
11392 * Other attributes are identical as for vm_map().
1c79356b
A
11393 */
11394kern_return_t
91447636
A
11395vm_map_remap(
11396 vm_map_t target_map,
11397 vm_map_address_t *address,
11398 vm_map_size_t size,
11399 vm_map_offset_t mask,
060df5ea 11400 int flags,
91447636
A
11401 vm_map_t src_map,
11402 vm_map_offset_t memory_address,
1c79356b 11403 boolean_t copy,
1c79356b
A
11404 vm_prot_t *cur_protection,
11405 vm_prot_t *max_protection,
91447636 11406 vm_inherit_t inheritance)
1c79356b
A
11407{
11408 kern_return_t result;
91447636 11409 vm_map_entry_t entry;
0c530ab8 11410 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 11411 vm_map_entry_t new_entry;
91447636 11412 struct vm_map_header map_header;
1c79356b 11413
91447636
A
11414 if (target_map == VM_MAP_NULL)
11415 return KERN_INVALID_ARGUMENT;
1c79356b 11416
91447636 11417 switch (inheritance) {
2d21ac55
A
11418 case VM_INHERIT_NONE:
11419 case VM_INHERIT_COPY:
11420 case VM_INHERIT_SHARE:
91447636
A
11421 if (size != 0 && src_map != VM_MAP_NULL)
11422 break;
11423 /*FALL THRU*/
2d21ac55 11424 default:
91447636
A
11425 return KERN_INVALID_ARGUMENT;
11426 }
1c79356b 11427
91447636 11428 size = vm_map_round_page(size);
1c79356b 11429
91447636 11430 result = vm_map_remap_extract(src_map, memory_address,
2d21ac55
A
11431 size, copy, &map_header,
11432 cur_protection,
11433 max_protection,
11434 inheritance,
11435 target_map->hdr.
11436 entries_pageable);
1c79356b 11437
91447636
A
11438 if (result != KERN_SUCCESS) {
11439 return result;
11440 }
1c79356b 11441
91447636
A
11442 /*
11443 * Allocate/check a range of free virtual address
11444 * space for the target
1c79356b 11445 */
91447636
A
11446 *address = vm_map_trunc_page(*address);
11447 vm_map_lock(target_map);
11448 result = vm_map_remap_range_allocate(target_map, address, size,
060df5ea 11449 mask, flags, &insp_entry);
1c79356b 11450
91447636
A
11451 for (entry = map_header.links.next;
11452 entry != (struct vm_map_entry *)&map_header.links;
11453 entry = new_entry) {
11454 new_entry = entry->vme_next;
6d2010ae 11455 _vm_map_store_entry_unlink(&map_header, entry);
91447636
A
11456 if (result == KERN_SUCCESS) {
11457 entry->vme_start += *address;
11458 entry->vme_end += *address;
6d2010ae 11459 vm_map_store_entry_link(target_map, insp_entry, entry);
91447636
A
11460 insp_entry = entry;
11461 } else {
11462 if (!entry->is_sub_map) {
11463 vm_object_deallocate(entry->object.vm_object);
11464 } else {
11465 vm_map_deallocate(entry->object.sub_map);
2d21ac55 11466 }
91447636 11467 _vm_map_entry_dispose(&map_header, entry);
1c79356b 11468 }
91447636 11469 }
1c79356b 11470
6d2010ae
A
11471 if( target_map->disable_vmentry_reuse == TRUE) {
11472 if( target_map->highest_entry_end < insp_entry->vme_end ){
11473 target_map->highest_entry_end = insp_entry->vme_end;
11474 }
11475 }
11476
91447636
A
11477 if (result == KERN_SUCCESS) {
11478 target_map->size += size;
0c530ab8 11479 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
91447636
A
11480 }
11481 vm_map_unlock(target_map);
1c79356b 11482
91447636
A
11483 if (result == KERN_SUCCESS && target_map->wiring_required)
11484 result = vm_map_wire(target_map, *address,
11485 *address + size, *cur_protection, TRUE);
11486 return result;
11487}
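/*
 * Usage sketch (hypothetical caller; the helper name and parameters are
 * assumptions, nothing in this file references it): share "size" bytes at
 * "src_addr" of "src_map" into "target_map" at a kernel-chosen address,
 * letting the protections come back through the out parameters.
 */
#if 0
static kern_return_t
vm_map_remap_share_sketch(
	vm_map_t		target_map,
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		size,
	vm_map_address_t	*out_addr)
{
	vm_prot_t	cur_prot, max_prot;

	*out_addr = 0;		/* hint only, with VM_FLAGS_ANYWHERE */
	return vm_map_remap(target_map, out_addr, size,
			    (vm_map_offset_t)0,	/* no alignment mask */
			    VM_FLAGS_ANYWHERE,
			    src_map, src_addr,
			    FALSE,		/* copy: share the pages */
			    &cur_prot, &max_prot,
			    VM_INHERIT_NONE);
}
#endif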
1c79356b 11488
91447636
A
11489/*
11490 * Routine: vm_map_remap_range_allocate
11491 *
11492 * Description:
11493 * Allocate a range in the specified virtual address map.
11494 * Returns the address and the map entry just before the allocated
11495 * range.
11496 *
11497 * Map must be locked.
11498 */
1c79356b 11499
91447636
A
11500static kern_return_t
11501vm_map_remap_range_allocate(
11502 vm_map_t map,
11503 vm_map_address_t *address, /* IN/OUT */
11504 vm_map_size_t size,
11505 vm_map_offset_t mask,
060df5ea 11506 int flags,
91447636
A
11507 vm_map_entry_t *map_entry) /* OUT */
11508{
060df5ea
A
11509 vm_map_entry_t entry;
11510 vm_map_offset_t start;
11511 vm_map_offset_t end;
11512 kern_return_t kr;
1c79356b 11513
2d21ac55 11514StartAgain: ;
1c79356b 11515
2d21ac55 11516 start = *address;
1c79356b 11517
060df5ea 11518 if (flags & VM_FLAGS_ANYWHERE)
2d21ac55
A
11519 {
11520 /*
11521 * Calculate the first possible address.
11522 */
1c79356b 11523
2d21ac55
A
11524 if (start < map->min_offset)
11525 start = map->min_offset;
11526 if (start > map->max_offset)
11527 return(KERN_NO_SPACE);
91447636 11528
2d21ac55
A
11529 /*
11530 * Look for the first possible address;
11531 * if there's already something at this
11532 * address, we have to start after it.
11533 */
1c79356b 11534
6d2010ae
A
11535 if( map->disable_vmentry_reuse == TRUE) {
11536 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 11537 } else {
6d2010ae
A
11538 assert(first_free_is_valid(map));
11539 if (start == map->min_offset) {
11540 if ((entry = map->first_free) != vm_map_to_entry(map))
11541 start = entry->vme_end;
11542 } else {
11543 vm_map_entry_t tmp_entry;
11544 if (vm_map_lookup_entry(map, start, &tmp_entry))
11545 start = tmp_entry->vme_end;
11546 entry = tmp_entry;
11547 }
2d21ac55 11548 }
91447636 11549
2d21ac55
A
11550 /*
11551 * In any case, the "entry" always precedes
11552 * the proposed new region throughout the
11553 * loop:
11554 */
1c79356b 11555
2d21ac55
A
11556 while (TRUE) {
11557 register vm_map_entry_t next;
11558
11559 /*
11560 * Find the end of the proposed new region.
11561 * Be sure we didn't go beyond the end, or
11562 * wrap around the address.
11563 */
11564
11565 end = ((start + mask) & ~mask);
11566 if (end < start)
11567 return(KERN_NO_SPACE);
11568 start = end;
11569 end += size;
11570
11571 if ((end > map->max_offset) || (end < start)) {
11572 if (map->wait_for_space) {
11573 if (size <= (map->max_offset -
11574 map->min_offset)) {
11575 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11576 vm_map_unlock(map);
11577 thread_block(THREAD_CONTINUE_NULL);
11578 vm_map_lock(map);
11579 goto StartAgain;
11580 }
11581 }
91447636 11582
2d21ac55
A
11583 return(KERN_NO_SPACE);
11584 }
1c79356b 11585
2d21ac55
A
11586 /*
11587 * If there are no more entries, we must win.
11588 */
1c79356b 11589
2d21ac55
A
11590 next = entry->vme_next;
11591 if (next == vm_map_to_entry(map))
11592 break;
1c79356b 11593
2d21ac55
A
11594 /*
11595 * If there is another entry, it must be
11596 * after the end of the potential new region.
11597 */
1c79356b 11598
2d21ac55
A
11599 if (next->vme_start >= end)
11600 break;
1c79356b 11601
2d21ac55
A
11602 /*
11603 * Didn't fit -- move to the next entry.
11604 */
1c79356b 11605
2d21ac55
A
11606 entry = next;
11607 start = entry->vme_end;
11608 }
11609 *address = start;
11610 } else {
11611 vm_map_entry_t temp_entry;
91447636 11612
2d21ac55
A
11613 /*
11614 * Verify that:
11615 * the address doesn't itself violate
11616 * the mask requirement.
11617 */
1c79356b 11618
2d21ac55
A
11619 if ((start & mask) != 0)
11620 return(KERN_NO_SPACE);
1c79356b 11621
1c79356b 11622
2d21ac55
A
11623 /*
11624 * ... the address is within bounds
11625 */
1c79356b 11626
2d21ac55 11627 end = start + size;
1c79356b 11628
2d21ac55
A
11629 if ((start < map->min_offset) ||
11630 (end > map->max_offset) ||
11631 (start >= end)) {
11632 return(KERN_INVALID_ADDRESS);
11633 }
1c79356b 11634
060df5ea
A
11635 /*
11636 * If we're asked to overwrite whatever was mapped in that
11637 * range, first deallocate that range.
11638 */
11639 if (flags & VM_FLAGS_OVERWRITE) {
11640 vm_map_t zap_map;
11641
11642 /*
11643 * We use a "zap_map" to avoid having to unlock
11644 * the "map" in vm_map_delete(), which would compromise
11645 * the atomicity of the "deallocate" and then "remap"
11646 * combination.
11647 */
11648 zap_map = vm_map_create(PMAP_NULL,
11649 start,
11650 end - start,
11651 map->hdr.entries_pageable);
11652 if (zap_map == VM_MAP_NULL) {
11653 return KERN_RESOURCE_SHORTAGE;
11654 }
11655
11656 kr = vm_map_delete(map, start, end,
11657 VM_MAP_REMOVE_SAVE_ENTRIES,
11658 zap_map);
11659 if (kr == KERN_SUCCESS) {
11660 vm_map_destroy(zap_map,
11661 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11662 zap_map = VM_MAP_NULL;
11663 }
11664 }
11665
2d21ac55
A
11666 /*
11667 * ... the starting address isn't allocated
11668 */
91447636 11669
2d21ac55
A
11670 if (vm_map_lookup_entry(map, start, &temp_entry))
11671 return(KERN_NO_SPACE);
91447636 11672
2d21ac55 11673 entry = temp_entry;
91447636 11674
2d21ac55
A
11675 /*
11676 * ... the next region doesn't overlap the
11677 * end point.
11678 */
1c79356b 11679
2d21ac55
A
11680 if ((entry->vme_next != vm_map_to_entry(map)) &&
11681 (entry->vme_next->vme_start < end))
11682 return(KERN_NO_SPACE);
11683 }
11684 *map_entry = entry;
11685 return(KERN_SUCCESS);
91447636 11686}
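/*
 * Worked example of the mask rounding used above (values are
 * illustrative, the helper is hypothetical): with a 16 KB alignment
 * mask of 0x3FFF, a candidate start of 0x1234 rounds up to 0x4000
 * before the size is added.
 */
#if 0
static vm_map_offset_t
vm_map_mask_round_sketch(void)
{
	vm_map_offset_t	start = 0x1234;
	vm_map_offset_t	mask = 0x3FFF;

	return (start + mask) & ~mask;	/* yields 0x4000 */
}
#endif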
1c79356b 11687
91447636
A
11688/*
11689 * vm_map_switch:
11690 *
11691 * Set the address map for the current thread to the specified map
11692 */
1c79356b 11693
91447636
A
11694vm_map_t
11695vm_map_switch(
11696 vm_map_t map)
11697{
11698 int mycpu;
11699 thread_t thread = current_thread();
11700 vm_map_t oldmap = thread->map;
1c79356b 11701
91447636
A
11702 mp_disable_preemption();
11703 mycpu = cpu_number();
1c79356b 11704
91447636
A
11705 /*
11706 * Deactivate the current map and activate the requested map
11707 */
11708 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 11709
91447636
A
11710 mp_enable_preemption();
11711 return(oldmap);
11712}
1c79356b 11713
1c79356b 11714
91447636
A
11715/*
11716 * Routine: vm_map_write_user
11717 *
11718 * Description:
11719 * Copy out data from a kernel space into space in the
11720 * destination map. The space must already exist in the
11721 * destination map.
11722 * NOTE: This routine should only be called by threads
11723 * that can block on a page fault, i.e. kernel-mode user
11724 * threads.
11725 *
11726 */
11727kern_return_t
11728vm_map_write_user(
11729 vm_map_t map,
11730 void *src_p,
11731 vm_map_address_t dst_addr,
11732 vm_size_t size)
11733{
11734 kern_return_t kr = KERN_SUCCESS;
1c79356b 11735
91447636
A
11736 if(current_map() == map) {
11737 if (copyout(src_p, dst_addr, size)) {
11738 kr = KERN_INVALID_ADDRESS;
11739 }
11740 } else {
11741 vm_map_t oldmap;
1c79356b 11742
91447636
A
11743 /* take on the identity of the target map while doing */
11744 /* the transfer */
1c79356b 11745
91447636
A
11746 vm_map_reference(map);
11747 oldmap = vm_map_switch(map);
11748 if (copyout(src_p, dst_addr, size)) {
11749 kr = KERN_INVALID_ADDRESS;
1c79356b 11750 }
91447636
A
11751 vm_map_switch(oldmap);
11752 vm_map_deallocate(map);
1c79356b 11753 }
91447636 11754 return kr;
1c79356b
A
11755}
11756
11757/*
91447636
A
11758 * Routine: vm_map_read_user
11759 *
11760 * Description:
11761 * Copy in data from a user space source map into the
11762 * kernel map. The space must already exist in the
11763 * kernel map.
11764 * NOTE: This routine should only be called by threads
11765 * which can block on a page fault. i.e. kernel mode user
11766 * threads.
1c79356b 11767 *
1c79356b
A
11768 */
11769kern_return_t
91447636
A
11770vm_map_read_user(
11771 vm_map_t map,
11772 vm_map_address_t src_addr,
11773 void *dst_p,
11774 vm_size_t size)
1c79356b 11775{
91447636 11776 kern_return_t kr = KERN_SUCCESS;
1c79356b 11777
91447636
A
11778 if(current_map() == map) {
11779 if (copyin(src_addr, dst_p, size)) {
11780 kr = KERN_INVALID_ADDRESS;
11781 }
11782 } else {
11783 vm_map_t oldmap;
1c79356b 11784
91447636
A
11785 /* take on the identity of the target map while doing */
11786 /* the transfer */
11787
11788 vm_map_reference(map);
11789 oldmap = vm_map_switch(map);
11790 if (copyin(src_addr, dst_p, size)) {
11791 kr = KERN_INVALID_ADDRESS;
11792 }
11793 vm_map_switch(oldmap);
11794 vm_map_deallocate(map);
1c79356b 11795 }
91447636
A
11796 return kr;
11797}
11798
1c79356b 11799
91447636
A
11800/*
11801 * vm_map_check_protection:
11802 *
11803 * Assert that the target map allows the specified
11804 * privilege on the entire address region given.
11805 * The entire region must be allocated.
11806 */
2d21ac55
A
11807boolean_t
11808vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11809 vm_map_offset_t end, vm_prot_t protection)
91447636 11810{
2d21ac55
A
11811 vm_map_entry_t entry;
11812 vm_map_entry_t tmp_entry;
1c79356b 11813
91447636 11814 vm_map_lock(map);
1c79356b 11815
2d21ac55 11816 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
91447636 11817 {
2d21ac55
A
11818 vm_map_unlock(map);
11819 return (FALSE);
1c79356b
A
11820 }
11821
91447636
A
11822 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11823 vm_map_unlock(map);
11824 return(FALSE);
11825 }
1c79356b 11826
91447636
A
11827 entry = tmp_entry;
11828
11829 while (start < end) {
11830 if (entry == vm_map_to_entry(map)) {
11831 vm_map_unlock(map);
11832 return(FALSE);
1c79356b 11833 }
1c79356b 11834
91447636
A
11835 /*
11836 * No holes allowed!
11837 */
1c79356b 11838
91447636
A
11839 if (start < entry->vme_start) {
11840 vm_map_unlock(map);
11841 return(FALSE);
11842 }
11843
11844 /*
11845 * Check protection associated with entry.
11846 */
11847
11848 if ((entry->protection & protection) != protection) {
11849 vm_map_unlock(map);
11850 return(FALSE);
11851 }
11852
11853 /* go to next entry */
11854
11855 start = entry->vme_end;
11856 entry = entry->vme_next;
11857 }
11858 vm_map_unlock(map);
11859 return(TRUE);
1c79356b
A
11860}
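/*
 * Usage sketch (hypothetical helper): check that an entire range is
 * mapped with both read and write permission; any hole or weaker entry
 * makes vm_map_check_protection() return FALSE.
 */
#if 0
static boolean_t
vm_map_check_rw_sketch(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_check_protection(map, start, end,
				       VM_PROT_READ | VM_PROT_WRITE);
}
#endif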
11861
1c79356b 11862kern_return_t
91447636
A
11863vm_map_purgable_control(
11864 vm_map_t map,
11865 vm_map_offset_t address,
11866 vm_purgable_t control,
11867 int *state)
1c79356b 11868{
91447636
A
11869 vm_map_entry_t entry;
11870 vm_object_t object;
11871 kern_return_t kr;
1c79356b 11872
1c79356b 11873 /*
91447636
A
11874 * Vet all the input parameters and current type and state of the
11875 * underlying object. Return with an error if anything is amiss.
1c79356b 11876 */
91447636
A
11877 if (map == VM_MAP_NULL)
11878 return(KERN_INVALID_ARGUMENT);
1c79356b 11879
91447636 11880 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7
A
11881 control != VM_PURGABLE_GET_STATE &&
11882 control != VM_PURGABLE_PURGE_ALL)
91447636 11883 return(KERN_INVALID_ARGUMENT);
1c79356b 11884
b0d623f7
A
11885 if (control == VM_PURGABLE_PURGE_ALL) {
11886 vm_purgeable_object_purge_all();
11887 return KERN_SUCCESS;
11888 }
11889
91447636 11890 if (control == VM_PURGABLE_SET_STATE &&
b0d623f7 11891 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2d21ac55 11892 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
91447636
A
11893 return(KERN_INVALID_ARGUMENT);
11894
b0d623f7 11895 vm_map_lock_read(map);
91447636
A
11896
11897 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11898
11899 /*
11900 * Must pass a valid non-submap address.
11901 */
b0d623f7 11902 vm_map_unlock_read(map);
91447636
A
11903 return(KERN_INVALID_ADDRESS);
11904 }
11905
11906 if ((entry->protection & VM_PROT_WRITE) == 0) {
11907 /*
11908 * Can't apply purgable controls to something you can't write.
11909 */
b0d623f7 11910 vm_map_unlock_read(map);
91447636
A
11911 return(KERN_PROTECTION_FAILURE);
11912 }
11913
11914 object = entry->object.vm_object;
11915 if (object == VM_OBJECT_NULL) {
11916 /*
11917 * Object must already be present or it can't be purgable.
11918 */
b0d623f7 11919 vm_map_unlock_read(map);
91447636
A
11920 return KERN_INVALID_ARGUMENT;
11921 }
11922
11923 vm_object_lock(object);
11924
11925 if (entry->offset != 0 ||
6d2010ae 11926 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
11927 /*
11928 * Can only apply purgable controls to the whole (existing)
11929 * object at once.
11930 */
b0d623f7 11931 vm_map_unlock_read(map);
91447636
A
11932 vm_object_unlock(object);
11933 return KERN_INVALID_ARGUMENT;
1c79356b
A
11934 }
11935
b0d623f7 11936 vm_map_unlock_read(map);
1c79356b 11937
91447636 11938 kr = vm_object_purgable_control(object, control, state);
1c79356b 11939
91447636 11940 vm_object_unlock(object);
1c79356b 11941
91447636
A
11942 return kr;
11943}
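/*
 * Usage sketch (hypothetical helper; VM_PURGABLE_VOLATILE is from
 * <mach/vm_purgable.h>): mark the purgeable object backing "address"
 * volatile, then read the state back.
 */
#if 0
static kern_return_t
vm_map_purgable_volatile_sketch(
	vm_map_t	map,
	vm_map_offset_t	address)
{
	int		state;
	kern_return_t	kr;

	state = VM_PURGABLE_VOLATILE;
	kr = vm_map_purgable_control(map, address,
				     VM_PURGABLE_SET_STATE, &state);
	if (kr != KERN_SUCCESS)
		return kr;

	return vm_map_purgable_control(map, address,
				       VM_PURGABLE_GET_STATE, &state);
}
#endif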
1c79356b 11944
91447636 11945kern_return_t
b0d623f7 11946vm_map_page_query_internal(
2d21ac55 11947 vm_map_t target_map,
91447636 11948 vm_map_offset_t offset,
2d21ac55
A
11949 int *disposition,
11950 int *ref_count)
91447636 11951{
b0d623f7
A
11952 kern_return_t kr;
11953 vm_page_info_basic_data_t info;
11954 mach_msg_type_number_t count;
11955
11956 count = VM_PAGE_INFO_BASIC_COUNT;
11957 kr = vm_map_page_info(target_map,
11958 offset,
11959 VM_PAGE_INFO_BASIC,
11960 (vm_page_info_t) &info,
11961 &count);
11962 if (kr == KERN_SUCCESS) {
11963 *disposition = info.disposition;
11964 *ref_count = info.ref_count;
11965 } else {
11966 *disposition = 0;
11967 *ref_count = 0;
11968 }
2d21ac55 11969
b0d623f7
A
11970 return kr;
11971}
11972
11973kern_return_t
11974vm_map_page_info(
11975 vm_map_t map,
11976 vm_map_offset_t offset,
11977 vm_page_info_flavor_t flavor,
11978 vm_page_info_t info,
11979 mach_msg_type_number_t *count)
11980{
11981 vm_map_entry_t map_entry;
11982 vm_object_t object;
11983 vm_page_t m;
11984 kern_return_t kr;
11985 kern_return_t retval = KERN_SUCCESS;
11986 boolean_t top_object;
11987 int disposition;
11988 int ref_count;
11989 vm_object_id_t object_id;
11990 vm_page_info_basic_t basic_info;
11991 int depth;
6d2010ae 11992 vm_map_offset_t offset_in_page;
2d21ac55 11993
b0d623f7
A
11994 switch (flavor) {
11995 case VM_PAGE_INFO_BASIC:
11996 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
11997 /*
11998 * The "vm_page_info_basic_data" structure was not
11999 * properly padded, so allow the size to be off by
12000 * one to maintain backwards binary compatibility...
12001 */
12002 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12003 return KERN_INVALID_ARGUMENT;
b0d623f7
A
12004 }
12005 break;
12006 default:
12007 return KERN_INVALID_ARGUMENT;
91447636 12008 }
2d21ac55 12009
b0d623f7
A
12010 disposition = 0;
12011 ref_count = 0;
12012 object_id = 0;
12013 top_object = TRUE;
12014 depth = 0;
12015
12016 retval = KERN_SUCCESS;
6d2010ae 12017 offset_in_page = offset & PAGE_MASK;
b0d623f7
A
12018 offset = vm_map_trunc_page(offset);
12019
12020 vm_map_lock_read(map);
12021
12022 /*
12023 * First, find the map entry covering "offset", going down
12024 * submaps if necessary.
12025 */
12026 for (;;) {
12027 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12028 vm_map_unlock_read(map);
12029 return KERN_INVALID_ADDRESS;
12030 }
12031 /* compute offset from this map entry's start */
12032 offset -= map_entry->vme_start;
12033 /* compute offset into this map entry's object (or submap) */
12034 offset += map_entry->offset;
12035
12036 if (map_entry->is_sub_map) {
12037 vm_map_t sub_map;
2d21ac55
A
12038
12039 sub_map = map_entry->object.sub_map;
12040 vm_map_lock_read(sub_map);
b0d623f7 12041 vm_map_unlock_read(map);
2d21ac55 12042
b0d623f7
A
12043 map = sub_map;
12044
12045 ref_count = MAX(ref_count, map->ref_count);
12046 continue;
1c79356b 12047 }
b0d623f7 12048 break;
91447636 12049 }
b0d623f7
A
12050
12051 object = map_entry->object.vm_object;
12052 if (object == VM_OBJECT_NULL) {
12053 /* no object -> no page */
12054 vm_map_unlock_read(map);
12055 goto done;
12056 }
12057
91447636 12058 vm_object_lock(object);
b0d623f7
A
12059 vm_map_unlock_read(map);
12060
12061 /*
12062 * Go down the VM object shadow chain until we find the page
12063 * we're looking for.
12064 */
12065 for (;;) {
12066 ref_count = MAX(ref_count, object->ref_count);
2d21ac55 12067
91447636 12068 m = vm_page_lookup(object, offset);
2d21ac55 12069
91447636 12070 if (m != VM_PAGE_NULL) {
b0d623f7 12071 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
91447636
A
12072 break;
12073 } else {
2d21ac55
A
12074#if MACH_PAGEMAP
12075 if (object->existence_map) {
b0d623f7
A
12076 if (vm_external_state_get(object->existence_map,
12077 offset) ==
12078 VM_EXTERNAL_STATE_EXISTS) {
2d21ac55
A
12079 /*
12080 * this page has been paged out
12081 */
b0d623f7 12082 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
2d21ac55
A
12083 break;
12084 }
12085 } else
12086#endif
b0d623f7 12087 {
2d21ac55 12088 if (object->internal &&
b0d623f7
A
12089 object->alive &&
12090 !object->terminating &&
12091 object->pager_ready) {
2d21ac55 12092
b0d623f7 12093 memory_object_t pager;
2d21ac55 12094
b0d623f7
A
12095 vm_object_paging_begin(object);
12096 pager = object->pager;
12097 vm_object_unlock(object);
2d21ac55 12098
2d21ac55 12099 /*
b0d623f7
A
12100 * Ask the default pager if
12101 * it has this page.
2d21ac55 12102 */
b0d623f7
A
12103 kr = memory_object_data_request(
12104 pager,
12105 offset + object->paging_offset,
12106 0, /* just poke the pager */
12107 VM_PROT_READ,
12108 NULL);
12109
12110 vm_object_lock(object);
12111 vm_object_paging_end(object);
12112
12113 if (kr == KERN_SUCCESS) {
12114 /* the default pager has it */
12115 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12116 break;
12117 }
2d21ac55
A
12118 }
12119 }
b0d623f7 12120
2d21ac55
A
12121 if (object->shadow != VM_OBJECT_NULL) {
12122 vm_object_t shadow;
12123
6d2010ae 12124 offset += object->vo_shadow_offset;
2d21ac55
A
12125 shadow = object->shadow;
12126
12127 vm_object_lock(shadow);
12128 vm_object_unlock(object);
12129
12130 object = shadow;
12131 top_object = FALSE;
b0d623f7 12132 depth++;
2d21ac55 12133 } else {
b0d623f7
A
12134// if (!object->internal)
12135// break;
12136// retval = KERN_FAILURE;
12137// goto done_with_object;
12138 break;
91447636 12139 }
91447636
A
12140 }
12141 }
91447636
A
12142 /* The ref_count is not strictly accurate: it measures the number */
12143 /* of entities holding a ref on the object; they may not be mapping */
12144 /* the object, or may not be mapping the section holding the */
12145 /* target page, but it's still a ballpark number and, though an */
12146 /* overcount, it picks up the copy-on-write cases. */
1c79356b 12147
91447636
A
12148 /* We could also get a picture of page sharing from pmap_attributes, */
12149 /* but this would undercount since only faulted-in mappings would */
12150 /* show up. */
1c79356b 12151
2d21ac55 12152 if (top_object == TRUE && object->shadow)
b0d623f7
A
12153 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12154
12155 if (! object->internal)
12156 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
2d21ac55
A
12157
12158 if (m == VM_PAGE_NULL)
b0d623f7 12159 goto done_with_object;
2d21ac55 12160
91447636 12161 if (m->fictitious) {
b0d623f7
A
12162 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12163 goto done_with_object;
91447636 12164 }
2d21ac55 12165 if (m->dirty || pmap_is_modified(m->phys_page))
b0d623f7 12166 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
1c79356b 12167
2d21ac55 12168 if (m->reference || pmap_is_referenced(m->phys_page))
b0d623f7 12169 disposition |= VM_PAGE_QUERY_PAGE_REF;
1c79356b 12170
2d21ac55 12171 if (m->speculative)
b0d623f7 12172 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
1c79356b 12173
593a1d5f 12174 if (m->cs_validated)
b0d623f7 12175 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
593a1d5f 12176 if (m->cs_tainted)
b0d623f7 12177 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
593a1d5f 12178
b0d623f7 12179done_with_object:
2d21ac55 12180 vm_object_unlock(object);
b0d623f7
A
12181done:
12182
12183 switch (flavor) {
12184 case VM_PAGE_INFO_BASIC:
12185 basic_info = (vm_page_info_basic_t) info;
12186 basic_info->disposition = disposition;
12187 basic_info->ref_count = ref_count;
12188 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
6d2010ae
A
12189 basic_info->offset =
12190 (memory_object_offset_t) offset + offset_in_page;
b0d623f7
A
12191 basic_info->depth = depth;
12192 break;
12193 }
0c530ab8 12194
2d21ac55 12195 return retval;
91447636
A
12196}
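/*
 * Usage sketch (hypothetical helper) mirroring
 * vm_map_page_query_internal() above: fetch the basic info for one
 * address and report whether the page is resident.
 */
#if 0
static boolean_t
vm_map_page_resident_sketch(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count;
	kern_return_t			kr;

	count = VM_PAGE_INFO_BASIC_COUNT;
	kr = vm_map_page_info(map, addr, VM_PAGE_INFO_BASIC,
			      (vm_page_info_t) &info, &count);
	if (kr != KERN_SUCCESS)
		return FALSE;

	return (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
#endif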
12197
12198/*
12199 * vm_map_msync
12200 *
12201 * Synchronises the memory range specified with its backing store
12202 * image by either flushing or cleaning the contents to the appropriate
12203 * memory manager, engaging in a memory object synchronize dialog with
12204 * the manager. The client doesn't return until the manager issues
12205 * the m_o_s_completed message. MIG magically converts the user task parameter
12206 * to the task's address map.
12207 *
12208 * interpretation of sync_flags
12209 * VM_SYNC_INVALIDATE - discard pages, only return precious
12210 * pages to manager.
12211 *
12212 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12213 * - discard pages, write dirty or precious
12214 * pages back to memory manager.
12215 *
12216 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12217 * - write dirty or precious pages back to
12218 * the memory manager.
12219 *
12220 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12221 * is a hole in the region, and we would
12222 * have returned KERN_SUCCESS, return
12223 * KERN_INVALID_ADDRESS instead.
12224 *
12225 * NOTE
12226 * The memory object attributes have not yet been implemented, this
12227 * function will have to deal with the invalidate attribute
12228 *
12229 * RETURNS
12230 * KERN_INVALID_TASK Bad task parameter
12231 * KERN_INVALID_ARGUMENT both sync and async were specified.
12232 * KERN_SUCCESS The usual.
12233 * KERN_INVALID_ADDRESS There was a hole in the region.
12234 */
12235
12236kern_return_t
12237vm_map_msync(
12238 vm_map_t map,
12239 vm_map_address_t address,
12240 vm_map_size_t size,
12241 vm_sync_t sync_flags)
12242{
12243 msync_req_t msr;
12244 msync_req_t new_msr;
12245 queue_chain_t req_q; /* queue of requests for this msync */
12246 vm_map_entry_t entry;
12247 vm_map_size_t amount_left;
12248 vm_object_offset_t offset;
12249 boolean_t do_sync_req;
91447636 12250 boolean_t had_hole = FALSE;
2d21ac55 12251 memory_object_t pager;
91447636
A
12252
12253 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12254 (sync_flags & VM_SYNC_SYNCHRONOUS))
12255 return(KERN_INVALID_ARGUMENT);
1c79356b
A
12256
12257 /*
91447636 12258 * align address and size on page boundaries
1c79356b 12259 */
91447636
A
12260 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12261 address = vm_map_trunc_page(address);
1c79356b 12262
91447636
A
12263 if (map == VM_MAP_NULL)
12264 return(KERN_INVALID_TASK);
1c79356b 12265
91447636
A
12266 if (size == 0)
12267 return(KERN_SUCCESS);
1c79356b 12268
91447636
A
12269 queue_init(&req_q);
12270 amount_left = size;
1c79356b 12271
91447636
A
12272 while (amount_left > 0) {
12273 vm_object_size_t flush_size;
12274 vm_object_t object;
1c79356b 12275
91447636
A
12276 vm_map_lock(map);
12277 if (!vm_map_lookup_entry(map,
2d21ac55 12278 vm_map_trunc_page(address), &entry)) {
91447636 12279
2d21ac55 12280 vm_map_size_t skip;
91447636
A
12281
12282 /*
12283 * hole in the address map.
12284 */
12285 had_hole = TRUE;
12286
12287 /*
12288 * Check for empty map.
12289 */
12290 if (entry == vm_map_to_entry(map) &&
12291 entry->vme_next == entry) {
12292 vm_map_unlock(map);
12293 break;
12294 }
12295 /*
12296 * Check that we don't wrap and that
12297 * we have at least one real map entry.
12298 */
12299 if ((map->hdr.nentries == 0) ||
12300 (entry->vme_next->vme_start < address)) {
12301 vm_map_unlock(map);
12302 break;
12303 }
12304 /*
12305 * Move up to the next entry if needed
12306 */
12307 skip = (entry->vme_next->vme_start - address);
12308 if (skip >= amount_left)
12309 amount_left = 0;
12310 else
12311 amount_left -= skip;
12312 address = entry->vme_next->vme_start;
12313 vm_map_unlock(map);
12314 continue;
12315 }
1c79356b 12316
91447636 12317 offset = address - entry->vme_start;
1c79356b 12318
91447636
A
12319 /*
12320 * do we have more to flush than is contained in this
12321 * entry ?
12322 */
12323 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12324 flush_size = entry->vme_end -
2d21ac55 12325 (entry->vme_start + offset);
91447636
A
12326 } else {
12327 flush_size = amount_left;
12328 }
12329 amount_left -= flush_size;
12330 address += flush_size;
1c79356b 12331
91447636
A
12332 if (entry->is_sub_map == TRUE) {
12333 vm_map_t local_map;
12334 vm_map_offset_t local_offset;
1c79356b 12335
91447636
A
12336 local_map = entry->object.sub_map;
12337 local_offset = entry->offset;
12338 vm_map_unlock(map);
12339 if (vm_map_msync(
2d21ac55
A
12340 local_map,
12341 local_offset,
12342 flush_size,
12343 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
12344 had_hole = TRUE;
12345 }
12346 continue;
12347 }
12348 object = entry->object.vm_object;
1c79356b 12349
91447636
A
12350 /*
12351 * We can't sync this object if the object has not been
12352 * created yet
12353 */
12354 if (object == VM_OBJECT_NULL) {
12355 vm_map_unlock(map);
12356 continue;
12357 }
12358 offset += entry->offset;
1c79356b 12359
91447636 12360 vm_object_lock(object);
1c79356b 12361
91447636 12362 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
b0d623f7
A
12363 int kill_pages = 0;
12364 boolean_t reusable_pages = FALSE;
91447636
A
12365
12366 if (sync_flags & VM_SYNC_KILLPAGES) {
b0d623f7 12367 if (object->ref_count == 1 && !object->shadow)
91447636
A
12368 kill_pages = 1;
12369 else
12370 kill_pages = -1;
12371 }
12372 if (kill_pages != -1)
12373 vm_object_deactivate_pages(object, offset,
b0d623f7 12374 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
91447636
A
12375 vm_object_unlock(object);
12376 vm_map_unlock(map);
12377 continue;
1c79356b 12378 }
91447636
A
12379 /*
12380 * We can't sync this object if there isn't a pager.
12381 * Don't bother to sync internal objects, since there can't
12382 * be any "permanent" storage for these objects anyway.
12383 */
12384 if ((object->pager == MEMORY_OBJECT_NULL) ||
12385 (object->internal) || (object->private)) {
12386 vm_object_unlock(object);
12387 vm_map_unlock(map);
12388 continue;
12389 }
12390 /*
12391 * keep reference on the object until syncing is done
12392 */
2d21ac55 12393 vm_object_reference_locked(object);
91447636 12394 vm_object_unlock(object);
1c79356b 12395
91447636 12396 vm_map_unlock(map);
1c79356b 12397
91447636 12398 do_sync_req = vm_object_sync(object,
2d21ac55
A
12399 offset,
12400 flush_size,
12401 sync_flags & VM_SYNC_INVALIDATE,
b0d623f7
A
12402 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12403 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
2d21ac55 12404 sync_flags & VM_SYNC_SYNCHRONOUS);
91447636
A
12405 /*
12406 * only send an m_o_s if we returned pages or if the entry
12407 * is writable (i.e. dirty pages may have already been sent back)
12408 */
b0d623f7 12409 if (!do_sync_req) {
2d21ac55
A
12410 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12411 /*
12412 * clear out the clustering and read-ahead hints
12413 */
12414 vm_object_lock(object);
12415
12416 object->pages_created = 0;
12417 object->pages_used = 0;
12418 object->sequential = 0;
12419 object->last_alloc = 0;
12420
12421 vm_object_unlock(object);
12422 }
91447636
A
12423 vm_object_deallocate(object);
12424 continue;
1c79356b 12425 }
91447636 12426 msync_req_alloc(new_msr);
1c79356b 12427
91447636
A
12428 vm_object_lock(object);
12429 offset += object->paging_offset;
1c79356b 12430
91447636
A
12431 new_msr->offset = offset;
12432 new_msr->length = flush_size;
12433 new_msr->object = object;
12434 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
2d21ac55
A
12435 re_iterate:
12436
12437 /*
12438 * We can't sync this object if there isn't a pager. The
12439 * pager can disappear anytime we're not holding the object
12440 * lock. So this has to be checked anytime we goto re_iterate.
12441 */
12442
12443 pager = object->pager;
12444
12445 if (pager == MEMORY_OBJECT_NULL) {
12446 vm_object_unlock(object);
12447 vm_object_deallocate(object);
12448 continue;
12449 }
12450
91447636
A
12451 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12452 /*
12453 * need to check for overlapping entry, if found, wait
12454 * on overlapping msr to be done, then reiterate
12455 */
12456 msr_lock(msr);
12457 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12458 ((offset >= msr->offset &&
12459 offset < (msr->offset + msr->length)) ||
12460 (msr->offset >= offset &&
12461 msr->offset < (offset + flush_size))))
12462 {
12463 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12464 msr_unlock(msr);
12465 vm_object_unlock(object);
12466 thread_block(THREAD_CONTINUE_NULL);
12467 vm_object_lock(object);
12468 goto re_iterate;
12469 }
12470 msr_unlock(msr);
12471 }/* queue_iterate */
1c79356b 12472
91447636 12473 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
2d21ac55
A
12474
12475 vm_object_paging_begin(object);
91447636 12476 vm_object_unlock(object);
1c79356b 12477
91447636
A
12478 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12479
12480 (void) memory_object_synchronize(
2d21ac55
A
12481 pager,
12482 offset,
12483 flush_size,
12484 sync_flags & ~VM_SYNC_CONTIGUOUS);
12485
12486 vm_object_lock(object);
12487 vm_object_paging_end(object);
12488 vm_object_unlock(object);
91447636
A
12489 }/* while */
12490
12491 /*
12492 * wait for memory_object_synchronize_completed messages from pager(s)
12493 */
12494
12495 while (!queue_empty(&req_q)) {
12496 msr = (msync_req_t)queue_first(&req_q);
12497 msr_lock(msr);
12498 while(msr->flag != VM_MSYNC_DONE) {
12499 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12500 msr_unlock(msr);
12501 thread_block(THREAD_CONTINUE_NULL);
12502 msr_lock(msr);
12503 }/* while */
12504 queue_remove(&req_q, msr, msync_req_t, req_q);
12505 msr_unlock(msr);
12506 vm_object_deallocate(msr->object);
12507 msync_req_free(msr);
12508 }/* queue_iterate */
12509
12510 /* for proper msync() behaviour */
12511 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12512 return(KERN_INVALID_ADDRESS);
12513
12514 return(KERN_SUCCESS);
12515}/* vm_msync */
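/*
 * Usage sketch (hypothetical helper): write dirty pages back
 * synchronously and treat any hole in the range as an error, per the
 * sync_flags interpretation documented above.
 */
#if 0
static kern_return_t
vm_map_msync_flush_sketch(
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		len)
{
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif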
1c79356b 12516
1c79356b 12517/*
91447636
A
12518 * Routine: convert_port_entry_to_map
12519 * Purpose:
12520 * Convert from a port specifying an entry or a task
12521 * to a map. Doesn't consume the port ref; produces a map ref,
12522 * which may be null. Unlike convert_port_to_map, the
12523 * port may be backed by a task or a named entry.
12524 * Conditions:
12525 * Nothing locked.
1c79356b 12526 */
1c79356b 12527
1c79356b 12528
91447636
A
12529vm_map_t
12530convert_port_entry_to_map(
12531 ipc_port_t port)
12532{
12533 vm_map_t map;
12534 vm_named_entry_t named_entry;
2d21ac55 12535 uint32_t try_failed_count = 0;
1c79356b 12536
91447636
A
12537 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12538 while(TRUE) {
12539 ip_lock(port);
12540 if(ip_active(port) && (ip_kotype(port)
2d21ac55 12541 == IKOT_NAMED_ENTRY)) {
91447636 12542 named_entry =
2d21ac55 12543 (vm_named_entry_t)port->ip_kobject;
b0d623f7 12544 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 12545 ip_unlock(port);
2d21ac55
A
12546
12547 try_failed_count++;
12548 mutex_pause(try_failed_count);
91447636
A
12549 continue;
12550 }
12551 named_entry->ref_count++;
b0d623f7 12552 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
12553 ip_unlock(port);
12554 if ((named_entry->is_sub_map) &&
2d21ac55
A
12555 (named_entry->protection
12556 & VM_PROT_WRITE)) {
91447636
A
12557 map = named_entry->backing.map;
12558 } else {
12559 mach_destroy_memory_entry(port);
12560 return VM_MAP_NULL;
12561 }
12562 vm_map_reference_swap(map);
12563 mach_destroy_memory_entry(port);
12564 break;
12565 }
12566 else
12567 return VM_MAP_NULL;
12568 }
1c79356b 12569 }
91447636
A
12570 else
12571 map = convert_port_to_map(port);
1c79356b 12572
91447636
A
12573 return map;
12574}
1c79356b 12575
91447636
A
12576/*
12577 * Routine: convert_port_entry_to_object
12578 * Purpose:
12579 * Convert from a port specifying a named entry to an
12580 * object. Doesn't consume the port ref; produces an object ref,
12581 * which may be null.
12582 * Conditions:
12583 * Nothing locked.
12584 */
1c79356b 12585
1c79356b 12586
91447636
A
12587vm_object_t
12588convert_port_entry_to_object(
12589 ipc_port_t port)
12590{
12591 vm_object_t object;
12592 vm_named_entry_t named_entry;
2d21ac55 12593 uint32_t try_failed_count = 0;
1c79356b 12594
91447636
A
12595 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12596 while(TRUE) {
12597 ip_lock(port);
12598 if(ip_active(port) && (ip_kotype(port)
2d21ac55 12599 == IKOT_NAMED_ENTRY)) {
91447636 12600 named_entry =
2d21ac55 12601 (vm_named_entry_t)port->ip_kobject;
b0d623f7 12602 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 12603 ip_unlock(port);
2d21ac55
A
12604
12605 try_failed_count++;
12606 mutex_pause(try_failed_count);
91447636
A
12607 continue;
12608 }
12609 named_entry->ref_count++;
b0d623f7 12610 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
12611 ip_unlock(port);
12612 if ((!named_entry->is_sub_map) &&
2d21ac55
A
12613 (!named_entry->is_pager) &&
12614 (named_entry->protection
12615 & VM_PROT_WRITE)) {
91447636
A
12616 object = named_entry->backing.object;
12617 } else {
12618 mach_destroy_memory_entry(port);
12619 return (vm_object_t)NULL;
12620 }
12621 vm_object_reference(named_entry->backing.object);
12622 mach_destroy_memory_entry(port);
12623 break;
12624 }
12625 else
12626 return (vm_object_t)NULL;
1c79356b 12627 }
91447636
A
12628 } else {
12629 return (vm_object_t)NULL;
1c79356b 12630 }
91447636
A
12631
12632 return object;
1c79356b 12633}
9bccf70c
A
12634
12635/*
91447636
A
12636 * Export routines to other components for the things we access locally through
12637 * macros.
9bccf70c 12638 */
91447636
A
12639#undef current_map
12640vm_map_t
12641current_map(void)
9bccf70c 12642{
91447636 12643 return (current_map_fast());
9bccf70c
A
12644}
12645
12646/*
12647 * vm_map_reference:
12648 *
12649 * Most code internal to the osfmk will go through a
12650 * macro defining this. This is always here for the
12651 * use of other kernel components.
12652 */
12653#undef vm_map_reference
12654void
12655vm_map_reference(
12656 register vm_map_t map)
12657{
12658 if (map == VM_MAP_NULL)
12659 return;
12660
b0d623f7 12661 lck_mtx_lock(&map->s_lock);
9bccf70c
A
12662#if TASK_SWAPPER
12663 assert(map->res_count > 0);
12664 assert(map->ref_count >= map->res_count);
12665 map->res_count++;
12666#endif
12667 map->ref_count++;
b0d623f7 12668 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
12669}
12670
12671/*
12672 * vm_map_deallocate:
12673 *
12674 * Removes a reference from the specified map,
12675 * destroying it if no references remain.
12676 * The map should not be locked.
12677 */
12678void
12679vm_map_deallocate(
12680 register vm_map_t map)
12681{
12682 unsigned int ref;
12683
12684 if (map == VM_MAP_NULL)
12685 return;
12686
b0d623f7 12687 lck_mtx_lock(&map->s_lock);
9bccf70c
A
12688 ref = --map->ref_count;
12689 if (ref > 0) {
12690 vm_map_res_deallocate(map);
b0d623f7 12691 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
12692 return;
12693 }
12694 assert(map->ref_count == 0);
b0d623f7 12695 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
12696
12697#if TASK_SWAPPER
12698 /*
12699 * The map residence count isn't decremented here because
12700 * the vm_map_delete below will traverse the entire map,
12701 * deleting entries, and the residence counts on objects
12702 * and sharing maps will go away then.
12703 */
12704#endif
12705
2d21ac55 12706 vm_map_destroy(map, VM_MAP_NO_FLAGS);
0c530ab8 12707}
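/*
 * Sketch of the usual reference pairing (see vm_map_write_user() above
 * for a real instance; the helper is hypothetical): take a reference
 * before an operation during which the map must stay alive, drop it
 * afterwards.
 */
#if 0
static void
vm_map_ref_pairing_sketch(vm_map_t map)
{
	vm_map_reference(map);
	/* ... blocking operation, map switch, etc. ... */
	vm_map_deallocate(map);
}
#endif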
91447636 12708
91447636 12709
0c530ab8
A
12710void
12711vm_map_disable_NX(vm_map_t map)
12712{
12713 if (map == NULL)
12714 return;
12715 if (map->pmap == NULL)
12716 return;
12717
12718 pmap_disable_NX(map->pmap);
12719}
12720
6d2010ae
A
12721void
12722vm_map_disallow_data_exec(vm_map_t map)
12723{
12724 if (map == NULL)
12725 return;
12726
12727 map->map_disallow_data_exec = TRUE;
12728}
12729
0c530ab8
A
12730/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12731 * more descriptive.
12732 */
12733void
12734vm_map_set_32bit(vm_map_t map)
12735{
12736 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12737}
12738
12739
12740void
12741vm_map_set_64bit(vm_map_t map)
12742{
12743 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12744}
12745
12746vm_map_offset_t
12747vm_compute_max_offset(unsigned is64)
12748{
12749 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12750}
12751
12752boolean_t
2d21ac55
A
12753vm_map_is_64bit(
12754 vm_map_t map)
12755{
12756 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12757}
12758
12759boolean_t
12760vm_map_has_4GB_pagezero(
12761 vm_map_t map)
0c530ab8
A
12762{
12763 /*
12764 * XXX FBDP
12765 * We should lock the VM map (for read) here but we can get away
12766 * with it for now because there can't really be any race condition:
12767 * the VM map's min_offset is changed only when the VM map is created
12768 * and when the zero page is established (when the binary gets loaded),
12769 * and this routine gets called only when the task terminates and the
12770 * VM map is being torn down, and when a new map is created via
12771 * load_machfile()/execve().
12772 */
12773 return (map->min_offset >= 0x100000000ULL);
12774}
12775
12776void
12777vm_map_set_4GB_pagezero(vm_map_t map)
12778{
6d2010ae 12779#if defined(__i386__)
0c530ab8 12780 pmap_set_4GB_pagezero(map->pmap);
b0d623f7
A
12781#else
12782#pragma unused(map)
12783#endif
12784
0c530ab8
A
12785}
12786
12787void
12788vm_map_clear_4GB_pagezero(vm_map_t map)
12789{
6d2010ae 12790#if defined(__i386__)
0c530ab8 12791 pmap_clear_4GB_pagezero(map->pmap);
b0d623f7
A
12792#else
12793#pragma unused(map)
12794#endif
0c530ab8
A
12795}
12796
12797/*
12798 * Raise a VM map's minimum offset.
12799 * To strictly enforce "page zero" reservation.
12800 */
12801kern_return_t
12802vm_map_raise_min_offset(
12803 vm_map_t map,
12804 vm_map_offset_t new_min_offset)
12805{
12806 vm_map_entry_t first_entry;
12807
12808 new_min_offset = vm_map_round_page(new_min_offset);
12809
12810 vm_map_lock(map);
12811
12812 if (new_min_offset < map->min_offset) {
12813 /*
12814 * Can't move min_offset backwards, as that would expose
12815 * a part of the address space that was previously, and for
12816 * possibly good reasons, inaccessible.
12817 */
12818 vm_map_unlock(map);
12819 return KERN_INVALID_ADDRESS;
12820 }
12821
12822 first_entry = vm_map_first_entry(map);
12823 if (first_entry != vm_map_to_entry(map) &&
12824 first_entry->vme_start < new_min_offset) {
12825 /*
12826 * Some memory was already allocated below the new
12827 * minimum offset. It's too late to change it now...
12828 */
12829 vm_map_unlock(map);
12830 return KERN_NO_SPACE;
12831 }
12832
12833 map->min_offset = new_min_offset;
12834
12835 vm_map_unlock(map);
12836
12837 return KERN_SUCCESS;
12838}
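/*
 * Usage sketch (hypothetical helper): reserve a 4 GB "page zero" by
 * raising the map's minimum offset to the same threshold that
 * vm_map_has_4GB_pagezero() above tests for.
 */
#if 0
static kern_return_t
vm_map_reserve_4GB_pagezero_sketch(vm_map_t map)
{
	return vm_map_raise_min_offset(map,
				       (vm_map_offset_t)0x100000000ULL);
}
#endif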
2d21ac55
A
12839
12840/*
12841 * Set the limit on the maximum amount of user wired memory allowed for this map.
12842 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12843 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here to
12844 * avoid reaching over to the BSD data structures.
12845 */
12846
12847void
12848vm_map_set_user_wire_limit(vm_map_t map,
12849 vm_size_t limit)
12850{
12851 map->user_wire_limit = limit;
12852}
593a1d5f 12853
b0d623f7
A
12854
12855void vm_map_switch_protect(vm_map_t map,
12856 boolean_t val)
593a1d5f
A
12857{
12858 vm_map_lock(map);
b0d623f7 12859 map->switch_protect=val;
593a1d5f 12860 vm_map_unlock(map);
b0d623f7 12861}
b7266188
A
12862
12863/* Add (generate) code signature for memory range */
12864#if CONFIG_DYNAMIC_CODE_SIGNING
12865kern_return_t vm_map_sign(vm_map_t map,
12866 vm_map_offset_t start,
12867 vm_map_offset_t end)
12868{
12869 vm_map_entry_t entry;
12870 vm_page_t m;
12871 vm_object_t object;
12872
12873 /*
12874 * Vet all the input parameters and current type and state of the
12875 * underlying object. Return with an error if anything is amiss.
12876 */
12877 if (map == VM_MAP_NULL)
12878 return(KERN_INVALID_ARGUMENT);
12879
12880 vm_map_lock_read(map);
12881
12882 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
12883 /*
12884 * Must pass a valid non-submap address.
12885 */
12886 vm_map_unlock_read(map);
12887 return(KERN_INVALID_ADDRESS);
12888 }
12889
12890 if((entry->vme_start > start) || (entry->vme_end < end)) {
12891 /*
12892 * Map entry doesn't cover the requested range. Not handling
12893 * this situation currently.
12894 */
12895 vm_map_unlock_read(map);
12896 return(KERN_INVALID_ARGUMENT);
12897 }
12898
12899 object = entry->object.vm_object;
12900 if (object == VM_OBJECT_NULL) {
12901 /*
12902 * Object must already be present or we can't sign.
12903 */
12904 vm_map_unlock_read(map);
12905 return KERN_INVALID_ARGUMENT;
12906 }
12907
12908 vm_object_lock(object);
12909 vm_map_unlock_read(map);
12910
12911 while(start < end) {
12912 uint32_t refmod;
12913
12914 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
12915 if (m==VM_PAGE_NULL) {
12916 /* should we try to fault a page here? we can probably
12917 * demand it exists and is locked for this request */
12918 vm_object_unlock(object);
12919 return KERN_FAILURE;
12920 }
12921 /* deal with special page status */
12922 if (m->busy ||
12923 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
12924 vm_object_unlock(object);
12925 return KERN_FAILURE;
12926 }
12927
12928 /* Page is OK... now "validate" it */
12929 /* This is the place where we'll call out to create a code
12930 * directory, later */
12931 m->cs_validated = TRUE;
12932
12933 /* The page is now "clean" for codesigning purposes. That means
12934 * we don't consider it as modified (wpmapped) anymore. But
12935 * we'll disconnect the page so we note any future modification
12936 * attempts. */
12937 m->wpmapped = FALSE;
12938 refmod = pmap_disconnect(m->phys_page);
12939
12940 /* Pull the dirty status from the pmap, since we cleared the
12941 * wpmapped bit */
12942 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
12943 m->dirty = TRUE;
12944 }
12945
12946 /* On to the next page */
12947 start += PAGE_SIZE;
12948 }
12949 vm_object_unlock(object);
12950
12951 return KERN_SUCCESS;
12952}
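/*
 * Usage sketch (hypothetical helper, CONFIG_DYNAMIC_CODE_SIGNING only):
 * sign the page-aligned range covering [addr, addr + len).
 */
#if 0
static kern_return_t
vm_map_sign_range_sketch(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_map_size_t	len)
{
	return vm_map_sign(map,
			   vm_map_trunc_page(addr),
			   vm_map_round_page(addr + len));
}
#endif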
12953#endif
6d2010ae
A
12954
12955#if CONFIG_FREEZE
12956
12957kern_return_t vm_map_freeze_walk(
12958 vm_map_t map,
12959 unsigned int *purgeable_count,
12960 unsigned int *wired_count,
12961 unsigned int *clean_count,
12962 unsigned int *dirty_count,
12963 boolean_t *has_shared)
12964{
12965 vm_map_entry_t entry;
12966
12967 vm_map_lock_read(map);
12968
12969 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
12970 *has_shared = FALSE;
12971
12972 for (entry = vm_map_first_entry(map);
12973 entry != vm_map_to_entry(map);
12974 entry = entry->vme_next) {
12975 unsigned int purgeable, clean, dirty, wired;
12976 boolean_t shared;
12977
12978 if ((entry->object.vm_object == 0) ||
12979 (entry->is_sub_map) ||
12980 (entry->object.vm_object->phys_contiguous)) {
12981 continue;
12982 }
12983
12984 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
12985
12986 *purgeable_count += purgeable;
12987 *wired_count += wired;
12988 *clean_count += clean;
12989 *dirty_count += dirty;
12990
12991 if (shared) {
12992 *has_shared = TRUE;
12993 }
12994 }
12995
12996 vm_map_unlock_read(map);
12997
12998 return KERN_SUCCESS;
12999}
13000
13001kern_return_t vm_map_freeze(
13002 vm_map_t map,
13003 unsigned int *purgeable_count,
13004 unsigned int *wired_count,
13005 unsigned int *clean_count,
13006 unsigned int *dirty_count,
13007 boolean_t *has_shared)
13008{
13009 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13010 vm_object_t compact_object = VM_OBJECT_NULL;
13011 vm_object_offset_t offset = 0x0;
13012 kern_return_t kr = KERN_SUCCESS;
13013 void *default_freezer_toc = NULL;
13014 boolean_t cleanup = FALSE;
13015
13016 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13017 *has_shared = FALSE;
13018
13019 /* Create our compact object */
13020 compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13021 if (!compact_object) {
13022 kr = KERN_FAILURE;
13023 goto done;
13024 }
13025
13026 default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13027 if (!default_freezer_toc) {
13028 kr = KERN_FAILURE;
13029 goto done;
13030 }
13031
13032 /*
13033 * We need the exclusive lock here so that we can
13034 * block any page faults or lookups while we are
13035 * in the middle of freezing this vm map.
13036 */
13037 vm_map_lock(map);
13038
13039 if (map->default_freezer_toc != NULL){
13040 /*
13041 * This map has already been frozen.
13042 */
13043 cleanup = TRUE;
13044 kr = KERN_SUCCESS;
13045 goto done;
13046 }
13047
13048 /* Get a mapping in place for the freezing about to commence */
13049 map->default_freezer_toc = default_freezer_toc;
13050
13051 vm_object_lock(compact_object);
13052
13053 for (entry2 = vm_map_first_entry(map);
13054 entry2 != vm_map_to_entry(map);
13055 entry2 = entry2->vme_next) {
13056
13057 vm_object_t src_object = entry2->object.vm_object;
13058
13059 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13060 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13061 unsigned int purgeable, clean, dirty, wired;
13062 boolean_t shared;
13063
13064 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13065 src_object, compact_object, &default_freezer_toc, &offset);
13066
13067 *purgeable_count += purgeable;
13068 *wired_count += wired;
13069 *clean_count += clean;
13070 *dirty_count += dirty;
13071
13072 if (shared) {
13073 *has_shared = TRUE;
13074 }
13075 }
13076 }
13077
13078 vm_object_unlock(compact_object);
13079
13080 /* Finally, throw out the pages to swap */
13081 vm_object_pageout(compact_object);
13082
13083done:
13084 vm_map_unlock(map);
13085
13086 /* Unwind if there was a failure */
13087 if ((cleanup) || (KERN_SUCCESS != kr)) {
13088 if (default_freezer_toc){
13089 default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13090 }
13091 if (compact_object){
13092 vm_object_deallocate(compact_object);
13093 }
13094 }
13095
13096 return kr;
13097}
13098
13099__private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
13100
13101void
13102vm_map_thaw(
13103 vm_map_t map)
13104{
13105 void **default_freezer_toc;
13106 vm_object_t compact_object;
13107
13108 vm_map_lock(map);
13109
13110 if (map->default_freezer_toc == NULL){
13111 /*
13112 * This map is not in a frozen state.
13113 */
13114 goto out;
13115 }
13116
13117 default_freezer_toc = &(map->default_freezer_toc);
13118
13119 compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13120
13121 /* Bring the pages back in */
13122 vm_object_pagein(compact_object);
13123
13124 /* Shift pages back to their original objects */
13125 vm_object_unpack(compact_object, default_freezer_toc);
13126
13127 vm_object_deallocate(compact_object);
13128
13129 map->default_freezer_toc = NULL;
13130
13131out:
13132 vm_map_unlock(map);
13133}
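/*
 * Usage sketch (hypothetical helper, CONFIG_FREEZE only): freeze a
 * task's map, then thaw it again later.
 */
#if 0
static kern_return_t
vm_map_freeze_thaw_sketch(vm_map_t map)
{
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;
	kern_return_t	kr;

	kr = vm_map_freeze(map, &purgeable, &wired,
			   &clean, &dirty, &shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... while frozen, eligible pages sit in the compact object ... */

	vm_map_thaw(map);
	return KERN_SUCCESS;
}
#endif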
13134#endif