apple/xnu (xnu-1504.3.12) - osfmk/vm/vm_map.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
105 #ifdef ppc
106 #include <ppc/mappings.h>
107 #endif /* ppc */
108
109 #include <vm/vm_protos.h>
110 #include <vm/vm_shared_region.h>
111
112 /* Internal prototypes
113 */
114
115 static void vm_map_simplify_range(
116 vm_map_t map,
117 vm_map_offset_t start,
118 vm_map_offset_t end); /* forward */
119
120 static boolean_t vm_map_range_check(
121 vm_map_t map,
122 vm_map_offset_t start,
123 vm_map_offset_t end,
124 vm_map_entry_t *entry);
125
126 static vm_map_entry_t _vm_map_entry_create(
127 struct vm_map_header *map_header);
128
129 static void _vm_map_entry_dispose(
130 struct vm_map_header *map_header,
131 vm_map_entry_t entry);
132
133 static void vm_map_pmap_enter(
134 vm_map_t map,
135 vm_map_offset_t addr,
136 vm_map_offset_t end_addr,
137 vm_object_t object,
138 vm_object_offset_t offset,
139 vm_prot_t protection);
140
141 static void _vm_map_clip_end(
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry,
144 vm_map_offset_t end);
145
146 static void _vm_map_clip_start(
147 struct vm_map_header *map_header,
148 vm_map_entry_t entry,
149 vm_map_offset_t start);
150
151 static void vm_map_entry_delete(
152 vm_map_t map,
153 vm_map_entry_t entry);
154
155 static kern_return_t vm_map_delete(
156 vm_map_t map,
157 vm_map_offset_t start,
158 vm_map_offset_t end,
159 int flags,
160 vm_map_t zap_map);
161
162 static kern_return_t vm_map_copy_overwrite_unaligned(
163 vm_map_t dst_map,
164 vm_map_entry_t entry,
165 vm_map_copy_t copy,
166 vm_map_address_t start);
167
168 static kern_return_t vm_map_copy_overwrite_aligned(
169 vm_map_t dst_map,
170 vm_map_entry_t tmp_entry,
171 vm_map_copy_t copy,
172 vm_map_offset_t start,
173 pmap_t pmap);
174
175 static kern_return_t vm_map_copyin_kernel_buffer(
176 vm_map_t src_map,
177 vm_map_address_t src_addr,
178 vm_map_size_t len,
179 boolean_t src_destroy,
180 vm_map_copy_t *copy_result); /* OUT */
181
182 static kern_return_t vm_map_copyout_kernel_buffer(
183 vm_map_t map,
184 vm_map_address_t *addr, /* IN/OUT */
185 vm_map_copy_t copy,
186 boolean_t overwrite);
187
188 static void vm_map_fork_share(
189 vm_map_t old_map,
190 vm_map_entry_t old_entry,
191 vm_map_t new_map);
192
193 static boolean_t vm_map_fork_copy(
194 vm_map_t old_map,
195 vm_map_entry_t *old_entry_p,
196 vm_map_t new_map);
197
198 void vm_map_region_top_walk(
199 vm_map_entry_t entry,
200 vm_region_top_info_t top);
201
202 void vm_map_region_walk(
203 vm_map_t map,
204 vm_map_offset_t va,
205 vm_map_entry_t entry,
206 vm_object_offset_t offset,
207 vm_object_size_t range,
208 vm_region_extended_info_t extended,
209 boolean_t look_for_pages);
210
211 static kern_return_t vm_map_wire_nested(
212 vm_map_t map,
213 vm_map_offset_t start,
214 vm_map_offset_t end,
215 vm_prot_t access_type,
216 boolean_t user_wire,
217 pmap_t map_pmap,
218 vm_map_offset_t pmap_addr);
219
220 static kern_return_t vm_map_unwire_nested(
221 vm_map_t map,
222 vm_map_offset_t start,
223 vm_map_offset_t end,
224 boolean_t user_wire,
225 pmap_t map_pmap,
226 vm_map_offset_t pmap_addr);
227
228 static kern_return_t vm_map_overwrite_submap_recurse(
229 vm_map_t dst_map,
230 vm_map_offset_t dst_addr,
231 vm_map_size_t dst_size);
232
233 static kern_return_t vm_map_copy_overwrite_nested(
234 vm_map_t dst_map,
235 vm_map_offset_t dst_addr,
236 vm_map_copy_t copy,
237 boolean_t interruptible,
238 pmap_t pmap);
239
240 static kern_return_t vm_map_remap_extract(
241 vm_map_t map,
242 vm_map_offset_t addr,
243 vm_map_size_t size,
244 boolean_t copy,
245 struct vm_map_header *map_header,
246 vm_prot_t *cur_protection,
247 vm_prot_t *max_protection,
248 vm_inherit_t inheritance,
249 boolean_t pageable);
250
251 static kern_return_t vm_map_remap_range_allocate(
252 vm_map_t map,
253 vm_map_address_t *address,
254 vm_map_size_t size,
255 vm_map_offset_t mask,
256 boolean_t anywhere,
257 vm_map_entry_t *map_entry);
258
259 static void vm_map_region_look_for_page(
260 vm_map_t map,
261 vm_map_offset_t va,
262 vm_object_t object,
263 vm_object_offset_t offset,
264 int max_refcnt,
265 int depth,
266 vm_region_extended_info_t extended);
267
268 static int vm_map_region_count_obj_refs(
269 vm_map_entry_t entry,
270 vm_object_t object);
271
272
273 static kern_return_t vm_map_willneed(
274 vm_map_t map,
275 vm_map_offset_t start,
276 vm_map_offset_t end);
277
278 static kern_return_t vm_map_reuse_pages(
279 vm_map_t map,
280 vm_map_offset_t start,
281 vm_map_offset_t end);
282
283 static kern_return_t vm_map_reusable_pages(
284 vm_map_t map,
285 vm_map_offset_t start,
286 vm_map_offset_t end);
287
288 static kern_return_t vm_map_can_reuse(
289 vm_map_t map,
290 vm_map_offset_t start,
291 vm_map_offset_t end);
292
293 /*
294 * Macros to copy a vm_map_entry. We must be careful to correctly
295 * manage the wired page count. vm_map_entry_copy() creates a new
296 * map entry to the same memory - the wired count in the new entry
297 * must be set to zero. vm_map_entry_copy_full() creates a new
298 * entry that is identical to the old entry. This preserves the
299 * wire count; it's used for map splitting and zone changing in
300 * vm_map_copyout.
301 */
302 #define vm_map_entry_copy(NEW,OLD) \
303 MACRO_BEGIN \
304 *(NEW) = *(OLD); \
305 (NEW)->is_shared = FALSE; \
306 (NEW)->needs_wakeup = FALSE; \
307 (NEW)->in_transition = FALSE; \
308 (NEW)->wired_count = 0; \
309 (NEW)->user_wired_count = 0; \
310 (NEW)->permanent = FALSE; \
311 MACRO_END
312
313 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
314
315 /*
316 * Decide if we want to allow processes to execute from their data or stack areas.
317 * override_nx() returns true if we do. Data/stack execution can be enabled independently
318 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
319 * or allow_stack_exec to enable data execution for that type of data area for that particular
320 * ABI (or both by or'ing the flags together). These are initialized in the architecture
321 * specific pmap files since the default behavior varies according to architecture. The
322 * main reason it varies is because of the need to provide binary compatibility with old
323 * applications that were written before these restrictions came into being. In the old
324 * days, an app could execute anything it could read, but this has slowly been tightened
325 * up over time. The default behavior is:
326 *
327 * 32-bit PPC apps may execute from both stack and data areas
328 * 32-bit Intel apps may execute from data areas but not stack
329 * 64-bit PPC/Intel apps may not execute from either data or stack
330 *
331 * An application on any architecture may override these defaults by explicitly
332 * adding PROT_EXEC permission to the page in question with the mprotect(2)
333 * system call. This code here just determines what happens when an app tries to
334 * execute from a page that lacks execute permission.
335 *
336 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
337 * default behavior for both 32 and 64 bit apps on a system-wide basis.
338 */
339
340 extern int allow_data_exec, allow_stack_exec;
341
342 int
343 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
344 {
345 int current_abi;
346
347 /*
348 * Determine if the app is running in 32 or 64 bit mode.
349 */
350
351 if (vm_map_is_64bit(map))
352 current_abi = VM_ABI_64;
353 else
354 current_abi = VM_ABI_32;
355
356 /*
357 * Determine if we should allow the execution based on whether it's a
358 * stack or data area and the current architecture.
359 */
360
361 if (user_tag == VM_MEMORY_STACK)
362 return allow_stack_exec & current_abi;
363
364 return allow_data_exec & current_abi;
365 }
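
/*
 * Illustrative sketch, not part of the original file: how a page-fault
 * path might consult override_nx() when execute permission is missing
 * from the mapping. The locals (map, entry, fault_prot, prot) are
 * hypothetical names used only for this example.
 *
 *	prot = entry->protection;
 *	if ((fault_prot & VM_PROT_EXECUTE) &&
 *	    !(prot & VM_PROT_EXECUTE) &&
 *	    override_nx(map, entry->alias)) {
 *		prot |= VM_PROT_EXECUTE;	// permit legacy data/stack execution
 *	}
 */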
366
367
368 /*
369 * Virtual memory maps provide for the mapping, protection,
370 * and sharing of virtual memory objects. In addition,
371 * this module provides for an efficient virtual copy of
372 * memory from one map to another.
373 *
374 * Synchronization is required prior to most operations.
375 *
376 * Maps consist of an ordered doubly-linked list of simple
377 * entries; a single hint is used to speed up lookups.
378 *
379 * Sharing maps have been deleted from this version of Mach.
380 * All shared objects are now mapped directly into the respective
381 * maps. This requires a change in the copy on write strategy;
382 * the asymmetric (delayed) strategy is used for shared temporary
383 * objects instead of the symmetric (shadow) strategy. All maps
384 * are now "top level" maps (either task map, kernel map or submap
385 * of the kernel map).
386 *
387 * Since portions of maps are specified by start/end addresses,
388 * which may not align with existing map entries, all
389 * routines merely "clip" entries to these start/end values.
390 * [That is, an entry is split into two, bordering at a
391 * start or end value.] Note that these clippings may not
392 * always be necessary (as the two resulting entries are then
393 * not changed); however, the clipping is done for convenience.
394 * No attempt is currently made to "glue back together" two
395 * abutting entries.
396 *
397 * The symmetric (shadow) copy strategy implements virtual copy
398 * by copying VM object references from one map to
399 * another, and then marking both regions as copy-on-write.
400 * It is important to note that only one writeable reference
401 * to a VM object region exists in any map when this strategy
402 * is used -- this means that shadow object creation can be
403 * delayed until a write operation occurs. The asymmetric (delayed)
404 * strategy allows multiple maps to have writeable references to
405 * the same region of a vm object, and hence cannot delay creating
406 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
407 * Copying of permanent objects is completely different; see
408 * vm_object_copy_strategically() in vm_object.c.
409 */
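
/*
 * A minimal sketch of the clipping convention described above; it is not
 * from the original file and assumes the vm_map_clip_start()/vm_map_clip_end()
 * wrappers around the _vm_map_clip_* helpers declared earlier. The locals
 * (map, entry, start, end) are hypothetical, the map is locked for writing,
 * and start/end are page-aligned addresses inside the entry.
 *
 *	vm_map_clip_start(map, entry, start);	// entry now begins exactly at start
 *	vm_map_clip_end(map, entry, end);	// ... and ends exactly at end
 *	// operate on [start, end) through this single entry; no attempt is
 *	// made afterwards to glue the resulting pieces back together.
 */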
410
411 static zone_t vm_map_zone; /* zone for vm_map structures */
412 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
413 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
414 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
415
416
417 /*
418 * Placeholder object for submap operations. This object is dropped
419 * into the range by a call to vm_map_find, and removed when
420 * vm_map_submap creates the submap.
421 */
422
423 vm_object_t vm_submap_object;
424
425 static void *map_data;
426 static vm_size_t map_data_size;
427 static void *kentry_data;
428 static vm_size_t kentry_data_size;
429 static int kentry_count = 2048; /* to init kentry_data_size */
430
431 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
432
433
434 /* Skip acquiring locks if we're in the midst of a kernel core dump */
435 unsigned int not_in_kdp = 1;
436
437 #if CONFIG_CODE_DECRYPTION
438 /*
439 * vm_map_apple_protected:
440 * This remaps the requested part of the object with an object backed by
441 * the decrypting pager.
442 * crypt_info contains entry points and session data for the crypt module.
443 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
444 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
445 */
446 kern_return_t
447 vm_map_apple_protected(
448 vm_map_t map,
449 vm_map_offset_t start,
450 vm_map_offset_t end,
451 struct pager_crypt_info *crypt_info)
452 {
453 boolean_t map_locked;
454 kern_return_t kr;
455 vm_map_entry_t map_entry;
456 memory_object_t protected_mem_obj;
457 vm_object_t protected_object;
458 vm_map_offset_t map_addr;
459
460 vm_map_lock_read(map);
461 map_locked = TRUE;
462
463 /* lookup the protected VM object */
464 if (!vm_map_lookup_entry(map,
465 start,
466 &map_entry) ||
467 map_entry->vme_end < end ||
468 map_entry->is_sub_map) {
469 /* that memory is not properly mapped */
470 kr = KERN_INVALID_ARGUMENT;
471 goto done;
472 }
473 protected_object = map_entry->object.vm_object;
474 if (protected_object == VM_OBJECT_NULL) {
475 /* there should be a VM object here at this point */
476 kr = KERN_INVALID_ARGUMENT;
477 goto done;
478 }
479
480 /* make sure protected object stays alive while map is unlocked */
481 vm_object_reference(protected_object);
482
483 vm_map_unlock_read(map);
484 map_locked = FALSE;
485
486 /*
487 * Lookup (and create if necessary) the protected memory object
488 * matching that VM object.
489 * If successful, this also grabs a reference on the memory object,
490 * to guarantee that it doesn't go away before we get a chance to map
491 * it.
492 */
493 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
494
495 /* release extra ref on protected object */
496 vm_object_deallocate(protected_object);
497
498 if (protected_mem_obj == NULL) {
499 kr = KERN_FAILURE;
500 goto done;
501 }
502
503 /* map this memory object in place of the current one */
504 map_addr = start;
505 kr = vm_map_enter_mem_object(map,
506 &map_addr,
507 end - start,
508 (mach_vm_offset_t) 0,
509 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
510 (ipc_port_t) protected_mem_obj,
511 (map_entry->offset +
512 (start - map_entry->vme_start)),
513 TRUE,
514 map_entry->protection,
515 map_entry->max_protection,
516 map_entry->inheritance);
517 assert(map_addr == start);
518 /*
519 * Release the reference obtained by apple_protect_pager_setup().
520 * The mapping (if it succeeded) is now holding a reference on the
521 * memory object.
522 */
523 memory_object_deallocate(protected_mem_obj);
524
525 done:
526 if (map_locked) {
527 vm_map_unlock_read(map);
528 }
529 return kr;
530 }
531 #endif /* CONFIG_CODE_DECRYPTION */
532
533
534 lck_grp_t vm_map_lck_grp;
535 lck_grp_attr_t vm_map_lck_grp_attr;
536 lck_attr_t vm_map_lck_attr;
537
538
539 /*
540 * vm_map_init:
541 *
542 * Initialize the vm_map module. Must be called before
543 * any other vm_map routines.
544 *
545 * Map and entry structures are allocated from zones -- we must
546 * initialize those zones.
547 *
548 * There are three zones of interest:
549 *
550 * vm_map_zone: used to allocate maps.
551 * vm_map_entry_zone: used to allocate map entries.
552 * vm_map_kentry_zone: used to allocate map entries for the kernel.
553 *
554 * The kernel allocates map entries from a special zone that is initially
555 * "crammed" with memory. It would be difficult (perhaps impossible) for
556 * the kernel to allocate more memory to an entry zone when it became
557 * empty since the very act of allocating memory implies the creation
558 * of a new entry.
559 */
560 void
561 vm_map_init(
562 void)
563 {
564 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
565 PAGE_SIZE, "maps");
566
567 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
568 1024*1024, PAGE_SIZE*5,
569 "non-kernel map entries");
570
571 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
572 kentry_data_size, kentry_data_size,
573 "kernel map entries");
574
575 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
576 16*1024, PAGE_SIZE, "map copies");
577
578 /*
579 * Cram the map and kentry zones with initial data.
580 * Set kentry_zone non-collectible to aid zone_gc().
581 */
582 zone_change(vm_map_zone, Z_COLLECT, FALSE);
583 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
584 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
585 zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
586 zcram(vm_map_zone, map_data, map_data_size);
587 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
588
589 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
590 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
591 lck_attr_setdefault(&vm_map_lck_attr);
592 }
593
594 void
595 vm_map_steal_memory(
596 void)
597 {
598 map_data_size = round_page(10 * sizeof(struct _vm_map));
599 map_data = pmap_steal_memory(map_data_size);
600
601 #if 0
602 /*
603 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
604 * physical page (i.e. that beyond the kernel image and page tables)
605 * individually; we guess at most one entry per eight pages in the
606 * real world. This works out to roughly .1 of 1% of physical memory,
607 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
608 */
609 #endif
610 kentry_count = pmap_free_pages() / 8;
611
612
613 kentry_data_size =
614 round_page(kentry_count * sizeof(struct vm_map_entry));
615 kentry_data = pmap_steal_memory(kentry_data_size);
616 }
617
618 /*
619 * vm_map_create:
620 *
621 * Creates and returns a new empty VM map with
622 * the given physical map structure, and having
623 * the given lower and upper address bounds.
624 */
625 vm_map_t
626 vm_map_create(
627 pmap_t pmap,
628 vm_map_offset_t min,
629 vm_map_offset_t max,
630 boolean_t pageable)
631 {
632 static int color_seed = 0;
633 register vm_map_t result;
634
635 result = (vm_map_t) zalloc(vm_map_zone);
636 if (result == VM_MAP_NULL)
637 panic("vm_map_create");
638
639 vm_map_first_entry(result) = vm_map_to_entry(result);
640 vm_map_last_entry(result) = vm_map_to_entry(result);
641 result->hdr.nentries = 0;
642 result->hdr.entries_pageable = pageable;
643
644 result->size = 0;
645 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
646 result->user_wire_size = 0;
647 result->ref_count = 1;
648 #if TASK_SWAPPER
649 result->res_count = 1;
650 result->sw_state = MAP_SW_IN;
651 #endif /* TASK_SWAPPER */
652 result->pmap = pmap;
653 result->min_offset = min;
654 result->max_offset = max;
655 result->wiring_required = FALSE;
656 result->no_zero_fill = FALSE;
657 result->mapped = FALSE;
658 result->wait_for_space = FALSE;
659 result->switch_protect = FALSE;
660 result->first_free = vm_map_to_entry(result);
661 result->hint = vm_map_to_entry(result);
662 result->color_rr = (color_seed++) & vm_color_mask;
663 vm_map_lock_init(result);
664 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
665
666 return(result);
667 }
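
/*
 * Illustrative use only (a sketch, not taken from this file): creating an
 * empty 64-bit task-style map backed by a fresh pmap. The bounds shown
 * (MACH_VM_MIN_ADDRESS/MACH_VM_MAX_ADDRESS) and the local names are
 * assumptions for the example.
 *
 *	pmap_t		new_pmap;
 *	vm_map_t	new_map;
 *
 *	new_pmap = pmap_create((vm_map_size_t) 0, TRUE);	// 64-bit pmap
 *	new_map = vm_map_create(new_pmap,
 *				(vm_map_offset_t) MACH_VM_MIN_ADDRESS,
 *				(vm_map_offset_t) MACH_VM_MAX_ADDRESS,
 *				TRUE);				// pageable entries
 */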
668
669 /*
670 * vm_map_entry_create: [ internal use only ]
671 *
672 * Allocates a VM map entry for insertion in the
673 * given map (or map copy). No fields are filled.
674 */
675 #define vm_map_entry_create(map) \
676 _vm_map_entry_create(&(map)->hdr)
677
678 #define vm_map_copy_entry_create(copy) \
679 _vm_map_entry_create(&(copy)->cpy_hdr)
680
681 static vm_map_entry_t
682 _vm_map_entry_create(
683 register struct vm_map_header *map_header)
684 {
685 register zone_t zone;
686 register vm_map_entry_t entry;
687
688 if (map_header->entries_pageable)
689 zone = vm_map_entry_zone;
690 else
691 zone = vm_map_kentry_zone;
692
693 entry = (vm_map_entry_t) zalloc(zone);
694 if (entry == VM_MAP_ENTRY_NULL)
695 panic("vm_map_entry_create");
696
697 return(entry);
698 }
699
700 /*
701 * vm_map_entry_dispose: [ internal use only ]
702 *
703 * Inverse of vm_map_entry_create.
704 *
705 * write map lock held so no need to
706 * do anything special to insure correctness
707 * of the stores
708 */
709 #define vm_map_entry_dispose(map, entry) \
710 MACRO_BEGIN \
711 if((entry) == (map)->first_free) \
712 (map)->first_free = vm_map_to_entry(map); \
713 if((entry) == (map)->hint) \
714 (map)->hint = vm_map_to_entry(map); \
715 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
716 MACRO_END
717
718 #define vm_map_copy_entry_dispose(copy, entry) \
719 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
720
721 static void
722 _vm_map_entry_dispose(
723 register struct vm_map_header *map_header,
724 register vm_map_entry_t entry)
725 {
726 register zone_t zone;
727
728 if (map_header->entries_pageable)
729 zone = vm_map_entry_zone;
730 else
731 zone = vm_map_kentry_zone;
732
733 zfree(zone, entry);
734 }
735
736 #if MACH_ASSERT
737 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
738 static boolean_t first_free_check = FALSE;
739 static boolean_t
740 first_free_is_valid(
741 vm_map_t map)
742 {
743 vm_map_entry_t entry, next;
744
745 if (!first_free_check)
746 return TRUE;
747
748 entry = vm_map_to_entry(map);
749 next = entry->vme_next;
750 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
751 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
752 next != vm_map_to_entry(map))) {
753 entry = next;
754 next = entry->vme_next;
755 if (entry == vm_map_to_entry(map))
756 break;
757 }
758 if (map->first_free != entry) {
759 printf("Bad first_free for map %p: %p should be %p\n",
760 map, map->first_free, entry);
761 return FALSE;
762 }
763 return TRUE;
764 }
765 #endif /* MACH_ASSERT */
766
767 /*
768 * UPDATE_FIRST_FREE:
769 *
770 * Updates the map->first_free pointer to the
771 * entry immediately before the first hole in the map.
772 * The map should be locked.
773 */
774 #define UPDATE_FIRST_FREE(map, new_first_free) \
775 MACRO_BEGIN \
776 vm_map_t UFF_map; \
777 vm_map_entry_t UFF_first_free; \
778 vm_map_entry_t UFF_next_entry; \
779 UFF_map = (map); \
780 UFF_first_free = (new_first_free); \
781 UFF_next_entry = UFF_first_free->vme_next; \
782 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
783 vm_map_trunc_page(UFF_first_free->vme_end) || \
784 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
785 vm_map_trunc_page(UFF_first_free->vme_start) && \
786 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
787 UFF_first_free = UFF_next_entry; \
788 UFF_next_entry = UFF_first_free->vme_next; \
789 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
790 break; \
791 } \
792 UFF_map->first_free = UFF_first_free; \
793 assert(first_free_is_valid(UFF_map)); \
794 MACRO_END
795
796 /*
797 * vm_map_entry_{un,}link:
798 *
799 * Insert/remove entries from maps (or map copies).
800 */
801 #define vm_map_entry_link(map, after_where, entry) \
802 MACRO_BEGIN \
803 vm_map_t VMEL_map; \
804 vm_map_entry_t VMEL_entry; \
805 VMEL_map = (map); \
806 VMEL_entry = (entry); \
807 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
808 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
809 MACRO_END
810
811
812 #define vm_map_copy_entry_link(copy, after_where, entry) \
813 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
814
815 #define _vm_map_entry_link(hdr, after_where, entry) \
816 MACRO_BEGIN \
817 (hdr)->nentries++; \
818 (entry)->vme_prev = (after_where); \
819 (entry)->vme_next = (after_where)->vme_next; \
820 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
821 MACRO_END
822
823 #define vm_map_entry_unlink(map, entry) \
824 MACRO_BEGIN \
825 vm_map_t VMEU_map; \
826 vm_map_entry_t VMEU_entry; \
827 vm_map_entry_t VMEU_first_free; \
828 VMEU_map = (map); \
829 VMEU_entry = (entry); \
830 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
831 VMEU_first_free = VMEU_entry->vme_prev; \
832 else \
833 VMEU_first_free = VMEU_map->first_free; \
834 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
835 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
836 MACRO_END
837
838 #define vm_map_copy_entry_unlink(copy, entry) \
839 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
840
841 #define _vm_map_entry_unlink(hdr, entry) \
842 MACRO_BEGIN \
843 (hdr)->nentries--; \
844 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
845 (entry)->vme_prev->vme_next = (entry)->vme_next; \
846 MACRO_END
847
848 #if MACH_ASSERT && TASK_SWAPPER
849 /*
850 * vm_map_res_reference:
851 *
852 * Adds another valid residence count to the given map.
853 *
854 * Map is locked so this function can be called from
855 * vm_map_swapin.
856 *
857 */
858 void vm_map_res_reference(register vm_map_t map)
859 {
860 /* assert map is locked */
861 assert(map->res_count >= 0);
862 assert(map->ref_count >= map->res_count);
863 if (map->res_count == 0) {
864 lck_mtx_unlock(&map->s_lock);
865 vm_map_lock(map);
866 vm_map_swapin(map);
867 lck_mtx_lock(&map->s_lock);
868 ++map->res_count;
869 vm_map_unlock(map);
870 } else
871 ++map->res_count;
872 }
873
874 /*
875 * vm_map_reference_swap:
876 *
877 * Adds valid reference and residence counts to the given map.
878 *
879 * The map may not be in memory (i.e. zero residence count).
880 *
881 */
882 void vm_map_reference_swap(register vm_map_t map)
883 {
884 assert(map != VM_MAP_NULL);
885 lck_mtx_lock(&map->s_lock);
886 assert(map->res_count >= 0);
887 assert(map->ref_count >= map->res_count);
888 map->ref_count++;
889 vm_map_res_reference(map);
890 lck_mtx_unlock(&map->s_lock);
891 }
892
893 /*
894 * vm_map_res_deallocate:
895 *
896 * Decrement residence count on a map; possibly causing swapout.
897 *
898 * The map must be in memory (i.e. non-zero residence count).
899 *
900 * The map is locked, so this function is callable from vm_map_deallocate.
901 *
902 */
903 void vm_map_res_deallocate(register vm_map_t map)
904 {
905 assert(map->res_count > 0);
906 if (--map->res_count == 0) {
907 lck_mtx_unlock(&map->s_lock);
908 vm_map_lock(map);
909 vm_map_swapout(map);
910 vm_map_unlock(map);
911 lck_mtx_lock(&map->s_lock);
912 }
913 assert(map->ref_count >= map->res_count);
914 }
915 #endif /* MACH_ASSERT && TASK_SWAPPER */
916
917 /*
918 * vm_map_destroy:
919 *
920 * Actually destroy a map.
921 */
922 void
923 vm_map_destroy(
924 vm_map_t map,
925 int flags)
926 {
927 vm_map_lock(map);
928
929 /* clean up regular map entries */
930 (void) vm_map_delete(map, map->min_offset, map->max_offset,
931 flags, VM_MAP_NULL);
932 /* clean up leftover special mappings (commpage, etc...) */
933 #ifdef __ppc__
934 /*
935 * PPC51: ppc64 is limited to 51-bit addresses.
936 * Memory beyond this 51-bit limit is mapped specially at the
937 * pmap level, so do not interfere.
938 * On PPC64, the commpage is mapped beyond the addressable range
939 * via a special pmap hack, so ask pmap to clean it explicitly...
940 */
941 if (map->pmap) {
942 pmap_unmap_sharedpage(map->pmap);
943 }
944 /* ... and do not let regular pmap cleanup apply here */
945 flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
946 #endif /* __ppc__ */
947 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
948 flags, VM_MAP_NULL);
949 vm_map_unlock(map);
950
951 assert(map->hdr.nentries == 0);
952
953 if(map->pmap)
954 pmap_destroy(map->pmap);
955
956 zfree(vm_map_zone, map);
957 }
958
959 #if TASK_SWAPPER
960 /*
961 * vm_map_swapin/vm_map_swapout
962 *
963 * Swap a map in and out, either referencing or releasing its resources.
964 * These functions are internal use only; however, they must be exported
965 * because they may be called from macros, which are exported.
966 *
967 * In the case of swapout, there could be races on the residence count,
968 * so if the residence count is up, we return, assuming that a
969 * vm_map_deallocate() call in the near future will bring us back.
970 *
971 * Locking:
972 * -- We use the map write lock for synchronization among races.
973 * -- The map write lock, and not the simple s_lock, protects the
974 * swap state of the map.
975 * -- If a map entry is a share map, then we hold both locks, in
976 * hierarchical order.
977 *
978 * Synchronization Notes:
979 * 1) If a vm_map_swapin() call happens while swapout in progress, it
980 * will block on the map lock and proceed when swapout is through.
981 * 2) A vm_map_reference() call at this time is illegal, and will
982 * cause a panic. vm_map_reference() is only allowed on resident
983 * maps, since it refuses to block.
984 * 3) A vm_map_swapin() call during a swapin will block, and
985 * proceed when the first swapin is done, turning into a nop.
986 * This is the reason the res_count is not incremented until
987 * after the swapin is complete.
988 * 4) There is a timing hole after the checks of the res_count, before
989 * the map lock is taken, during which a swapin may get the lock
990 * before a swapout about to happen. If this happens, the swapin
991 * will detect the state and increment the reference count, causing
992 * the swapout to be a nop, thereby delaying it until a later
993 * vm_map_deallocate. If the swapout gets the lock first, then
994 * the swapin will simply block until the swapout is done, and
995 * then proceed.
996 *
997 * Because vm_map_swapin() is potentially an expensive operation, it
998 * should be used with caution.
999 *
1000 * Invariants:
1001 * 1) A map with a residence count of zero is either swapped, or
1002 * being swapped.
1003 * 2) A map with a non-zero residence count is either resident,
1004 * or being swapped in.
1005 */
1006
1007 int vm_map_swap_enable = 1;
1008
1009 void vm_map_swapin (vm_map_t map)
1010 {
1011 register vm_map_entry_t entry;
1012
1013 if (!vm_map_swap_enable) /* debug */
1014 return;
1015
1016 /*
1017 * Map is locked
1018 * First deal with various races.
1019 */
1020 if (map->sw_state == MAP_SW_IN)
1021 /*
1022 * we raced with swapout and won. Returning will incr.
1023 * the res_count, turning the swapout into a nop.
1024 */
1025 return;
1026
1027 /*
1028 * The residence count must be zero. If we raced with another
1029 * swapin, the state would have been IN; if we raced with a
1030 * swapout (after another competing swapin), we must have lost
1031 * the race to get here (see above comment), in which case
1032 * res_count is still 0.
1033 */
1034 assert(map->res_count == 0);
1035
1036 /*
1037 * There are no intermediate states of a map going out or
1038 * coming in, since the map is locked during the transition.
1039 */
1040 assert(map->sw_state == MAP_SW_OUT);
1041
1042 /*
1043 * We now operate upon each map entry. If the entry is a sub-
1044 * or share-map, we call vm_map_res_reference upon it.
1045 * If the entry is an object, we call vm_object_res_reference
1046 * (this may iterate through the shadow chain).
1047 * Note that we hold the map locked the entire time,
1048 * even if we get back here via a recursive call in
1049 * vm_map_res_reference.
1050 */
1051 entry = vm_map_first_entry(map);
1052
1053 while (entry != vm_map_to_entry(map)) {
1054 if (entry->object.vm_object != VM_OBJECT_NULL) {
1055 if (entry->is_sub_map) {
1056 vm_map_t lmap = entry->object.sub_map;
1057 lck_mtx_lock(&lmap->s_lock);
1058 vm_map_res_reference(lmap);
1059 lck_mtx_unlock(&lmap->s_lock);
1060 } else {
1061 vm_object_t object = entry->object.vm_object;
1062 vm_object_lock(object);
1063 /*
1064 * This call may iterate through the
1065 * shadow chain.
1066 */
1067 vm_object_res_reference(object);
1068 vm_object_unlock(object);
1069 }
1070 }
1071 entry = entry->vme_next;
1072 }
1073 assert(map->sw_state == MAP_SW_OUT);
1074 map->sw_state = MAP_SW_IN;
1075 }
1076
1077 void vm_map_swapout(vm_map_t map)
1078 {
1079 register vm_map_entry_t entry;
1080
1081 /*
1082 * Map is locked
1083 * First deal with various races.
1084 * If we raced with a swapin and lost, the residence count
1085 * will have been incremented to 1, and we simply return.
1086 */
1087 lck_mtx_lock(&map->s_lock);
1088 if (map->res_count != 0) {
1089 lck_mtx_unlock(&map->s_lock);
1090 return;
1091 }
1092 lck_mtx_unlock(&map->s_lock);
1093
1094 /*
1095 * There are no intermediate states of a map going out or
1096 * coming in, since the map is locked during the transition.
1097 */
1098 assert(map->sw_state == MAP_SW_IN);
1099
1100 if (!vm_map_swap_enable)
1101 return;
1102
1103 /*
1104 * We now operate upon each map entry. If the entry is a sub-
1105 * or share-map, we call vm_map_res_deallocate upon it.
1106 * If the entry is an object, we call vm_object_res_deallocate
1107 * (this may iterate through the shadow chain).
1108 * Note that we hold the map locked the entire time,
1109 * even if we get back here via a recursive call in
1110 * vm_map_res_deallocate.
1111 */
1112 entry = vm_map_first_entry(map);
1113
1114 while (entry != vm_map_to_entry(map)) {
1115 if (entry->object.vm_object != VM_OBJECT_NULL) {
1116 if (entry->is_sub_map) {
1117 vm_map_t lmap = entry->object.sub_map;
1118 lck_mtx_lock(&lmap->s_lock);
1119 vm_map_res_deallocate(lmap);
1120 lck_mtx_unlock(&lmap->s_lock);
1121 } else {
1122 vm_object_t object = entry->object.vm_object;
1123 vm_object_lock(object);
1124 /*
1125 * This call may take a long time,
1126 * since it could actively push
1127 * out pages (if we implement it
1128 * that way).
1129 */
1130 vm_object_res_deallocate(object);
1131 vm_object_unlock(object);
1132 }
1133 }
1134 entry = entry->vme_next;
1135 }
1136 assert(map->sw_state == MAP_SW_IN);
1137 map->sw_state = MAP_SW_OUT;
1138 }
1139
1140 #endif /* TASK_SWAPPER */
1141
1142
1143 /*
1144 * SAVE_HINT_MAP_READ:
1145 *
1146 * Saves the specified entry as the hint for
1147 * future lookups. only a read lock is held on map,
1148 * so make sure the store is atomic... OSCompareAndSwap
1149 * guarantees this... also, we don't care if we collide
1150 * and someone else wins and stores their 'hint'
1151 */
1152 #define SAVE_HINT_MAP_READ(map,value) \
1153 MACRO_BEGIN \
1154 OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \
1155 MACRO_END
1156
1157
1158 /*
1159 * SAVE_HINT_MAP_WRITE:
1160 *
1161 * Saves the specified entry as the hint for
1162 * future lookups. write lock held on map,
1163 * so no one else can be writing or looking
1164 * until the lock is dropped, so it's safe
1165 * to just do an assignment
1166 */
1167 #define SAVE_HINT_MAP_WRITE(map,value) \
1168 MACRO_BEGIN \
1169 (map)->hint = (value); \
1170 MACRO_END
1171
1172 /*
1173 * vm_map_lookup_entry: [ internal use only ]
1174 *
1175 * Finds the map entry containing (or
1176 * immediately preceding) the specified address
1177 * in the given map; the entry is returned
1178 * in the "entry" parameter. The boolean
1179 * result indicates whether the address is
1180 * actually contained in the map.
1181 */
1182 boolean_t
1183 vm_map_lookup_entry(
1184 register vm_map_t map,
1185 register vm_map_offset_t address,
1186 vm_map_entry_t *entry) /* OUT */
1187 {
1188 register vm_map_entry_t cur;
1189 register vm_map_entry_t last;
1190
1191 /*
1192 * Start looking either from the head of the
1193 * list, or from the hint.
1194 */
1195 cur = map->hint;
1196
1197 if (cur == vm_map_to_entry(map))
1198 cur = cur->vme_next;
1199
1200 if (address >= cur->vme_start) {
1201 /*
1202 * Go from hint to end of list.
1203 *
1204 * But first, make a quick check to see if
1205 * we are already looking at the entry we
1206 * want (which is usually the case).
1207 * Note also that we don't need to save the hint
1208 * here... it is the same hint (unless we are
1209 * at the header, in which case the hint didn't
1210 * buy us anything anyway).
1211 */
1212 last = vm_map_to_entry(map);
1213 if ((cur != last) && (cur->vme_end > address)) {
1214 *entry = cur;
1215 return(TRUE);
1216 }
1217 }
1218 else {
1219 /*
1220 * Go from start to hint, *inclusively*
1221 */
1222 last = cur->vme_next;
1223 cur = vm_map_first_entry(map);
1224 }
1225
1226 /*
1227 * Search linearly
1228 */
1229
1230 while (cur != last) {
1231 if (cur->vme_end > address) {
1232 if (address >= cur->vme_start) {
1233 /*
1234 * Save this lookup for future
1235 * hints, and return
1236 */
1237
1238 *entry = cur;
1239 SAVE_HINT_MAP_READ(map, cur);
1240
1241 return(TRUE);
1242 }
1243 break;
1244 }
1245 cur = cur->vme_next;
1246 }
1247 *entry = cur->vme_prev;
1248 SAVE_HINT_MAP_READ(map, *entry);
1249
1250 return(FALSE);
1251 }
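
/*
 * Typical caller pattern, shown as an illustrative sketch (not from this
 * file); "map", "addr" and "entry" are hypothetical locals:
 *
 *	vm_map_entry_t	entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		// addr lies within [entry->vme_start, entry->vme_end)
 *	} else {
 *		// entry precedes the hole containing addr (it may be the
 *		// map header if addr comes before every entry)
 *	}
 *	vm_map_unlock_read(map);
 */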
1252
1253 /*
1254 * Routine: vm_map_find_space
1255 * Purpose:
1256 * Allocate a range in the specified virtual address map,
1257 * returning the entry allocated for that range.
1258 * Used by kmem_alloc, etc.
1259 *
1260 * The map must NOT be locked. It will be returned locked
1261 * on KERN_SUCCESS, unlocked on failure.
1262 *
1263 * If an entry is allocated, the object/offset fields
1264 * are initialized to zero.
1265 */
1266 kern_return_t
1267 vm_map_find_space(
1268 register vm_map_t map,
1269 vm_map_offset_t *address, /* OUT */
1270 vm_map_size_t size,
1271 vm_map_offset_t mask,
1272 int flags,
1273 vm_map_entry_t *o_entry) /* OUT */
1274 {
1275 register vm_map_entry_t entry, new_entry;
1276 register vm_map_offset_t start;
1277 register vm_map_offset_t end;
1278
1279 if (size == 0) {
1280 *address = 0;
1281 return KERN_INVALID_ARGUMENT;
1282 }
1283
1284 if (flags & VM_FLAGS_GUARD_AFTER) {
1285 /* account for the back guard page in the size */
1286 size += PAGE_SIZE_64;
1287 }
1288
1289 new_entry = vm_map_entry_create(map);
1290
1291 /*
1292 * Look for the first possible address; if there's already
1293 * something at this address, we have to start after it.
1294 */
1295
1296 vm_map_lock(map);
1297
1298 assert(first_free_is_valid(map));
1299 if ((entry = map->first_free) == vm_map_to_entry(map))
1300 start = map->min_offset;
1301 else
1302 start = entry->vme_end;
1303
1304 /*
1305 * In any case, the "entry" always precedes
1306 * the proposed new region throughout the loop:
1307 */
1308
1309 while (TRUE) {
1310 register vm_map_entry_t next;
1311
1312 /*
1313 * Find the end of the proposed new region.
1314 * Be sure we didn't go beyond the end, or
1315 * wrap around the address.
1316 */
1317
1318 if (flags & VM_FLAGS_GUARD_BEFORE) {
1319 /* reserve space for the front guard page */
1320 start += PAGE_SIZE_64;
1321 }
1322 end = ((start + mask) & ~mask);
1323
1324 if (end < start) {
1325 vm_map_entry_dispose(map, new_entry);
1326 vm_map_unlock(map);
1327 return(KERN_NO_SPACE);
1328 }
1329 start = end;
1330 end += size;
1331
1332 if ((end > map->max_offset) || (end < start)) {
1333 vm_map_entry_dispose(map, new_entry);
1334 vm_map_unlock(map);
1335 return(KERN_NO_SPACE);
1336 }
1337
1338 /*
1339 * If there are no more entries, we must win.
1340 */
1341
1342 next = entry->vme_next;
1343 if (next == vm_map_to_entry(map))
1344 break;
1345
1346 /*
1347 * If there is another entry, it must be
1348 * after the end of the potential new region.
1349 */
1350
1351 if (next->vme_start >= end)
1352 break;
1353
1354 /*
1355 * Didn't fit -- move to the next entry.
1356 */
1357
1358 entry = next;
1359 start = entry->vme_end;
1360 }
1361
1362 /*
1363 * At this point,
1364 * "start" and "end" should define the endpoints of the
1365 * available new range, and
1366 * "entry" should refer to the region before the new
1367 * range, and
1368 *
1369 * the map should be locked.
1370 */
1371
1372 if (flags & VM_FLAGS_GUARD_BEFORE) {
1373 /* go back for the front guard page */
1374 start -= PAGE_SIZE_64;
1375 }
1376 *address = start;
1377
1378 new_entry->vme_start = start;
1379 new_entry->vme_end = end;
1380 assert(page_aligned(new_entry->vme_start));
1381 assert(page_aligned(new_entry->vme_end));
1382
1383 new_entry->is_shared = FALSE;
1384 new_entry->is_sub_map = FALSE;
1385 new_entry->use_pmap = FALSE;
1386 new_entry->object.vm_object = VM_OBJECT_NULL;
1387 new_entry->offset = (vm_object_offset_t) 0;
1388
1389 new_entry->needs_copy = FALSE;
1390
1391 new_entry->inheritance = VM_INHERIT_DEFAULT;
1392 new_entry->protection = VM_PROT_DEFAULT;
1393 new_entry->max_protection = VM_PROT_ALL;
1394 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1395 new_entry->wired_count = 0;
1396 new_entry->user_wired_count = 0;
1397
1398 new_entry->in_transition = FALSE;
1399 new_entry->needs_wakeup = FALSE;
1400 new_entry->no_cache = FALSE;
1401 new_entry->permanent = FALSE;
1402 new_entry->superpage_size = 0;
1403
1404 new_entry->alias = 0;
1405 new_entry->zero_wired_pages = FALSE;
1406
1407 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1408
1409 /*
1410 * Insert the new entry into the list
1411 */
1412
1413 vm_map_entry_link(map, entry, new_entry);
1414
1415 map->size += size;
1416
1417 /*
1418 * Update the lookup hint
1419 */
1420 SAVE_HINT_MAP_WRITE(map, new_entry);
1421
1422 *o_entry = new_entry;
1423 return(KERN_SUCCESS);
1424 }
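
/*
 * Illustrative kmem_alloc-style use of vm_map_find_space(); a sketch under
 * assumptions, not the real caller (see vm_kern.c for that). "object" and
 * "size" are hypothetical, and the map comes back locked on KERN_SUCCESS:
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, (vm_map_offset_t) 0,
 *			       0, &entry);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	entry->object.vm_object = object;	// fill in the zeroed object/offset
 *	entry->offset = (vm_object_offset_t) 0;
 *	vm_map_unlock(kernel_map);		// find_space returned the map locked
 */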
1425
1426 int vm_map_pmap_enter_print = FALSE;
1427 int vm_map_pmap_enter_enable = FALSE;
1428
1429 /*
1430 * Routine: vm_map_pmap_enter [internal only]
1431 *
1432 * Description:
1433 * Force pages from the specified object to be entered into
1434 * the pmap at the specified address if they are present.
1435 * The scan ends as soon as a page is not found in the object.
1436 *
1437 * Returns:
1438 * Nothing.
1439 *
1440 * In/out conditions:
1441 * The source map should not be locked on entry.
1442 */
1443 static void
1444 vm_map_pmap_enter(
1445 vm_map_t map,
1446 register vm_map_offset_t addr,
1447 register vm_map_offset_t end_addr,
1448 register vm_object_t object,
1449 vm_object_offset_t offset,
1450 vm_prot_t protection)
1451 {
1452 int type_of_fault;
1453 kern_return_t kr;
1454
1455 if(map->pmap == 0)
1456 return;
1457
1458 while (addr < end_addr) {
1459 register vm_page_t m;
1460
1461 vm_object_lock(object);
1462
1463 m = vm_page_lookup(object, offset);
1464 /*
1465 * ENCRYPTED SWAP:
1466 * The user should never see encrypted data, so do not
1467 * enter an encrypted page in the page table.
1468 */
1469 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1470 m->fictitious ||
1471 (m->unusual && ( m->error || m->restart || m->absent))) {
1472 vm_object_unlock(object);
1473 return;
1474 }
1475
1476 if (vm_map_pmap_enter_print) {
1477 printf("vm_map_pmap_enter:");
1478 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1479 map, (unsigned long long)addr, object, (unsigned long long)offset);
1480 }
1481 type_of_fault = DBG_CACHE_HIT_FAULT;
1482 kr = vm_fault_enter(m, map->pmap, addr, protection,
1483 VM_PAGE_WIRED(m), FALSE, FALSE,
1484 &type_of_fault);
1485
1486 vm_object_unlock(object);
1487
1488 offset += PAGE_SIZE_64;
1489 addr += PAGE_SIZE;
1490 }
1491 }
1492
1493 boolean_t vm_map_pmap_is_empty(
1494 vm_map_t map,
1495 vm_map_offset_t start,
1496 vm_map_offset_t end);
1497 boolean_t vm_map_pmap_is_empty(
1498 vm_map_t map,
1499 vm_map_offset_t start,
1500 vm_map_offset_t end)
1501 {
1502 #ifdef MACHINE_PMAP_IS_EMPTY
1503 return pmap_is_empty(map->pmap, start, end);
1504 #else /* MACHINE_PMAP_IS_EMPTY */
1505 vm_map_offset_t offset;
1506 ppnum_t phys_page;
1507
1508 if (map->pmap == NULL) {
1509 return TRUE;
1510 }
1511
1512 for (offset = start;
1513 offset < end;
1514 offset += PAGE_SIZE) {
1515 phys_page = pmap_find_phys(map->pmap, offset);
1516 if (phys_page) {
1517 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1518 "page %d at 0x%llx\n",
1519 map, (long long)start, (long long)end,
1520 phys_page, (long long)offset);
1521 return FALSE;
1522 }
1523 }
1524 return TRUE;
1525 #endif /* MACHINE_PMAP_IS_EMPTY */
1526 }
1527
1528 /*
1529 * Routine: vm_map_enter
1530 *
1531 * Description:
1532 * Allocate a range in the specified virtual address map.
1533 * The resulting range will refer to memory defined by
1534 * the given memory object and offset into that object.
1535 *
1536 * Arguments are as defined in the vm_map call.
1537 */
1538 int _map_enter_debug = 0;
1539 static unsigned int vm_map_enter_restore_successes = 0;
1540 static unsigned int vm_map_enter_restore_failures = 0;
1541 kern_return_t
1542 vm_map_enter(
1543 vm_map_t map,
1544 vm_map_offset_t *address, /* IN/OUT */
1545 vm_map_size_t size,
1546 vm_map_offset_t mask,
1547 int flags,
1548 vm_object_t object,
1549 vm_object_offset_t offset,
1550 boolean_t needs_copy,
1551 vm_prot_t cur_protection,
1552 vm_prot_t max_protection,
1553 vm_inherit_t inheritance)
1554 {
1555 vm_map_entry_t entry, new_entry;
1556 vm_map_offset_t start, tmp_start, tmp_offset;
1557 vm_map_offset_t end, tmp_end;
1558 vm_map_offset_t tmp2_start, tmp2_end;
1559 vm_map_offset_t step;
1560 kern_return_t result = KERN_SUCCESS;
1561 vm_map_t zap_old_map = VM_MAP_NULL;
1562 vm_map_t zap_new_map = VM_MAP_NULL;
1563 boolean_t map_locked = FALSE;
1564 boolean_t pmap_empty = TRUE;
1565 boolean_t new_mapping_established = FALSE;
1566 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1567 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1568 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1569 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1570 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1571 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1572 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1573 char alias;
1574 vm_map_offset_t effective_min_offset, effective_max_offset;
1575 kern_return_t kr;
1576
1577 if (superpage_size) {
1578 switch (superpage_size) {
1579 /*
1580 * Note that the current implementation only supports
1581 * a single size for superpages, SUPERPAGE_SIZE, per
1582 * architecture. When more sizes are to be
1583 * supported, SUPERPAGE_SIZE has to be replaced
1584 * with a lookup of the size depending on superpage_size.
1585 */
1586 #ifdef __x86_64__
1587 case SUPERPAGE_SIZE_2MB:
1588 break;
1589 #endif
1590 default:
1591 return KERN_INVALID_ARGUMENT;
1592 }
1593 mask = SUPERPAGE_SIZE-1;
1594 if (size & (SUPERPAGE_SIZE-1))
1595 return KERN_INVALID_ARGUMENT;
1596 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1597 }
1598
1599 #if CONFIG_EMBEDDED
1600 if (cur_protection & VM_PROT_WRITE) {
1601 if (cur_protection & VM_PROT_EXECUTE) {
1602 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1603 cur_protection &= ~VM_PROT_EXECUTE;
1604 }
1605 }
1606 #endif /* CONFIG_EMBEDDED */
1607
1608 if (is_submap) {
1609 if (purgable) {
1610 /* submaps can not be purgeable */
1611 return KERN_INVALID_ARGUMENT;
1612 }
1613 if (object == VM_OBJECT_NULL) {
1614 /* submaps can not be created lazily */
1615 return KERN_INVALID_ARGUMENT;
1616 }
1617 }
1618 if (flags & VM_FLAGS_ALREADY) {
1619 /*
1620 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1621 * is already present. For it to be meaningful, the requested
1622 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1623 * we shouldn't try to remove what was mapped there first
1624 * (!VM_FLAGS_OVERWRITE).
1625 */
1626 if ((flags & VM_FLAGS_ANYWHERE) ||
1627 (flags & VM_FLAGS_OVERWRITE)) {
1628 return KERN_INVALID_ARGUMENT;
1629 }
1630 }
1631
1632 if (flags & VM_FLAGS_BELOW_MIN) {
1633 /*
1634 * Allow an insertion below the map's min offset.
1635 */
1636 effective_min_offset = 0ULL;
1637 } else {
1638 effective_min_offset = map->min_offset;
1639 }
1640
1641 if (flags & VM_FLAGS_BEYOND_MAX) {
1642 /*
1643 * Allow an insertion beyond the map's max offset.
1644 */
1645 if (vm_map_is_64bit(map))
1646 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1647 else
1648 effective_max_offset = 0x00000000FFFFF000ULL;
1649 } else {
1650 effective_max_offset = map->max_offset;
1651 }
1652
1653 if (size == 0 ||
1654 (offset & PAGE_MASK_64) != 0) {
1655 *address = 0;
1656 return KERN_INVALID_ARGUMENT;
1657 }
1658
1659 VM_GET_FLAGS_ALIAS(flags, alias);
1660
1661 #define RETURN(value) { result = value; goto BailOut; }
1662
1663 assert(page_aligned(*address));
1664 assert(page_aligned(size));
1665
1666 /*
1667 * Only zero-fill objects are allowed to be purgable.
1668 * LP64todo - limit purgable objects to 32-bits for now
1669 */
1670 if (purgable &&
1671 (offset != 0 ||
1672 (object != VM_OBJECT_NULL &&
1673 (object->size != size ||
1674 object->purgable == VM_PURGABLE_DENY))
1675 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1676 return KERN_INVALID_ARGUMENT;
1677
1678 if (!anywhere && overwrite) {
1679 /*
1680 * Create a temporary VM map to hold the old mappings in the
1681 * affected area while we create the new one.
1682 * This avoids releasing the VM map lock in
1683 * vm_map_entry_delete() and allows atomicity
1684 * when we want to replace some mappings with a new one.
1685 * It also allows us to restore the old VM mappings if the
1686 * new mapping fails.
1687 */
1688 zap_old_map = vm_map_create(PMAP_NULL,
1689 *address,
1690 *address + size,
1691 map->hdr.entries_pageable);
1692 }
1693
1694 StartAgain: ;
1695
1696 start = *address;
1697
1698 if (anywhere) {
1699 vm_map_lock(map);
1700 map_locked = TRUE;
1701
1702 /*
1703 * Calculate the first possible address.
1704 */
1705
1706 if (start < effective_min_offset)
1707 start = effective_min_offset;
1708 if (start > effective_max_offset)
1709 RETURN(KERN_NO_SPACE);
1710
1711 /*
1712 * Look for the first possible address;
1713 * if there's already something at this
1714 * address, we have to start after it.
1715 */
1716
1717 assert(first_free_is_valid(map));
1718 if (start == effective_min_offset) {
1719 if ((entry = map->first_free) != vm_map_to_entry(map))
1720 start = entry->vme_end;
1721 } else {
1722 vm_map_entry_t tmp_entry;
1723 if (vm_map_lookup_entry(map, start, &tmp_entry))
1724 start = tmp_entry->vme_end;
1725 entry = tmp_entry;
1726 }
1727
1728 /*
1729 * In any case, the "entry" always precedes
1730 * the proposed new region throughout the
1731 * loop:
1732 */
1733
1734 while (TRUE) {
1735 register vm_map_entry_t next;
1736
1737 /*
1738 * Find the end of the proposed new region.
1739 * Be sure we didn't go beyond the end, or
1740 * wrap around the address.
1741 */
1742
1743 end = ((start + mask) & ~mask);
1744 if (end < start)
1745 RETURN(KERN_NO_SPACE);
1746 start = end;
1747 end += size;
1748
1749 if ((end > effective_max_offset) || (end < start)) {
1750 if (map->wait_for_space) {
1751 if (size <= (effective_max_offset -
1752 effective_min_offset)) {
1753 assert_wait((event_t)map,
1754 THREAD_ABORTSAFE);
1755 vm_map_unlock(map);
1756 map_locked = FALSE;
1757 thread_block(THREAD_CONTINUE_NULL);
1758 goto StartAgain;
1759 }
1760 }
1761 RETURN(KERN_NO_SPACE);
1762 }
1763
1764 /*
1765 * If there are no more entries, we must win.
1766 */
1767
1768 next = entry->vme_next;
1769 if (next == vm_map_to_entry(map))
1770 break;
1771
1772 /*
1773 * If there is another entry, it must be
1774 * after the end of the potential new region.
1775 */
1776
1777 if (next->vme_start >= end)
1778 break;
1779
1780 /*
1781 * Didn't fit -- move to the next entry.
1782 */
1783
1784 entry = next;
1785 start = entry->vme_end;
1786 }
1787 *address = start;
1788 } else {
1789 /*
1790 * Verify that:
1791 * the address doesn't itself violate
1792 * the mask requirement.
1793 */
1794
1795 vm_map_lock(map);
1796 map_locked = TRUE;
1797 if ((start & mask) != 0)
1798 RETURN(KERN_NO_SPACE);
1799
1800 /*
1801 * ... the address is within bounds
1802 */
1803
1804 end = start + size;
1805
1806 if ((start < effective_min_offset) ||
1807 (end > effective_max_offset) ||
1808 (start >= end)) {
1809 RETURN(KERN_INVALID_ADDRESS);
1810 }
1811
1812 if (overwrite && zap_old_map != VM_MAP_NULL) {
1813 /*
1814 * Fixed mapping and "overwrite" flag: attempt to
1815 * remove all existing mappings in the specified
1816 * address range, saving them in our "zap_old_map".
1817 */
1818 (void) vm_map_delete(map, start, end,
1819 VM_MAP_REMOVE_SAVE_ENTRIES,
1820 zap_old_map);
1821 }
1822
1823 /*
1824 * ... the starting address isn't allocated
1825 */
1826
1827 if (vm_map_lookup_entry(map, start, &entry)) {
1828 if (! (flags & VM_FLAGS_ALREADY)) {
1829 RETURN(KERN_NO_SPACE);
1830 }
1831 /*
1832 * Check if what's already there is what we want.
1833 */
1834 tmp_start = start;
1835 tmp_offset = offset;
1836 if (entry->vme_start < start) {
1837 tmp_start -= start - entry->vme_start;
1838 tmp_offset -= start - entry->vme_start;
1839
1840 }
1841 for (; entry->vme_start < end;
1842 entry = entry->vme_next) {
1843 /*
1844 * Check if the mapping's attributes
1845 * match the existing map entry.
1846 */
1847 if (entry == vm_map_to_entry(map) ||
1848 entry->vme_start != tmp_start ||
1849 entry->is_sub_map != is_submap ||
1850 entry->offset != tmp_offset ||
1851 entry->needs_copy != needs_copy ||
1852 entry->protection != cur_protection ||
1853 entry->max_protection != max_protection ||
1854 entry->inheritance != inheritance ||
1855 entry->alias != alias) {
1856 /* not the same mapping ! */
1857 RETURN(KERN_NO_SPACE);
1858 }
1859 /*
1860 * Check if the same object is being mapped.
1861 */
1862 if (is_submap) {
1863 if (entry->object.sub_map !=
1864 (vm_map_t) object) {
1865 /* not the same submap */
1866 RETURN(KERN_NO_SPACE);
1867 }
1868 } else {
1869 if (entry->object.vm_object != object) {
1870 /* not the same VM object... */
1871 vm_object_t obj2;
1872
1873 obj2 = entry->object.vm_object;
1874 if ((obj2 == VM_OBJECT_NULL ||
1875 obj2->internal) &&
1876 (object == VM_OBJECT_NULL ||
1877 object->internal)) {
1878 /*
1879 * ... but both are
1880 * anonymous memory,
1881 * so equivalent.
1882 */
1883 } else {
1884 RETURN(KERN_NO_SPACE);
1885 }
1886 }
1887 }
1888
1889 tmp_offset += entry->vme_end - entry->vme_start;
1890 tmp_start += entry->vme_end - entry->vme_start;
1891 if (entry->vme_end >= end) {
1892 /* reached the end of our mapping */
1893 break;
1894 }
1895 }
1896 /* it all matches: let's use what's already there ! */
1897 RETURN(KERN_MEMORY_PRESENT);
1898 }
1899
1900 /*
1901 * ... the next region doesn't overlap the
1902 * end point.
1903 */
1904
1905 if ((entry->vme_next != vm_map_to_entry(map)) &&
1906 (entry->vme_next->vme_start < end))
1907 RETURN(KERN_NO_SPACE);
1908 }
1909
1910 /*
1911 * At this point,
1912 * "start" and "end" should define the endpoints of the
1913 * available new range, and
1914 * "entry" should refer to the region before the new
1915 * range, and
1916 *
1917 * the map should be locked.
1918 */
1919
1920 /*
1921 * See whether we can avoid creating a new entry (and object) by
1922 * extending one of our neighbors. [So far, we only attempt to
1923 * extend from below.] Note that we can never extend/join
1924 * purgable objects because they need to remain distinct
1925 * entities in order to implement their "volatile object"
1926 * semantics.
1927 */
1928
1929 if (purgable) {
1930 if (object == VM_OBJECT_NULL) {
1931 object = vm_object_allocate(size);
1932 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1933 object->purgable = VM_PURGABLE_NONVOLATILE;
1934 offset = (vm_object_offset_t)0;
1935 }
1936 } else if ((is_submap == FALSE) &&
1937 (object == VM_OBJECT_NULL) &&
1938 (entry != vm_map_to_entry(map)) &&
1939 (entry->vme_end == start) &&
1940 (!entry->is_shared) &&
1941 (!entry->is_sub_map) &&
1942 (entry->alias == alias) &&
1943 (entry->inheritance == inheritance) &&
1944 (entry->protection == cur_protection) &&
1945 (entry->max_protection == max_protection) &&
1946 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1947 (entry->in_transition == 0) &&
1948 (entry->no_cache == no_cache) &&
1949 ((entry->vme_end - entry->vme_start) + size <=
1950 (alias == VM_MEMORY_REALLOC ?
1951 ANON_CHUNK_SIZE :
1952 NO_COALESCE_LIMIT)) &&
1953 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1954 if (vm_object_coalesce(entry->object.vm_object,
1955 VM_OBJECT_NULL,
1956 entry->offset,
1957 (vm_object_offset_t) 0,
1958 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1959 (vm_map_size_t)(end - entry->vme_end))) {
1960
1961 /*
1962 * Coalesced the two objects - can extend
1963 * the previous map entry to include the
1964 * new range.
1965 */
1966 map->size += (end - entry->vme_end);
1967 entry->vme_end = end;
1968 UPDATE_FIRST_FREE(map, map->first_free);
1969 RETURN(KERN_SUCCESS);
1970 }
1971 }
1972
1973 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1974 new_entry = NULL;
1975
1976 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1977 tmp2_end = tmp2_start + step;
1978 /*
1979 * Create a new entry
1980 * LP64todo - for now, we can only allocate 4GB internal objects
1981 * because the default pager can't page bigger ones. Remove this
1982 * when it can.
1983 *
1984 * XXX FBDP
1985 * The reserved "page zero" in each process's address space can
1986 * be arbitrarily large. Splitting it into separate 4GB objects and
1987 * therefore different VM map entries serves no purpose and just
1988 * slows down operations on the VM map, so let's not split the
1989 * allocation into 4GB chunks if the max protection is NONE. That
1990 * memory should never be accessible, so it will never get to the
1991 * default pager.
1992 */
1993 tmp_start = tmp2_start;
1994 if (object == VM_OBJECT_NULL &&
1995 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1996 max_protection != VM_PROT_NONE &&
1997 superpage_size == 0)
1998 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1999 else
2000 tmp_end = tmp2_end;
2001 do {
2002 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2003 object, offset, needs_copy,
2004 FALSE, FALSE,
2005 cur_protection, max_protection,
2006 VM_BEHAVIOR_DEFAULT,
2007 inheritance, 0, no_cache,
2008 permanent, superpage_size);
2009 new_entry->alias = alias;
2010 if (is_submap) {
2011 vm_map_t submap;
2012 boolean_t submap_is_64bit;
2013 boolean_t use_pmap;
2014
2015 new_entry->is_sub_map = TRUE;
2016 submap = (vm_map_t) object;
2017 submap_is_64bit = vm_map_is_64bit(submap);
2018 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2019 #ifndef NO_NESTED_PMAP
2020 if (use_pmap && submap->pmap == NULL) {
2021 /* we need a sub pmap to nest... */
2022 submap->pmap = pmap_create(0, submap_is_64bit);
2023 if (submap->pmap == NULL) {
2024 /* let's proceed without nesting... */
2025 }
2026 }
2027 if (use_pmap && submap->pmap != NULL) {
2028 kr = pmap_nest(map->pmap,
2029 submap->pmap,
2030 tmp_start,
2031 tmp_start,
2032 tmp_end - tmp_start);
2033 if (kr != KERN_SUCCESS) {
2034 printf("vm_map_enter: "
2035 "pmap_nest(0x%llx,0x%llx) "
2036 "error 0x%x\n",
2037 (long long)tmp_start,
2038 (long long)tmp_end,
2039 kr);
2040 } else {
2041 /* we're now nested ! */
2042 new_entry->use_pmap = TRUE;
2043 pmap_empty = FALSE;
2044 }
2045 }
2046 #endif /* NO_NESTED_PMAP */
2047 }
2048 entry = new_entry;
2049
2050 if (superpage_size) {
2051 vm_page_t pages, m;
2052 vm_object_t sp_object;
2053
2054 entry->offset = 0;
2055
2056 /* allocate one superpage */
2057 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2058 if (kr != KERN_SUCCESS) {
2059 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2060 RETURN(kr);
2061 }
2062
2063 /* create one vm_object per superpage */
2064 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2065 sp_object->phys_contiguous = TRUE;
2066 sp_object->shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2067 entry->object.vm_object = sp_object;
2068
2069 /* enter the base pages into the object */
2070 vm_object_lock(sp_object);
2071 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2072 m = pages;
2073 pmap_zero_page(m->phys_page);
2074 pages = NEXT_PAGE(m);
2075 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2076 vm_page_insert(m, sp_object, offset);
2077 }
2078 vm_object_unlock(sp_object);
2079 }
2080 } while (tmp_end != tmp2_end &&
2081 (tmp_start = tmp_end) &&
2082 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2083 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2084 }
2085
2086 vm_map_unlock(map);
2087 map_locked = FALSE;
2088
2089 new_mapping_established = TRUE;
2090
2091 /* Wire down the new entry if the user
2092 * requested all new map entries be wired.
2093 */
2094 if ((map->wiring_required)||(superpage_size)) {
2095 pmap_empty = FALSE; /* pmap won't be empty */
2096 result = vm_map_wire(map, start, end,
2097 new_entry->protection, TRUE);
2098 RETURN(result);
2099 }
2100
2101 if ((object != VM_OBJECT_NULL) &&
2102 (vm_map_pmap_enter_enable) &&
2103 (!anywhere) &&
2104 (!needs_copy) &&
2105 (size < (128*1024))) {
2106 pmap_empty = FALSE; /* pmap won't be empty */
2107
2108 if (override_nx(map, alias) && cur_protection)
2109 cur_protection |= VM_PROT_EXECUTE;
2110
2111 vm_map_pmap_enter(map, start, end,
2112 object, offset, cur_protection);
2113 }
2114
2115 BailOut: ;
2116 if (result == KERN_SUCCESS) {
2117 vm_prot_t pager_prot;
2118 memory_object_t pager;
2119
2120 if (pmap_empty &&
2121 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2122 assert(vm_map_pmap_is_empty(map,
2123 *address,
2124 *address+size));
2125 }
2126
2127 /*
2128 * For "named" VM objects, let the pager know that the
2129 * memory object is being mapped. Some pagers need to keep
2130 * track of this, to know when they can reclaim the memory
2131 * object, for example.
2132 * VM calls memory_object_map() for each mapping (specifying
2133 * the protection of each mapping) and calls
2134 * memory_object_last_unmap() when all the mappings are gone.
2135 */
2136 pager_prot = max_protection;
2137 if (needs_copy) {
2138 /*
2139 * Copy-On-Write mapping: won't modify
2140 * the memory object.
2141 */
2142 pager_prot &= ~VM_PROT_WRITE;
2143 }
2144 if (!is_submap &&
2145 object != VM_OBJECT_NULL &&
2146 object->named &&
2147 object->pager != MEMORY_OBJECT_NULL) {
2148 vm_object_lock(object);
2149 pager = object->pager;
2150 if (object->named &&
2151 pager != MEMORY_OBJECT_NULL) {
2152 assert(object->pager_ready);
2153 vm_object_mapping_wait(object, THREAD_UNINT);
2154 vm_object_mapping_begin(object);
2155 vm_object_unlock(object);
2156
2157 kr = memory_object_map(pager, pager_prot);
2158 assert(kr == KERN_SUCCESS);
2159
2160 vm_object_lock(object);
2161 vm_object_mapping_end(object);
2162 }
2163 vm_object_unlock(object);
2164 }
2165 } else {
2166 if (new_mapping_established) {
2167 /*
2168 * We have to get rid of the new mappings since we
2169 * won't make them available to the user.
2170 * Try and do that atomically, to minimize the risk
2171 * that someone else creates new mappings in that range.
2172 */
2173 zap_new_map = vm_map_create(PMAP_NULL,
2174 *address,
2175 *address + size,
2176 map->hdr.entries_pageable);
2177 if (!map_locked) {
2178 vm_map_lock(map);
2179 map_locked = TRUE;
2180 }
2181 (void) vm_map_delete(map, *address, *address+size,
2182 VM_MAP_REMOVE_SAVE_ENTRIES,
2183 zap_new_map);
2184 }
2185 if (zap_old_map != VM_MAP_NULL &&
2186 zap_old_map->hdr.nentries != 0) {
2187 vm_map_entry_t entry1, entry2;
2188
2189 /*
2190 * The new mapping failed. Attempt to restore
2191 * the old mappings, saved in the "zap_old_map".
2192 */
2193 if (!map_locked) {
2194 vm_map_lock(map);
2195 map_locked = TRUE;
2196 }
2197
2198 /* first check if the coast is still clear */
2199 start = vm_map_first_entry(zap_old_map)->vme_start;
2200 end = vm_map_last_entry(zap_old_map)->vme_end;
2201 if (vm_map_lookup_entry(map, start, &entry1) ||
2202 vm_map_lookup_entry(map, end, &entry2) ||
2203 entry1 != entry2) {
2204 /*
2205 * Part of that range has already been
2206 * re-mapped: we can't restore the old
2207 * mappings...
2208 */
2209 vm_map_enter_restore_failures++;
2210 } else {
2211 /*
2212 * Transfer the saved map entries from
2213 * "zap_old_map" to the original "map",
2214 * inserting them all after "entry1".
2215 */
2216 for (entry2 = vm_map_first_entry(zap_old_map);
2217 entry2 != vm_map_to_entry(zap_old_map);
2218 entry2 = vm_map_first_entry(zap_old_map)) {
2219 vm_map_size_t entry_size;
2220
2221 entry_size = (entry2->vme_end -
2222 entry2->vme_start);
2223 vm_map_entry_unlink(zap_old_map,
2224 entry2);
2225 zap_old_map->size -= entry_size;
2226 vm_map_entry_link(map, entry1, entry2);
2227 map->size += entry_size;
2228 entry1 = entry2;
2229 }
2230 if (map->wiring_required) {
2231 /*
2232 * XXX TODO: we should rewire the
2233 * old pages here...
2234 */
2235 }
2236 vm_map_enter_restore_successes++;
2237 }
2238 }
2239 }
2240
2241 if (map_locked) {
2242 vm_map_unlock(map);
2243 }
2244
2245 /*
2246 * Get rid of the "zap_maps" and all the map entries that
2247 * they may still contain.
2248 */
2249 if (zap_old_map != VM_MAP_NULL) {
2250 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2251 zap_old_map = VM_MAP_NULL;
2252 }
2253 if (zap_new_map != VM_MAP_NULL) {
2254 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2255 zap_new_map = VM_MAP_NULL;
2256 }
2257
2258 return result;
2259
2260 #undef RETURN
2261 }
2262
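/*
 * vm_map_enter_mem_object:
 *
 * Map the memory named by "port" into "target_map".  The port may be
 * invalid (anonymous memory), a named entry (backed by a submap, a pager
 * or a VM object), or a raw memory object; in every case the mapping is
 * ultimately established through vm_map_enter() above.
 */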
2263 kern_return_t
2264 vm_map_enter_mem_object(
2265 vm_map_t target_map,
2266 vm_map_offset_t *address,
2267 vm_map_size_t initial_size,
2268 vm_map_offset_t mask,
2269 int flags,
2270 ipc_port_t port,
2271 vm_object_offset_t offset,
2272 boolean_t copy,
2273 vm_prot_t cur_protection,
2274 vm_prot_t max_protection,
2275 vm_inherit_t inheritance)
2276 {
2277 vm_map_address_t map_addr;
2278 vm_map_size_t map_size;
2279 vm_object_t object;
2280 vm_object_size_t size;
2281 kern_return_t result;
2282
2283 /*
2284 * Check arguments for validity
2285 */
2286 if ((target_map == VM_MAP_NULL) ||
2287 (cur_protection & ~VM_PROT_ALL) ||
2288 (max_protection & ~VM_PROT_ALL) ||
2289 (inheritance > VM_INHERIT_LAST_VALID) ||
2290 initial_size == 0)
2291 return KERN_INVALID_ARGUMENT;
2292
2293 map_addr = vm_map_trunc_page(*address);
2294 map_size = vm_map_round_page(initial_size);
2295 size = vm_object_round_page(initial_size);
2296
2297 /*
2298 * Find the vm object (if any) corresponding to this port.
2299 */
2300 if (!IP_VALID(port)) {
2301 object = VM_OBJECT_NULL;
2302 offset = 0;
2303 copy = FALSE;
2304 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2305 vm_named_entry_t named_entry;
2306
2307 named_entry = (vm_named_entry_t) port->ip_kobject;
2308 /* a few checks to make sure the user is obeying the rules */
2309 if (size == 0) {
2310 if (offset >= named_entry->size)
2311 return KERN_INVALID_RIGHT;
2312 size = named_entry->size - offset;
2313 }
2314 if ((named_entry->protection & max_protection) !=
2315 max_protection)
2316 return KERN_INVALID_RIGHT;
2317 if ((named_entry->protection & cur_protection) !=
2318 cur_protection)
2319 return KERN_INVALID_RIGHT;
2320 if (named_entry->size < (offset + size))
2321 return KERN_INVALID_ARGUMENT;
2322
2323 /* the caller's parameter "offset" is defined to be the offset */
2324 /* from the beginning of the named entry, not of the backing object */
2325 offset = offset + named_entry->offset;
2326
2327 named_entry_lock(named_entry);
2328 if (named_entry->is_sub_map) {
2329 vm_map_t submap;
2330
2331 submap = named_entry->backing.map;
2332 vm_map_lock(submap);
2333 vm_map_reference(submap);
2334 vm_map_unlock(submap);
2335 named_entry_unlock(named_entry);
2336
2337 result = vm_map_enter(target_map,
2338 &map_addr,
2339 map_size,
2340 mask,
2341 flags | VM_FLAGS_SUBMAP,
2342 (vm_object_t) submap,
2343 offset,
2344 copy,
2345 cur_protection,
2346 max_protection,
2347 inheritance);
2348 if (result != KERN_SUCCESS) {
2349 vm_map_deallocate(submap);
2350 } else {
2351 /*
2352 * No need to lock "submap" just to check its
2353 * "mapped" flag: that flag is never reset
2354 * once it's been set and if we race, we'll
2355 * just end up setting it twice, which is OK.
2356 */
2357 if (submap->mapped == FALSE) {
2358 /*
2359 * This submap has never been mapped.
2360 * Set its "mapped" flag now that it
2361 * has been mapped.
2362 * This happens only for the first ever
2363 * mapping of a "submap".
2364 */
2365 vm_map_lock(submap);
2366 submap->mapped = TRUE;
2367 vm_map_unlock(submap);
2368 }
2369 *address = map_addr;
2370 }
2371 return result;
2372
2373 } else if (named_entry->is_pager) {
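/*
 * First mapping of a pager-backed named entry: set up a VM object
 * for the pager and cache it in the named entry so that later
 * mappings reuse that object directly.
 */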
2374 unsigned int access;
2375 vm_prot_t protections;
2376 unsigned int wimg_mode;
2377 boolean_t cache_attr;
2378
2379 protections = named_entry->protection & VM_PROT_ALL;
2380 access = GET_MAP_MEM(named_entry->protection);
2381
2382 object = vm_object_enter(named_entry->backing.pager,
2383 named_entry->size,
2384 named_entry->internal,
2385 FALSE,
2386 FALSE);
2387 if (object == VM_OBJECT_NULL) {
2388 named_entry_unlock(named_entry);
2389 return KERN_INVALID_OBJECT;
2390 }
2391
2392 /* JMM - drop reference on pager here */
2393
2394 /* create an extra ref for the named entry */
2395 vm_object_lock(object);
2396 vm_object_reference_locked(object);
2397 named_entry->backing.object = object;
2398 named_entry->is_pager = FALSE;
2399 named_entry_unlock(named_entry);
2400
2401 wimg_mode = object->wimg_bits;
2402 if (access == MAP_MEM_IO) {
2403 wimg_mode = VM_WIMG_IO;
2404 } else if (access == MAP_MEM_COPYBACK) {
2405 wimg_mode = VM_WIMG_USE_DEFAULT;
2406 } else if (access == MAP_MEM_WTHRU) {
2407 wimg_mode = VM_WIMG_WTHRU;
2408 } else if (access == MAP_MEM_WCOMB) {
2409 wimg_mode = VM_WIMG_WCOMB;
2410 }
2411 if (wimg_mode == VM_WIMG_IO ||
2412 wimg_mode == VM_WIMG_WCOMB)
2413 cache_attr = TRUE;
2414 else
2415 cache_attr = FALSE;
2416
2417 /* wait for object (if any) to be ready */
2418 if (!named_entry->internal) {
2419 while (!object->pager_ready) {
2420 vm_object_wait(
2421 object,
2422 VM_OBJECT_EVENT_PAGER_READY,
2423 THREAD_UNINT);
2424 vm_object_lock(object);
2425 }
2426 }
2427
2428 if (object->wimg_bits != wimg_mode) {
2429 vm_page_t p;
2430
2431 vm_object_paging_wait(object, THREAD_UNINT);
2432
2433 object->wimg_bits = wimg_mode;
2434 queue_iterate(&object->memq, p, vm_page_t, listq) {
2435 if (!p->fictitious) {
2436 if (p->pmapped)
2437 pmap_disconnect(p->phys_page);
2438 if (cache_attr)
2439 pmap_sync_page_attributes_phys(p->phys_page);
2440 }
2441 }
2442 }
2443 object->true_share = TRUE;
2444 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2445 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2446 vm_object_unlock(object);
2447 } else {
2448 /* This is the case where we are going to map */
2449 /* an already mapped object. If the object is */
2450 /* not ready it is internal. An external */
2451 /* object cannot be mapped until it is ready, */
2452 /* so we can avoid the ready check */
2453 /* in this case. */
2454 object = named_entry->backing.object;
2455 assert(object != VM_OBJECT_NULL);
2456 named_entry_unlock(named_entry);
2457 vm_object_reference(object);
2458 }
2459 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2460 /*
2461 * JMM - This is temporary until we unify named entries
2462 * and raw memory objects.
2463 *
2464 * Detected fake ip_kotype for a memory object. In
2465 * this case, the port isn't really a port at all, but
2466 * instead is just a raw memory object.
2467 */
2468
2469 object = vm_object_enter((memory_object_t)port,
2470 size, FALSE, FALSE, FALSE);
2471 if (object == VM_OBJECT_NULL)
2472 return KERN_INVALID_OBJECT;
2473
2474 /* wait for object (if any) to be ready */
2475 if (object != VM_OBJECT_NULL) {
2476 if (object == kernel_object) {
2477 printf("Warning: Attempt to map kernel object"
2478 " by a non-private kernel entity\n");
2479 return KERN_INVALID_OBJECT;
2480 }
2481 if (!object->pager_ready) {
2482 vm_object_lock(object);
2483
2484 while (!object->pager_ready) {
2485 vm_object_wait(object,
2486 VM_OBJECT_EVENT_PAGER_READY,
2487 THREAD_UNINT);
2488 vm_object_lock(object);
2489 }
2490 vm_object_unlock(object);
2491 }
2492 }
2493 } else {
2494 return KERN_INVALID_OBJECT;
2495 }
2496
2497 if (object != VM_OBJECT_NULL &&
2498 object->named &&
2499 object->pager != MEMORY_OBJECT_NULL &&
2500 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2501 memory_object_t pager;
2502 vm_prot_t pager_prot;
2503 kern_return_t kr;
2504
2505 /*
2506 * For "named" VM objects, let the pager know that the
2507 * memory object is being mapped. Some pagers need to keep
2508 * track of this, to know when they can reclaim the memory
2509 * object, for example.
2510 * VM calls memory_object_map() for each mapping (specifying
2511 * the protection of each mapping) and calls
2512 * memory_object_last_unmap() when all the mappings are gone.
2513 */
2514 pager_prot = max_protection;
2515 if (copy) {
2516 /*
2517 * Copy-On-Write mapping: won't modify the
2518 * memory object.
2519 */
2520 pager_prot &= ~VM_PROT_WRITE;
2521 }
2522 vm_object_lock(object);
2523 pager = object->pager;
2524 if (object->named &&
2525 pager != MEMORY_OBJECT_NULL &&
2526 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2527 assert(object->pager_ready);
2528 vm_object_mapping_wait(object, THREAD_UNINT);
2529 vm_object_mapping_begin(object);
2530 vm_object_unlock(object);
2531
2532 kr = memory_object_map(pager, pager_prot);
2533 assert(kr == KERN_SUCCESS);
2534
2535 vm_object_lock(object);
2536 vm_object_mapping_end(object);
2537 }
2538 vm_object_unlock(object);
2539 }
2540
2541 /*
2542 * Perform the copy if requested
2543 */
2544
2545 if (copy) {
2546 vm_object_t new_object;
2547 vm_object_offset_t new_offset;
2548
2549 result = vm_object_copy_strategically(object, offset, size,
2550 &new_object, &new_offset,
2551 &copy);
2552
2553
2554 if (result == KERN_MEMORY_RESTART_COPY) {
2555 boolean_t success;
2556 boolean_t src_needs_copy;
2557
2558 /*
2559 * XXX
2560 * We currently ignore src_needs_copy.
2561 * This really is the issue of how to make
2562 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2563 * non-kernel users to use. Solution forthcoming.
2564 * In the meantime, since we don't allow non-kernel
2565 * memory managers to specify symmetric copy,
2566 * we won't run into problems here.
2567 */
2568 new_object = object;
2569 new_offset = offset;
2570 success = vm_object_copy_quickly(&new_object,
2571 new_offset, size,
2572 &src_needs_copy,
2573 &copy);
2574 assert(success);
2575 result = KERN_SUCCESS;
2576 }
2577 /*
2578 * Throw away the reference to the
2579 * original object, as it won't be mapped.
2580 */
2581
2582 vm_object_deallocate(object);
2583
2584 if (result != KERN_SUCCESS)
2585 return result;
2586
2587 object = new_object;
2588 offset = new_offset;
2589 }
2590
2591 result = vm_map_enter(target_map,
2592 &map_addr, map_size,
2593 (vm_map_offset_t)mask,
2594 flags,
2595 object, offset,
2596 copy,
2597 cur_protection, max_protection, inheritance);
2598 if (result != KERN_SUCCESS)
2599 vm_object_deallocate(object);
2600 *address = map_addr;
2601 return result;
2602 }
2603
2604
2605
2606
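/*
 * vm_map_enter_mem_object_control:
 *
 * Map the VM object associated with the given memory object control
 * into "target_map".  An extra reference is taken on the object and,
 * for named, pager-backed objects, memory_object_map() is called to
 * let the pager know about the new mapping.
 */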
2607 kern_return_t
2608 vm_map_enter_mem_object_control(
2609 vm_map_t target_map,
2610 vm_map_offset_t *address,
2611 vm_map_size_t initial_size,
2612 vm_map_offset_t mask,
2613 int flags,
2614 memory_object_control_t control,
2615 vm_object_offset_t offset,
2616 boolean_t copy,
2617 vm_prot_t cur_protection,
2618 vm_prot_t max_protection,
2619 vm_inherit_t inheritance)
2620 {
2621 vm_map_address_t map_addr;
2622 vm_map_size_t map_size;
2623 vm_object_t object;
2624 vm_object_size_t size;
2625 kern_return_t result;
2626 memory_object_t pager;
2627 vm_prot_t pager_prot;
2628 kern_return_t kr;
2629
2630 /*
2631 * Check arguments for validity
2632 */
2633 if ((target_map == VM_MAP_NULL) ||
2634 (cur_protection & ~VM_PROT_ALL) ||
2635 (max_protection & ~VM_PROT_ALL) ||
2636 (inheritance > VM_INHERIT_LAST_VALID) ||
2637 initial_size == 0)
2638 return KERN_INVALID_ARGUMENT;
2639
2640 map_addr = vm_map_trunc_page(*address);
2641 map_size = vm_map_round_page(initial_size);
2642 size = vm_object_round_page(initial_size);
2643
2644 object = memory_object_control_to_vm_object(control);
2645
2646 if (object == VM_OBJECT_NULL)
2647 return KERN_INVALID_OBJECT;
2648
2649 if (object == kernel_object) {
2650 printf("Warning: Attempt to map kernel object"
2651 " by a non-private kernel entity\n");
2652 return KERN_INVALID_OBJECT;
2653 }
2654
2655 vm_object_lock(object);
2656 object->ref_count++;
2657 vm_object_res_reference(object);
2658
2659 /*
2660 * For "named" VM objects, let the pager know that the
2661 * memory object is being mapped. Some pagers need to keep
2662 * track of this, to know when they can reclaim the memory
2663 * object, for example.
2664 * VM calls memory_object_map() for each mapping (specifying
2665 * the protection of each mapping) and calls
2666 * memory_object_last_unmap() when all the mappings are gone.
2667 */
2668 pager_prot = max_protection;
2669 if (copy) {
2670 pager_prot &= ~VM_PROT_WRITE;
2671 }
2672 pager = object->pager;
2673 if (object->named &&
2674 pager != MEMORY_OBJECT_NULL &&
2675 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2676 assert(object->pager_ready);
2677 vm_object_mapping_wait(object, THREAD_UNINT);
2678 vm_object_mapping_begin(object);
2679 vm_object_unlock(object);
2680
2681 kr = memory_object_map(pager, pager_prot);
2682 assert(kr == KERN_SUCCESS);
2683
2684 vm_object_lock(object);
2685 vm_object_mapping_end(object);
2686 }
2687 vm_object_unlock(object);
2688
2689 /*
2690 * Perform the copy if requested
2691 */
2692
2693 if (copy) {
2694 vm_object_t new_object;
2695 vm_object_offset_t new_offset;
2696
2697 result = vm_object_copy_strategically(object, offset, size,
2698 &new_object, &new_offset,
2699 &copy);
2700
2701
2702 if (result == KERN_MEMORY_RESTART_COPY) {
2703 boolean_t success;
2704 boolean_t src_needs_copy;
2705
2706 /*
2707 * XXX
2708 * We currently ignore src_needs_copy.
2709 * This really is the issue of how to make
2710 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2711 * non-kernel users to use. Solution forthcoming.
2712 * In the meantime, since we don't allow non-kernel
2713 * memory managers to specify symmetric copy,
2714 * we won't run into problems here.
2715 */
2716 new_object = object;
2717 new_offset = offset;
2718 success = vm_object_copy_quickly(&new_object,
2719 new_offset, size,
2720 &src_needs_copy,
2721 &copy);
2722 assert(success);
2723 result = KERN_SUCCESS;
2724 }
2725 /*
2726 * Throw away the reference to the
2727 * original object, as it won't be mapped.
2728 */
2729
2730 vm_object_deallocate(object);
2731
2732 if (result != KERN_SUCCESS)
2733 return result;
2734
2735 object = new_object;
2736 offset = new_offset;
2737 }
2738
2739 result = vm_map_enter(target_map,
2740 &map_addr, map_size,
2741 (vm_map_offset_t)mask,
2742 flags,
2743 object, offset,
2744 copy,
2745 cur_protection, max_protection, inheritance);
2746 if (result != KERN_SUCCESS)
2747 vm_object_deallocate(object);
2748 *address = map_addr;
2749
2750 return result;
2751 }
2752
2753
2754 #if VM_CPM
2755
2756 #ifdef MACH_ASSERT
2757 extern pmap_paddr_t avail_start, avail_end;
2758 #endif
2759
2760 /*
2761 * Allocate memory in the specified map, with the caveat that
2762 * the memory is physically contiguous. This call may fail
2763 * if the system can't find sufficient contiguous memory.
2764 * This call may cause or lead to heart-stopping amounts of
2765 * paging activity.
2766 *
2767 * Memory obtained from this call should be freed in the
2768 * normal way, viz., via vm_deallocate.
2769 */
2770 kern_return_t
2771 vm_map_enter_cpm(
2772 vm_map_t map,
2773 vm_map_offset_t *addr,
2774 vm_map_size_t size,
2775 int flags)
2776 {
2777 vm_object_t cpm_obj;
2778 pmap_t pmap;
2779 vm_page_t m, pages;
2780 kern_return_t kr;
2781 vm_map_offset_t va, start, end, offset;
2782 #if MACH_ASSERT
2783 vm_map_offset_t prev_addr;
2784 #endif /* MACH_ASSERT */
2785
2786 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2787
2788 if (!vm_allocate_cpm_enabled)
2789 return KERN_FAILURE;
2790
2791 if (size == 0) {
2792 *addr = 0;
2793 return KERN_SUCCESS;
2794 }
2795 if (anywhere)
2796 *addr = vm_map_min(map);
2797 else
2798 *addr = vm_map_trunc_page(*addr);
2799 size = vm_map_round_page(size);
2800
2801 /*
2802 * LP64todo - cpm_allocate should probably allow
2803 * allocations of >4GB, but not with the current
2804 * algorithm, so just cast down the size for now.
2805 */
2806 if (size > VM_MAX_ADDRESS)
2807 return KERN_RESOURCE_SHORTAGE;
2808 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2809 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2810 return kr;
2811
2812 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2813 assert(cpm_obj != VM_OBJECT_NULL);
2814 assert(cpm_obj->internal);
2815 assert(cpm_obj->size == (vm_object_size_t)size);
2816 assert(cpm_obj->can_persist == FALSE);
2817 assert(cpm_obj->pager_created == FALSE);
2818 assert(cpm_obj->pageout == FALSE);
2819 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2820
2821 /*
2822 * Insert pages into object.
2823 */
2824
2825 vm_object_lock(cpm_obj);
2826 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2827 m = pages;
2828 pages = NEXT_PAGE(m);
2829 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2830
2831 assert(!m->gobbled);
2832 assert(!m->wanted);
2833 assert(!m->pageout);
2834 assert(!m->tabled);
2835 assert(VM_PAGE_WIRED(m));
2836 /*
2837 * ENCRYPTED SWAP:
2838 * "m" is not supposed to be pageable, so it
2839 * should not be encrypted. It wouldn't be safe
2840 * to enter it in a new VM object while encrypted.
2841 */
2842 ASSERT_PAGE_DECRYPTED(m);
2843 assert(m->busy);
2844 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2845
2846 m->busy = FALSE;
2847 vm_page_insert(m, cpm_obj, offset);
2848 }
2849 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2850 vm_object_unlock(cpm_obj);
2851
2852 /*
2853 * Hang onto a reference on the object in case a
2854 * multi-threaded application for some reason decides
2855 * to deallocate the portion of the address space into
2856 * which we will insert this object.
2857 *
2858 * Unfortunately, we must insert the object now before
2859 * we can talk to the pmap module about which addresses
2860 * must be wired down. Hence, the race with a multi-
2861 * threaded app.
2862 */
2863 vm_object_reference(cpm_obj);
2864
2865 /*
2866 * Insert object into map.
2867 */
2868
2869 kr = vm_map_enter(
2870 map,
2871 addr,
2872 size,
2873 (vm_map_offset_t)0,
2874 flags,
2875 cpm_obj,
2876 (vm_object_offset_t)0,
2877 FALSE,
2878 VM_PROT_ALL,
2879 VM_PROT_ALL,
2880 VM_INHERIT_DEFAULT);
2881
2882 if (kr != KERN_SUCCESS) {
2883 /*
2884 * A CPM object doesn't have can_persist set,
2885 * so all we have to do is deallocate it to
2886 * free up these pages.
2887 */
2888 assert(cpm_obj->pager_created == FALSE);
2889 assert(cpm_obj->can_persist == FALSE);
2890 assert(cpm_obj->pageout == FALSE);
2891 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2892 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2893 vm_object_deallocate(cpm_obj); /* kill creation ref */
2894 }
2895
2896 /*
2897 * Inform the physical mapping system that the
2898 * range of addresses may not fault, so that
2899 * page tables and such can be locked down as well.
2900 */
2901 start = *addr;
2902 end = start + size;
2903 pmap = vm_map_pmap(map);
2904 pmap_pageable(pmap, start, end, FALSE);
2905
2906 /*
2907 * Enter each page into the pmap, to avoid faults.
2908 * Note that this loop could be coded more efficiently,
2909 * if the need arose, rather than looking up each page
2910 * again.
2911 */
2912 for (offset = 0, va = start; offset < size;
2913 va += PAGE_SIZE, offset += PAGE_SIZE) {
2914 int type_of_fault;
2915
2916 vm_object_lock(cpm_obj);
2917 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2918 assert(m != VM_PAGE_NULL);
2919
2920 vm_page_zero_fill(m);
2921
2922 type_of_fault = DBG_ZERO_FILL_FAULT;
2923
2924 vm_fault_enter(m, pmap, va, VM_PROT_ALL,
2925 VM_PAGE_WIRED(m), FALSE, FALSE,
2926 &type_of_fault);
2927
2928 vm_object_unlock(cpm_obj);
2929 }
2930
2931 #if MACH_ASSERT
2932 /*
2933 * Verify ordering in address space.
2934 */
2935 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2936 vm_object_lock(cpm_obj);
2937 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2938 vm_object_unlock(cpm_obj);
2939 if (m == VM_PAGE_NULL)
2940 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2941 cpm_obj, offset);
2942 assert(m->tabled);
2943 assert(!m->busy);
2944 assert(!m->wanted);
2945 assert(!m->fictitious);
2946 assert(!m->private);
2947 assert(!m->absent);
2948 assert(!m->error);
2949 assert(!m->cleaning);
2950 assert(!m->precious);
2951 assert(!m->clustered);
2952 if (offset != 0) {
2953 if (m->phys_page != prev_addr + 1) {
2954 printf("start 0x%x end 0x%x va 0x%x\n",
2955 start, end, va);
2956 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2957 printf("m 0x%x prev_address 0x%x\n", m,
2958 prev_addr);
2959 panic("vm_allocate_cpm: pages not contig!");
2960 }
2961 }
2962 prev_addr = m->phys_page;
2963 }
2964 #endif /* MACH_ASSERT */
2965
2966 vm_object_deallocate(cpm_obj); /* kill extra ref */
2967
2968 return kr;
2969 }
2970
2971
2972 #else /* VM_CPM */
2973
2974 /*
2975 * Interface is defined in all cases, but unless the kernel
2976 * is built explicitly for this option, the interface does
2977 * nothing.
2978 */
2979
2980 kern_return_t
2981 vm_map_enter_cpm(
2982 __unused vm_map_t map,
2983 __unused vm_map_offset_t *addr,
2984 __unused vm_map_size_t size,
2985 __unused int flags)
2986 {
2987 return KERN_FAILURE;
2988 }
2989 #endif /* VM_CPM */
2990
2991 /* Not used without nested pmaps */
2992 #ifndef NO_NESTED_PMAP
2993 /*
2994 * Clip and unnest a portion of a nested submap mapping.
2995 */
2996
2997
2998 static void
2999 vm_map_clip_unnest(
3000 vm_map_t map,
3001 vm_map_entry_t entry,
3002 vm_map_offset_t start_unnest,
3003 vm_map_offset_t end_unnest)
3004 {
3005 vm_map_offset_t old_start_unnest = start_unnest;
3006 vm_map_offset_t old_end_unnest = end_unnest;
3007
3008 assert(entry->is_sub_map);
3009 assert(entry->object.sub_map != NULL);
3010
3011 /*
3012 * Query the platform for the optimal unnest range.
3013 * DRK: There's some duplication of effort here, since
3014 * callers may have adjusted the range to some extent. This
3015 * routine was introduced to support 1GiB subtree nesting
3016 * for x86 platforms, which can also nest on 2MiB boundaries
3017 * depending on size/alignment.
3018 */
3019 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3020 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3021 }
3022
3023 if (entry->vme_start > start_unnest ||
3024 entry->vme_end < end_unnest) {
3025 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3026 "bad nested entry: start=0x%llx end=0x%llx\n",
3027 (long long)start_unnest, (long long)end_unnest,
3028 (long long)entry->vme_start, (long long)entry->vme_end);
3029 }
3030
3031 if (start_unnest > entry->vme_start) {
3032 _vm_map_clip_start(&map->hdr,
3033 entry,
3034 start_unnest);
3035 UPDATE_FIRST_FREE(map, map->first_free);
3036 }
3037 if (entry->vme_end > end_unnest) {
3038 _vm_map_clip_end(&map->hdr,
3039 entry,
3040 end_unnest);
3041 UPDATE_FIRST_FREE(map, map->first_free);
3042 }
3043
3044 pmap_unnest(map->pmap,
3045 entry->vme_start,
3046 entry->vme_end - entry->vme_start);
3047 if ((map->mapped) && (map->ref_count)) {
3048 /* clean up parent map/maps */
3049 vm_map_submap_pmap_clean(
3050 map, entry->vme_start,
3051 entry->vme_end,
3052 entry->object.sub_map,
3053 entry->offset);
3054 }
3055 entry->use_pmap = FALSE;
3056 }
3057 #endif /* NO_NESTED_PMAP */
3058
3059 /*
3060 * vm_map_clip_start: [ internal use only ]
3061 *
3062 * Asserts that the given entry begins at or after
3063 * the specified address; if necessary,
3064 * it splits the entry into two.
3065 */
3066 static void
3067 vm_map_clip_start(
3068 vm_map_t map,
3069 vm_map_entry_t entry,
3070 vm_map_offset_t startaddr)
3071 {
3072 #ifndef NO_NESTED_PMAP
3073 if (entry->use_pmap &&
3074 startaddr >= entry->vme_start) {
3075 vm_map_offset_t start_unnest, end_unnest;
3076
3077 /*
3078 * Make sure "startaddr" is no longer in a nested range
3079 * before we clip. Unnest only the minimum range the platform
3080 * can handle.
3081 * vm_map_clip_unnest may perform additional adjustments to
3082 * the unnest range.
3083 */
3084 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3085 end_unnest = start_unnest + pmap_nesting_size_min;
3086 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3087 }
3088 #endif /* NO_NESTED_PMAP */
3089 if (startaddr > entry->vme_start) {
3090 if (entry->object.vm_object &&
3091 !entry->is_sub_map &&
3092 entry->object.vm_object->phys_contiguous) {
3093 pmap_remove(map->pmap,
3094 (addr64_t)(entry->vme_start),
3095 (addr64_t)(entry->vme_end));
3096 }
3097 _vm_map_clip_start(&map->hdr, entry, startaddr);
3098 UPDATE_FIRST_FREE(map, map->first_free);
3099 }
3100 }
3101
3102
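/*
 * vm_map_copy_clip_start: clip an entry in a vm_map_copy's entry list.
 * Unlike vm_map_clip_start() above, no unnesting or pmap cleanup is
 * involved here, so the raw _vm_map_clip_start() is enough.
 */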
3103 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3104 MACRO_BEGIN \
3105 if ((startaddr) > (entry)->vme_start) \
3106 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3107 MACRO_END
3108
3109 /*
3110 * This routine is called only when it is known that
3111 * the entry must be split.
3112 */
3113 static void
3114 _vm_map_clip_start(
3115 register struct vm_map_header *map_header,
3116 register vm_map_entry_t entry,
3117 register vm_map_offset_t start)
3118 {
3119 register vm_map_entry_t new_entry;
3120
3121 /*
3122 * Split off the front portion --
3123 * note that we must insert the new
3124 * entry BEFORE this one, so that
3125 * this entry has the specified starting
3126 * address.
3127 */
3128
3129 new_entry = _vm_map_entry_create(map_header);
3130 vm_map_entry_copy_full(new_entry, entry);
3131
3132 new_entry->vme_end = start;
3133 entry->offset += (start - entry->vme_start);
3134 entry->vme_start = start;
3135
3136 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
3137
3138 if (entry->is_sub_map)
3139 vm_map_reference(new_entry->object.sub_map);
3140 else
3141 vm_object_reference(new_entry->object.vm_object);
3142 }
3143
3144
3145 /*
3146 * vm_map_clip_end: [ internal use only ]
3147 *
3148 * Asserts that the given entry ends at or before
3149 * the specified address; if necessary,
3150 * it splits the entry into two.
3151 */
3152 static void
3153 vm_map_clip_end(
3154 vm_map_t map,
3155 vm_map_entry_t entry,
3156 vm_map_offset_t endaddr)
3157 {
3158 if (endaddr > entry->vme_end) {
3159 /*
3160 * Within the scope of this clipping, limit "endaddr" to
3161 * the end of this map entry...
3162 */
3163 endaddr = entry->vme_end;
3164 }
3165 #ifndef NO_NESTED_PMAP
3166 if (entry->use_pmap) {
3167 vm_map_offset_t start_unnest, end_unnest;
3168
3169 /*
3170 * Make sure the range between the start of this entry and
3171 * the new "endaddr" is no longer nested before we clip.
3172 * Unnest only the minimum range the platform can handle.
3173 * vm_map_clip_unnest may perform additional adjustments to
3174 * the unnest range.
3175 */
3176 start_unnest = entry->vme_start;
3177 end_unnest =
3178 (endaddr + pmap_nesting_size_min - 1) &
3179 ~(pmap_nesting_size_min - 1);
3180 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3181 }
3182 #endif /* NO_NESTED_PMAP */
3183 if (endaddr < entry->vme_end) {
3184 if (entry->object.vm_object &&
3185 !entry->is_sub_map &&
3186 entry->object.vm_object->phys_contiguous) {
3187 pmap_remove(map->pmap,
3188 (addr64_t)(entry->vme_start),
3189 (addr64_t)(entry->vme_end));
3190 }
3191 _vm_map_clip_end(&map->hdr, entry, endaddr);
3192 UPDATE_FIRST_FREE(map, map->first_free);
3193 }
3194 }
3195
3196
3197 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3198 MACRO_BEGIN \
3199 if ((endaddr) < (entry)->vme_end) \
3200 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3201 MACRO_END
3202
3203 /*
3204 * This routine is called only when it is known that
3205 * the entry must be split.
3206 */
3207 static void
3208 _vm_map_clip_end(
3209 register struct vm_map_header *map_header,
3210 register vm_map_entry_t entry,
3211 register vm_map_offset_t end)
3212 {
3213 register vm_map_entry_t new_entry;
3214
3215 /*
3216 * Create a new entry and insert it
3217 * AFTER the specified entry
3218 */
3219
3220 new_entry = _vm_map_entry_create(map_header);
3221 vm_map_entry_copy_full(new_entry, entry);
3222
3223 new_entry->vme_start = entry->vme_end = end;
3224 new_entry->offset += (end - entry->vme_start);
3225
3226 _vm_map_entry_link(map_header, entry, new_entry);
3227
3228 if (entry->is_sub_map)
3229 vm_map_reference(new_entry->object.sub_map);
3230 else
3231 vm_object_reference(new_entry->object.vm_object);
3232 }
3233
3234
3235 /*
3236 * VM_MAP_RANGE_CHECK: [ internal use only ]
3237 *
3238 * Asserts that the starting and ending region
3239 * addresses fall within the valid range of the map.
3240 */
3241 #define VM_MAP_RANGE_CHECK(map, start, end) \
3242 MACRO_BEGIN \
3243 if (start < vm_map_min(map)) \
3244 start = vm_map_min(map); \
3245 if (end > vm_map_max(map)) \
3246 end = vm_map_max(map); \
3247 if (start > end) \
3248 start = end; \
3249 MACRO_END
3250
3251 /*
3252 * vm_map_range_check: [ internal use only ]
3253 *
3254 * Check that the region defined by the specified start and
3255 * end addresses is wholly contained within a single map
3256 * entry or set of adjacent map entries of the specified map,
3257 * i.e. the specified region contains no unmapped space.
3258 * If any or all of the region is unmapped, FALSE is returned.
3259 * Otherwise, TRUE is returned and if the output argument 'entry'
3260 * is not NULL it points to the map entry containing the start
3261 * of the region.
3262 *
3263 * The map is locked for reading on entry and is left locked.
3264 */
3265 static boolean_t
3266 vm_map_range_check(
3267 register vm_map_t map,
3268 register vm_map_offset_t start,
3269 register vm_map_offset_t end,
3270 vm_map_entry_t *entry)
3271 {
3272 vm_map_entry_t cur;
3273 register vm_map_offset_t prev;
3274
3275 /*
3276 * Basic sanity checks first
3277 */
3278 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3279 return (FALSE);
3280
3281 /*
3282 * Check first if the region starts within a valid
3283 * mapping for the map.
3284 */
3285 if (!vm_map_lookup_entry(map, start, &cur))
3286 return (FALSE);
3287
3288 /*
3289 * Optimize for the case that the region is contained
3290 * in a single map entry.
3291 */
3292 if (entry != (vm_map_entry_t *) NULL)
3293 *entry = cur;
3294 if (end <= cur->vme_end)
3295 return (TRUE);
3296
3297 /*
3298 * If the region is not wholly contained within a
3299 * single entry, walk the entries looking for holes.
3300 */
3301 prev = cur->vme_end;
3302 cur = cur->vme_next;
3303 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3304 if (end <= cur->vme_end)
3305 return (TRUE);
3306 prev = cur->vme_end;
3307 cur = cur->vme_next;
3308 }
3309 return (FALSE);
3310 }
3311
3312 /*
3313 * vm_map_submap: [ kernel use only ]
3314 *
3315 * Mark the given range as handled by a subordinate map.
3316 *
3317 * This range must have been created with vm_map_find using
3318 * the vm_submap_object, and no other operations may have been
3319 * performed on this range prior to calling vm_map_submap.
3320 *
3321 * Only a limited number of operations can be performed
3322 * within this range after calling vm_map_submap:
3323 * vm_fault
3324 * [Don't try vm_map_copyin!]
3325 *
3326 * To remove a submapping, one must first remove the
3327 * range from the superior map, and then destroy the
3328 * submap (if desired). [Better yet, don't try it.]
3329 */
3330 kern_return_t
3331 vm_map_submap(
3332 vm_map_t map,
3333 vm_map_offset_t start,
3334 vm_map_offset_t end,
3335 vm_map_t submap,
3336 vm_map_offset_t offset,
3337 #ifdef NO_NESTED_PMAP
3338 __unused
3339 #endif /* NO_NESTED_PMAP */
3340 boolean_t use_pmap)
3341 {
3342 vm_map_entry_t entry;
3343 register kern_return_t result = KERN_INVALID_ARGUMENT;
3344 register vm_object_t object;
3345
3346 vm_map_lock(map);
3347
3348 if (! vm_map_lookup_entry(map, start, &entry)) {
3349 entry = entry->vme_next;
3350 }
3351
3352 if (entry == vm_map_to_entry(map) ||
3353 entry->is_sub_map) {
3354 vm_map_unlock(map);
3355 return KERN_INVALID_ARGUMENT;
3356 }
3357
3358 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3359 vm_map_clip_start(map, entry, start);
3360 vm_map_clip_end(map, entry, end);
3361
3362 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3363 (!entry->is_sub_map) &&
3364 ((object = entry->object.vm_object) == vm_submap_object) &&
3365 (object->resident_page_count == 0) &&
3366 (object->copy == VM_OBJECT_NULL) &&
3367 (object->shadow == VM_OBJECT_NULL) &&
3368 (!object->pager_created)) {
3369 entry->offset = (vm_object_offset_t)offset;
3370 entry->object.vm_object = VM_OBJECT_NULL;
3371 vm_object_deallocate(object);
3372 entry->is_sub_map = TRUE;
3373 entry->object.sub_map = submap;
3374 vm_map_reference(submap);
3375 submap->mapped = TRUE;
3376
3377 #ifndef NO_NESTED_PMAP
3378 if (use_pmap) {
3379 /* nest if platform code will allow */
3380 if(submap->pmap == NULL) {
3381 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3382 if(submap->pmap == PMAP_NULL) {
3383 vm_map_unlock(map);
3384 return(KERN_NO_SPACE);
3385 }
3386 }
3387 result = pmap_nest(map->pmap,
3388 (entry->object.sub_map)->pmap,
3389 (addr64_t)start,
3390 (addr64_t)start,
3391 (uint64_t)(end - start));
3392 if(result)
3393 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3394 entry->use_pmap = TRUE;
3395 }
3396 #else /* NO_NESTED_PMAP */
3397 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3398 #endif /* NO_NESTED_PMAP */
3399 result = KERN_SUCCESS;
3400 }
3401 vm_map_unlock(map);
3402
3403 return(result);
3404 }
3405
3406 /*
3407 * vm_map_protect:
3408 *
3409 * Sets the protection of the specified address
3410 * region in the target map. If "set_max" is
3411 * specified, the maximum protection is to be set;
3412 * otherwise, only the current protection is affected.
3413 */
3414 kern_return_t
3415 vm_map_protect(
3416 register vm_map_t map,
3417 register vm_map_offset_t start,
3418 register vm_map_offset_t end,
3419 register vm_prot_t new_prot,
3420 register boolean_t set_max)
3421 {
3422 register vm_map_entry_t current;
3423 register vm_map_offset_t prev;
3424 vm_map_entry_t entry;
3425 vm_prot_t new_max;
3426
3427 XPR(XPR_VM_MAP,
3428 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3429 map, start, end, new_prot, set_max);
3430
3431 vm_map_lock(map);
3432
3433 /* LP64todo - remove this check when vm_map_commpage64()
3434 * no longer has to stuff in a map_entry for the commpage
3435 * above the map's max_offset.
3436 */
3437 if (start >= map->max_offset) {
3438 vm_map_unlock(map);
3439 return(KERN_INVALID_ADDRESS);
3440 }
3441
3442 while(1) {
3443 /*
3444 * Lookup the entry. If it doesn't start in a valid
3445 * entry, return an error.
3446 */
3447 if (! vm_map_lookup_entry(map, start, &entry)) {
3448 vm_map_unlock(map);
3449 return(KERN_INVALID_ADDRESS);
3450 }
3451
3452 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3453 start = SUPERPAGE_ROUND_DOWN(start);
3454 continue;
3455 }
3456 break;
3457 }
3458 if (entry->superpage_size)
3459 end = SUPERPAGE_ROUND_UP(end);
3460
3461 /*
3462 * Make a first pass to check for protection and address
3463 * violations.
3464 */
3465
3466 current = entry;
3467 prev = current->vme_start;
3468 while ((current != vm_map_to_entry(map)) &&
3469 (current->vme_start < end)) {
3470
3471 /*
3472 * If there is a hole, return an error.
3473 */
3474 if (current->vme_start != prev) {
3475 vm_map_unlock(map);
3476 return(KERN_INVALID_ADDRESS);
3477 }
3478
3479 new_max = current->max_protection;
3480 if(new_prot & VM_PROT_COPY) {
3481 new_max |= VM_PROT_WRITE;
3482 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3483 vm_map_unlock(map);
3484 return(KERN_PROTECTION_FAILURE);
3485 }
3486 } else {
3487 if ((new_prot & new_max) != new_prot) {
3488 vm_map_unlock(map);
3489 return(KERN_PROTECTION_FAILURE);
3490 }
3491 }
3492
3493 #if CONFIG_EMBEDDED
3494 if (new_prot & VM_PROT_WRITE) {
3495 if (new_prot & VM_PROT_EXECUTE) {
3496 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3497 new_prot &= ~VM_PROT_EXECUTE;
3498 }
3499 }
3500 #endif
3501
3502 prev = current->vme_end;
3503 current = current->vme_next;
3504 }
3505 if (end > prev) {
3506 vm_map_unlock(map);
3507 return(KERN_INVALID_ADDRESS);
3508 }
3509
3510 /*
3511 * Go back and fix up protections.
3512 * Clip to start here if the range starts within
3513 * the entry.
3514 */
3515
3516 current = entry;
3517 if (current != vm_map_to_entry(map)) {
3518 /* clip and unnest if necessary */
3519 vm_map_clip_start(map, current, start);
3520 }
3521
3522 while ((current != vm_map_to_entry(map)) &&
3523 (current->vme_start < end)) {
3524
3525 vm_prot_t old_prot;
3526
3527 vm_map_clip_end(map, current, end);
3528
3529 assert(!current->use_pmap); /* clipping did unnest if needed */
3530
3531 old_prot = current->protection;
3532
3533 if(new_prot & VM_PROT_COPY) {
3534 /* caller is asking specifically to copy the */
3535 /* mapped data, this implies that max protection */
3536 /* will include write. Caller must be prepared */
3537 /* for loss of shared memory communication in the */
3538 /* target area after taking this step */
3539 current->needs_copy = TRUE;
3540 current->max_protection |= VM_PROT_WRITE;
3541 }
3542
3543 if (set_max)
3544 current->protection =
3545 (current->max_protection =
3546 new_prot & ~VM_PROT_COPY) &
3547 old_prot;
3548 else
3549 current->protection = new_prot & ~VM_PROT_COPY;
3550
3551 /*
3552 * Update physical map if necessary.
3553 * If the request is to turn off write protection,
3554 * we won't do it for real (in pmap). This is because
3555 * it would cause copy-on-write to fail. We've already
3556 * set the new protection in the map, so if a
3557 * write-protect fault occurred, it will be fixed up
3558 * properly, COW or not.
3559 */
3560 if (current->protection != old_prot) {
3561 /* Look one level in; we support nested pmaps */
3562 /* from mapped submaps which are direct entries */
3563 /* in our map */
3564
3565 vm_prot_t prot;
3566
3567 prot = current->protection & ~VM_PROT_WRITE;
3568
3569 if (override_nx(map, current->alias) && prot)
3570 prot |= VM_PROT_EXECUTE;
3571
3572 if (current->is_sub_map && current->use_pmap) {
3573 pmap_protect(current->object.sub_map->pmap,
3574 current->vme_start,
3575 current->vme_end,
3576 prot);
3577 } else {
3578 pmap_protect(map->pmap,
3579 current->vme_start,
3580 current->vme_end,
3581 prot);
3582 }
3583 }
3584 current = current->vme_next;
3585 }
3586
3587 current = entry;
3588 while ((current != vm_map_to_entry(map)) &&
3589 (current->vme_start <= end)) {
3590 vm_map_simplify_entry(map, current);
3591 current = current->vme_next;
3592 }
3593
3594 vm_map_unlock(map);
3595 return(KERN_SUCCESS);
3596 }
3597
3598 /*
3599 * vm_map_inherit:
3600 *
3601 * Sets the inheritance of the specified address
3602 * range in the target map. Inheritance
3603 * affects how the map will be shared with
3604 * child maps at the time of vm_map_fork.
3605 */
3606 kern_return_t
3607 vm_map_inherit(
3608 register vm_map_t map,
3609 register vm_map_offset_t start,
3610 register vm_map_offset_t end,
3611 register vm_inherit_t new_inheritance)
3612 {
3613 register vm_map_entry_t entry;
3614 vm_map_entry_t temp_entry;
3615
3616 vm_map_lock(map);
3617
3618 VM_MAP_RANGE_CHECK(map, start, end);
3619
3620 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3621 entry = temp_entry;
3622 }
3623 else {
3624 temp_entry = temp_entry->vme_next;
3625 entry = temp_entry;
3626 }
3627
3628 /* first check entire range for submaps which can't support the */
3629 /* given inheritance. */
3630 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3631 if(entry->is_sub_map) {
3632 if(new_inheritance == VM_INHERIT_COPY) {
3633 vm_map_unlock(map);
3634 return(KERN_INVALID_ARGUMENT);
3635 }
3636 }
3637
3638 entry = entry->vme_next;
3639 }
3640
3641 entry = temp_entry;
3642 if (entry != vm_map_to_entry(map)) {
3643 /* clip and unnest if necessary */
3644 vm_map_clip_start(map, entry, start);
3645 }
3646
3647 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3648 vm_map_clip_end(map, entry, end);
3649 assert(!entry->use_pmap); /* clip did unnest if needed */
3650
3651 entry->inheritance = new_inheritance;
3652
3653 entry = entry->vme_next;
3654 }
3655
3656 vm_map_unlock(map);
3657 return(KERN_SUCCESS);
3658 }
3659
3660 /*
3661 * Update the accounting for the amount of wired memory in this map. If the user has
3662 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3663 */
3664
3665 static kern_return_t
3666 add_wire_counts(
3667 vm_map_t map,
3668 vm_map_entry_t entry,
3669 boolean_t user_wire)
3670 {
3671 vm_map_size_t size;
3672
3673 if (user_wire) {
3674
3675 /*
3676 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3677 * this map entry.
3678 */
3679
3680 if (entry->user_wired_count == 0) {
3681 size = entry->vme_end - entry->vme_start;
3682
3683 /*
3684 * Since this is the first time the user is wiring this map entry, check to see if we're
3685 * exceeding the user wire limits. There is a per-map limit, which is the smaller of
3686 * the process's rlimit and the global vm_user_wire_limit that caps this value. There is also
3687 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3688 * limit, then we fail.
3689 */
3690
3691 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3692 size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit ||
3693 size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount)
3694 return KERN_RESOURCE_SHORTAGE;
3695
3696 /*
3697 * The first time the user wires an entry, we also increment the wired_count and add this to
3698 * the total that has been wired in the map.
3699 */
3700
3701 if (entry->wired_count >= MAX_WIRE_COUNT)
3702 return KERN_FAILURE;
3703
3704 entry->wired_count++;
3705 map->user_wire_size += size;
3706 }
3707
3708 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3709 return KERN_FAILURE;
3710
3711 entry->user_wired_count++;
3712
3713 } else {
3714
3715 /*
3716 * The kernel's wiring the memory. Just bump the count and continue.
3717 */
3718
3719 if (entry->wired_count >= MAX_WIRE_COUNT)
3720 panic("vm_map_wire: too many wirings");
3721
3722 entry->wired_count++;
3723 }
3724
3725 return KERN_SUCCESS;
3726 }
3727
3728 /*
3729 * Update the memory wiring accounting now that the given map entry is being unwired.
3730 */
3731
3732 static void
3733 subtract_wire_counts(
3734 vm_map_t map,
3735 vm_map_entry_t entry,
3736 boolean_t user_wire)
3737 {
3738
3739 if (user_wire) {
3740
3741 /*
3742 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3743 */
3744
3745 if (entry->user_wired_count == 1) {
3746
3747 /*
3748 * We're removing the last user wire reference. Decrement the wired_count and the total
3749 * user wired memory for this map.
3750 */
3751
3752 assert(entry->wired_count >= 1);
3753 entry->wired_count--;
3754 map->user_wire_size -= entry->vme_end - entry->vme_start;
3755 }
3756
3757 assert(entry->user_wired_count >= 1);
3758 entry->user_wired_count--;
3759
3760 } else {
3761
3762 /*
3763 * The kernel is unwiring the memory. Just update the count.
3764 */
3765
3766 assert(entry->wired_count >= 1);
3767 entry->wired_count--;
3768 }
3769 }
3770
3771 /*
3772 * vm_map_wire:
3773 *
3774 * Sets the pageability of the specified address range in the
3775 * target map as wired. Regions specified as not pageable require
3776 * locked-down physical memory and physical page maps. The
3777 * access_type variable indicates types of accesses that must not
3778 * generate page faults. This is checked against protection of
3779 * memory being locked-down.
3780 *
3781 * The map must not be locked, but a reference must remain to the
3782 * map throughout the call.
3783 */
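/*
 * vm_map_wire_nested is the internal worker: when "map_pmap" is not
 * NULL the wiring is performed against that pmap, starting at
 * "pmap_addr", rather than the map's own pmap.  This is how the
 * routine recurses into nested submaps.
 */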
3784 static kern_return_t
3785 vm_map_wire_nested(
3786 register vm_map_t map,
3787 register vm_map_offset_t start,
3788 register vm_map_offset_t end,
3789 register vm_prot_t access_type,
3790 boolean_t user_wire,
3791 pmap_t map_pmap,
3792 vm_map_offset_t pmap_addr)
3793 {
3794 register vm_map_entry_t entry;
3795 struct vm_map_entry *first_entry, tmp_entry;
3796 vm_map_t real_map;
3797 register vm_map_offset_t s,e;
3798 kern_return_t rc;
3799 boolean_t need_wakeup;
3800 boolean_t main_map = FALSE;
3801 wait_interrupt_t interruptible_state;
3802 thread_t cur_thread;
3803 unsigned int last_timestamp;
3804 vm_map_size_t size;
3805
3806 vm_map_lock(map);
3807 if(map_pmap == NULL)
3808 main_map = TRUE;
3809 last_timestamp = map->timestamp;
3810
3811 VM_MAP_RANGE_CHECK(map, start, end);
3812 assert(page_aligned(start));
3813 assert(page_aligned(end));
3814 if (start == end) {
3815 /* We wired what the caller asked for, zero pages */
3816 vm_map_unlock(map);
3817 return KERN_SUCCESS;
3818 }
3819
3820 need_wakeup = FALSE;
3821 cur_thread = current_thread();
3822
3823 s = start;
3824 rc = KERN_SUCCESS;
3825
3826 if (vm_map_lookup_entry(map, s, &first_entry)) {
3827 entry = first_entry;
3828 /*
3829 * vm_map_clip_start will be done later.
3830 * We don't want to unnest any nested submaps here !
3831 */
3832 } else {
3833 /* Start address is not in map */
3834 rc = KERN_INVALID_ADDRESS;
3835 goto done;
3836 }
3837
3838 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3839 /*
3840 * At this point, we have wired from "start" to "s".
3841 * We still need to wire from "s" to "end".
3842 *
3843 * "entry" hasn't been clipped, so it could start before "s"
3844 * and/or end after "end".
3845 */
3846
3847 /* "e" is how far we want to wire in this entry */
3848 e = entry->vme_end;
3849 if (e > end)
3850 e = end;
3851
3852 /*
3853 * If another thread is wiring/unwiring this entry then
3854 * block after informing the other thread to wake us up.
3855 */
3856 if (entry->in_transition) {
3857 wait_result_t wait_result;
3858
3859 /*
3860 * We have not clipped the entry. Make sure that
3861 * the start address is in range so that the lookup
3862 * below will succeed.
3863 * "s" is the current starting point: we've already
3864 * wired from "start" to "s" and we still have
3865 * to wire from "s" to "end".
3866 */
3867
3868 entry->needs_wakeup = TRUE;
3869
3870 /*
3871 * wake up anybody waiting on entries that we have
3872 * already wired.
3873 */
3874 if (need_wakeup) {
3875 vm_map_entry_wakeup(map);
3876 need_wakeup = FALSE;
3877 }
3878 /*
3879 * User wiring is interruptible
3880 */
3881 wait_result = vm_map_entry_wait(map,
3882 (user_wire) ? THREAD_ABORTSAFE :
3883 THREAD_UNINT);
3884 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3885 /*
3886 * undo the wirings we have done so far
3887 * We do not clear the needs_wakeup flag,
3888 * because we cannot tell if we were the
3889 * only one waiting.
3890 */
3891 rc = KERN_FAILURE;
3892 goto done;
3893 }
3894
3895 /*
3896 * Cannot avoid a lookup here. Reset the timestamp.
3897 */
3898 last_timestamp = map->timestamp;
3899
3900 /*
3901 * The entry could have been clipped; look it up again.
3902 * The worst that can happen is that it no longer exists.
3903 */
3904 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3905 if (!user_wire)
3906 panic("vm_map_wire: re-lookup failed");
3907
3908 /*
3909 * User: undo everything up to the previous
3910 * entry. Let vm_map_unwire worry about
3911 * checking the validity of the range.
3912 */
3913 rc = KERN_FAILURE;
3914 goto done;
3915 }
3916 entry = first_entry;
3917 continue;
3918 }
3919
3920 if (entry->is_sub_map) {
3921 vm_map_offset_t sub_start;
3922 vm_map_offset_t sub_end;
3923 vm_map_offset_t local_start;
3924 vm_map_offset_t local_end;
3925 pmap_t pmap;
3926
3927 vm_map_clip_start(map, entry, s);
3928 vm_map_clip_end(map, entry, end);
3929
3930 sub_start = entry->offset;
3931 sub_end = entry->vme_end;
3932 sub_end += entry->offset - entry->vme_start;
3933
3934 local_end = entry->vme_end;
3935 if(map_pmap == NULL) {
3936 vm_object_t object;
3937 vm_object_offset_t offset;
3938 vm_prot_t prot;
3939 boolean_t wired;
3940 vm_map_entry_t local_entry;
3941 vm_map_version_t version;
3942 vm_map_t lookup_map;
3943
3944 if(entry->use_pmap) {
3945 pmap = entry->object.sub_map->pmap;
3946 /* ppc implementation requires that */
3947 /* submap pmap address ranges line */
3948 /* up with the parent map */
3949 #ifdef notdef
3950 pmap_addr = sub_start;
3951 #endif
3952 pmap_addr = s;
3953 } else {
3954 pmap = map->pmap;
3955 pmap_addr = s;
3956 }
3957
3958 if (entry->wired_count) {
3959 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3960 goto done;
3961
3962 /*
3963 * The map was not unlocked:
3964 * no need to goto re-lookup.
3965 * Just go directly to next entry.
3966 */
3967 entry = entry->vme_next;
3968 s = entry->vme_start;
3969 continue;
3970
3971 }
3972
3973 /* call vm_map_lookup_locked to */
3974 /* cause any needs copy to be */
3975 /* evaluated */
3976 local_start = entry->vme_start;
3977 lookup_map = map;
3978 vm_map_lock_write_to_read(map);
3979 if(vm_map_lookup_locked(
3980 &lookup_map, local_start,
3981 access_type,
3982 OBJECT_LOCK_EXCLUSIVE,
3983 &version, &object,
3984 &offset, &prot, &wired,
3985 NULL,
3986 &real_map)) {
3987
3988 vm_map_unlock_read(lookup_map);
3989 vm_map_unwire(map, start,
3990 s, user_wire);
3991 return(KERN_FAILURE);
3992 }
3993 if(real_map != lookup_map)
3994 vm_map_unlock(real_map);
3995 vm_map_unlock_read(lookup_map);
3996 vm_map_lock(map);
3997 vm_object_unlock(object);
3998
3999 /* we unlocked, so must re-lookup */
4000 if (!vm_map_lookup_entry(map,
4001 local_start,
4002 &local_entry)) {
4003 rc = KERN_FAILURE;
4004 goto done;
4005 }
4006
4007 /*
4008 * entry could have been "simplified",
4009 * so re-clip
4010 */
4011 entry = local_entry;
4012 assert(s == local_start);
4013 vm_map_clip_start(map, entry, s);
4014 vm_map_clip_end(map, entry, end);
4015 /* re-compute "e" */
4016 e = entry->vme_end;
4017 if (e > end)
4018 e = end;
4019
4020 /* did we have a change of type? */
4021 if (!entry->is_sub_map) {
4022 last_timestamp = map->timestamp;
4023 continue;
4024 }
4025 } else {
4026 local_start = entry->vme_start;
4027 pmap = map_pmap;
4028 }
4029
4030 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4031 goto done;
4032
4033 entry->in_transition = TRUE;
4034
4035 vm_map_unlock(map);
4036 rc = vm_map_wire_nested(entry->object.sub_map,
4037 sub_start, sub_end,
4038 access_type,
4039 user_wire, pmap, pmap_addr);
4040 vm_map_lock(map);
4041
4042 /*
4043 * Find the entry again. It could have been clipped
4044 * after we unlocked the map.
4045 */
4046 if (!vm_map_lookup_entry(map, local_start,
4047 &first_entry))
4048 panic("vm_map_wire: re-lookup failed");
4049 entry = first_entry;
4050
4051 assert(local_start == s);
4052 /* re-compute "e" */
4053 e = entry->vme_end;
4054 if (e > end)
4055 e = end;
4056
4057 last_timestamp = map->timestamp;
4058 while ((entry != vm_map_to_entry(map)) &&
4059 (entry->vme_start < e)) {
4060 assert(entry->in_transition);
4061 entry->in_transition = FALSE;
4062 if (entry->needs_wakeup) {
4063 entry->needs_wakeup = FALSE;
4064 need_wakeup = TRUE;
4065 }
4066 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4067 subtract_wire_counts(map, entry, user_wire);
4068 }
4069 entry = entry->vme_next;
4070 }
4071 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4072 goto done;
4073 }
4074
4075 /* no need to relookup again */
4076 s = entry->vme_start;
4077 continue;
4078 }
4079
4080 /*
4081 * If this entry is already wired then increment
4082 * the appropriate wire reference count.
4083 */
4084 if (entry->wired_count) {
4085 /*
4086 * entry is already wired down, get our reference
4087 * after clipping to our range.
4088 */
4089 vm_map_clip_start(map, entry, s);
4090 vm_map_clip_end(map, entry, end);
4091
4092 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4093 goto done;
4094
4095 /* map was not unlocked: no need to relookup */
4096 entry = entry->vme_next;
4097 s = entry->vme_start;
4098 continue;
4099 }
4100
4101 /*
4102 * Unwired entry or wire request transmitted via submap
4103 */
4104
4105
4106 /*
4107 * Perform actions of vm_map_lookup that need the write
4108 * lock on the map: create a shadow object for a
4109 * copy-on-write region, or an object for a zero-fill
4110 * region.
4111 */
4112 size = entry->vme_end - entry->vme_start;
4113 /*
4114 * If wiring a copy-on-write page, we need to copy it now
4115 * even if we're only (currently) requesting read access.
4116 * This is aggressive, but once it's wired we can't move it.
4117 */
4118 if (entry->needs_copy) {
4119 vm_object_shadow(&entry->object.vm_object,
4120 &entry->offset, size);
4121 entry->needs_copy = FALSE;
4122 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4123 entry->object.vm_object = vm_object_allocate(size);
4124 entry->offset = (vm_object_offset_t)0;
4125 }
4126
4127 vm_map_clip_start(map, entry, s);
4128 vm_map_clip_end(map, entry, end);
4129
4130 /* re-compute "e" */
4131 e = entry->vme_end;
4132 if (e > end)
4133 e = end;
4134
4135 /*
4136 * Check for holes and protection mismatch.
4137 * Holes: Next entry should be contiguous unless this
4138 * is the end of the region.
4139 * Protection: Access requested must be allowed, unless
4140 * wiring is by protection class
4141 */
4142 if ((entry->vme_end < end) &&
4143 ((entry->vme_next == vm_map_to_entry(map)) ||
4144 (entry->vme_next->vme_start > entry->vme_end))) {
4145 /* found a hole */
4146 rc = KERN_INVALID_ADDRESS;
4147 goto done;
4148 }
4149 if ((entry->protection & access_type) != access_type) {
4150 /* found a protection problem */
4151 rc = KERN_PROTECTION_FAILURE;
4152 goto done;
4153 }
4154
4155 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4156
4157 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4158 goto done;
4159
4160 entry->in_transition = TRUE;
4161
4162 /*
4163 * This entry might get split once we unlock the map.
4164 * In vm_fault_wire(), we need the current range as
4165 * defined by this entry. In order for this to work
4166 * along with a simultaneous clip operation, we make a
4167 * temporary copy of this entry and use that for the
4168 * wiring. Note that the underlying objects do not
4169 * change during a clip.
4170 */
4171 tmp_entry = *entry;
4172
4173 /*
4174 * The in_transition state guarantees that the entry
4175 * (or entries for this range, if a split occurred) will be
4176 * there when the map lock is acquired for the second time.
4177 */
4178 vm_map_unlock(map);
4179
4180 if (!user_wire && cur_thread != THREAD_NULL)
4181 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4182 else
4183 interruptible_state = THREAD_UNINT;
4184
4185 if(map_pmap)
4186 rc = vm_fault_wire(map,
4187 &tmp_entry, map_pmap, pmap_addr);
4188 else
4189 rc = vm_fault_wire(map,
4190 &tmp_entry, map->pmap,
4191 tmp_entry.vme_start);
4192
4193 if (!user_wire && cur_thread != THREAD_NULL)
4194 thread_interrupt_level(interruptible_state);
4195
4196 vm_map_lock(map);
4197
4198 if (last_timestamp+1 != map->timestamp) {
4199 /*
4200 * Find the entry again. It could have been clipped
4201 * after we unlocked the map.
4202 */
4203 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4204 &first_entry))
4205 panic("vm_map_wire: re-lookup failed");
4206
4207 entry = first_entry;
4208 }
4209
4210 last_timestamp = map->timestamp;
4211
4212 while ((entry != vm_map_to_entry(map)) &&
4213 (entry->vme_start < tmp_entry.vme_end)) {
4214 assert(entry->in_transition);
4215 entry->in_transition = FALSE;
4216 if (entry->needs_wakeup) {
4217 entry->needs_wakeup = FALSE;
4218 need_wakeup = TRUE;
4219 }
4220 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4221 subtract_wire_counts(map, entry, user_wire);
4222 }
4223 entry = entry->vme_next;
4224 }
4225
4226 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4227 goto done;
4228 }
4229
4230 s = entry->vme_start;
4231 } /* end while loop through map entries */
4232
4233 done:
4234 if (rc == KERN_SUCCESS) {
4235 /* repair any damage we may have made to the VM map */
4236 vm_map_simplify_range(map, start, end);
4237 }
4238
4239 vm_map_unlock(map);
4240
4241 /*
4242 * wake up anybody waiting on entries we wired.
4243 */
4244 if (need_wakeup)
4245 vm_map_entry_wakeup(map);
4246
4247 if (rc != KERN_SUCCESS) {
4248 /* undo what has been wired so far */
4249 vm_map_unwire(map, start, s, user_wire);
4250 }
4251
4252 return rc;
4253
4254 }
4255
4256 kern_return_t
4257 vm_map_wire(
4258 register vm_map_t map,
4259 register vm_map_offset_t start,
4260 register vm_map_offset_t end,
4261 register vm_prot_t access_type,
4262 boolean_t user_wire)
4263 {
4264
4265 kern_return_t kret;
4266
4267 #ifdef ppc
4268 /*
4269 * the calls to mapping_prealloc and mapping_relpre
4270 * (along with the VM_MAP_RANGE_CHECK to insure a
4271 * resonable range was passed in) are
4272 * currently necessary because
4273 * we haven't enabled kernel pre-emption
4274 * and/or the pmap_enter cannot purge and re-use
4275 * existing mappings
4276 */
4277 VM_MAP_RANGE_CHECK(map, start, end);
4278 assert((unsigned int) (end - start) == (end - start));
4279 mapping_prealloc((unsigned int) (end - start));
4280 #endif
4281 kret = vm_map_wire_nested(map, start, end, access_type,
4282 user_wire, (pmap_t)NULL, 0);
4283 #ifdef ppc
4284 mapping_relpre();
4285 #endif
4286 return kret;
4287 }
4288
4289 /*
4290 * vm_map_unwire:
4291 *
4292 * Sets the pageability of the specified address range in the target
4293 * as pageable. Regions specified must have been wired previously.
4294 *
4295 * The map must not be locked, but a reference must remain to the map
4296 * throughout the call.
4297 *
4298 * Kernel will panic on failures. User unwire ignores holes and
4299 * unwired and in-transition entries to avoid losing memory by leaving
4300 * it unwired.
4301 */
4302 static kern_return_t
4303 vm_map_unwire_nested(
4304 register vm_map_t map,
4305 register vm_map_offset_t start,
4306 register vm_map_offset_t end,
4307 boolean_t user_wire,
4308 pmap_t map_pmap,
4309 vm_map_offset_t pmap_addr)
4310 {
4311 register vm_map_entry_t entry;
4312 struct vm_map_entry *first_entry, tmp_entry;
4313 boolean_t need_wakeup;
4314 boolean_t main_map = FALSE;
4315 unsigned int last_timestamp;
4316
4317 vm_map_lock(map);
4318 if(map_pmap == NULL)
4319 main_map = TRUE;
4320 last_timestamp = map->timestamp;
4321
4322 VM_MAP_RANGE_CHECK(map, start, end);
4323 assert(page_aligned(start));
4324 assert(page_aligned(end));
4325
4326 if (start == end) {
4327 /* We unwired what the caller asked for: zero pages */
4328 vm_map_unlock(map);
4329 return KERN_SUCCESS;
4330 }
4331
4332 if (vm_map_lookup_entry(map, start, &first_entry)) {
4333 entry = first_entry;
4334 /*
4335 * vm_map_clip_start will be done later.
4336 * We don't want to unnest any nested submaps here!
4337 */
4338 }
4339 else {
4340 if (!user_wire) {
4341 panic("vm_map_unwire: start not found");
4342 }
4343 /* Start address is not in map. */
4344 vm_map_unlock(map);
4345 return(KERN_INVALID_ADDRESS);
4346 }
4347
4348 if (entry->superpage_size) {
4349 /* superpages are always wired */
4350 vm_map_unlock(map);
4351 return KERN_INVALID_ADDRESS;
4352 }
4353
4354 need_wakeup = FALSE;
4355 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4356 if (entry->in_transition) {
4357 /*
4358 * 1)
4359 * Another thread is wiring down this entry. Note
4360 * that if it is not for the other thread we would
4361 * be unwiring an unwired entry. This is not
4362 * permitted. If we wait, we will be unwiring memory
4363 * we did not wire.
4364 *
4365 * 2)
4366 * Another thread is unwiring this entry. We did not
4367 * have a reference to it, because if we did, this
4368 * entry would not be getting unwired now.
4369 */
4370 if (!user_wire) {
4371 /*
4372 * XXX FBDP
4373 * This could happen: there could be some
4374 * overlapping vslock/vsunlock operations
4375 * going on.
4376 * We should probably just wait and retry,
4377 * but then we have to be careful that this
4378 * entry could get "simplified" after
4379 * "in_transition" gets unset and before
4380 * we re-lookup the entry, so we would
4381 * have to re-clip the entry to avoid
4382 * re-unwiring what we have already unwired...
4383 * See vm_map_wire_nested().
4384 *
4385 * Or we could just ignore "in_transition"
4386 * here and proceed to decrement the wired
4387 * count(s) on this entry. That should be fine
4388 * as long as "wired_count" doesn't drop all
4389 * the way to 0 (and we should panic if THAT
4390 * happens).
4391 */
4392 panic("vm_map_unwire: in_transition entry");
4393 }
4394
4395 entry = entry->vme_next;
4396 continue;
4397 }
4398
4399 if (entry->is_sub_map) {
4400 vm_map_offset_t sub_start;
4401 vm_map_offset_t sub_end;
4402 vm_map_offset_t local_end;
4403 pmap_t pmap;
4404
4405 vm_map_clip_start(map, entry, start);
4406 vm_map_clip_end(map, entry, end);
4407
4408 sub_start = entry->offset;
4409 sub_end = entry->vme_end - entry->vme_start;
4410 sub_end += entry->offset;
4411 local_end = entry->vme_end;
4412 if(map_pmap == NULL) {
4413 if(entry->use_pmap) {
4414 pmap = entry->object.sub_map->pmap;
4415 pmap_addr = sub_start;
4416 } else {
4417 pmap = map->pmap;
4418 pmap_addr = start;
4419 }
4420 if (entry->wired_count == 0 ||
4421 (user_wire && entry->user_wired_count == 0)) {
4422 if (!user_wire)
4423 panic("vm_map_unwire: entry is unwired");
4424 entry = entry->vme_next;
4425 continue;
4426 }
4427
4428 /*
4429 * Check for holes
4430 * Holes: Next entry should be contiguous unless
4431 * this is the end of the region.
4432 */
4433 if (((entry->vme_end < end) &&
4434 ((entry->vme_next == vm_map_to_entry(map)) ||
4435 (entry->vme_next->vme_start
4436 > entry->vme_end)))) {
4437 if (!user_wire)
4438 panic("vm_map_unwire: non-contiguous region");
4439 /*
4440 entry = entry->vme_next;
4441 continue;
4442 */
4443 }
4444
4445 subtract_wire_counts(map, entry, user_wire);
4446
4447 if (entry->wired_count != 0) {
4448 entry = entry->vme_next;
4449 continue;
4450 }
4451
4452 entry->in_transition = TRUE;
4453 tmp_entry = *entry;/* see comment in vm_map_wire() */
4454
4455 /*
4456 * We can unlock the map now. The in_transition state
4457 * guarantees existence of the entry.
4458 */
4459 vm_map_unlock(map);
4460 vm_map_unwire_nested(entry->object.sub_map,
4461 sub_start, sub_end, user_wire, pmap, pmap_addr);
4462 vm_map_lock(map);
4463
4464 if (last_timestamp+1 != map->timestamp) {
4465 /*
4466 * Find the entry again. It could have been
4467 * clipped or deleted after we unlocked the map.
4468 */
4469 if (!vm_map_lookup_entry(map,
4470 tmp_entry.vme_start,
4471 &first_entry)) {
4472 if (!user_wire)
4473 panic("vm_map_unwire: re-lookup failed");
4474 entry = first_entry->vme_next;
4475 } else
4476 entry = first_entry;
4477 }
4478 last_timestamp = map->timestamp;
4479
4480 /*
4481 * clear transition bit for all constituent entries
4482 * that were in the original entry (saved in
4483 * tmp_entry). Also check for waiters.
4484 */
4485 while ((entry != vm_map_to_entry(map)) &&
4486 (entry->vme_start < tmp_entry.vme_end)) {
4487 assert(entry->in_transition);
4488 entry->in_transition = FALSE;
4489 if (entry->needs_wakeup) {
4490 entry->needs_wakeup = FALSE;
4491 need_wakeup = TRUE;
4492 }
4493 entry = entry->vme_next;
4494 }
4495 continue;
4496 } else {
4497 vm_map_unlock(map);
4498 vm_map_unwire_nested(entry->object.sub_map,
4499 sub_start, sub_end, user_wire, map_pmap,
4500 pmap_addr);
4501 vm_map_lock(map);
4502
4503 if (last_timestamp+1 != map->timestamp) {
4504 /*
4505 * Find the entry again. It could have been
4506 * clipped or deleted after we unlocked the map.
4507 */
4508 if (!vm_map_lookup_entry(map,
4509 tmp_entry.vme_start,
4510 &first_entry)) {
4511 if (!user_wire)
4512 panic("vm_map_unwire: re-lookup failed");
4513 entry = first_entry->vme_next;
4514 } else
4515 entry = first_entry;
4516 }
4517 last_timestamp = map->timestamp;
4518 }
4519 }
4520
4521
4522 if ((entry->wired_count == 0) ||
4523 (user_wire && entry->user_wired_count == 0)) {
4524 if (!user_wire)
4525 panic("vm_map_unwire: entry is unwired");
4526
4527 entry = entry->vme_next;
4528 continue;
4529 }
4530
4531 assert(entry->wired_count > 0 &&
4532 (!user_wire || entry->user_wired_count > 0));
4533
4534 vm_map_clip_start(map, entry, start);
4535 vm_map_clip_end(map, entry, end);
4536
4537 /*
4538 * Check for holes
4539 * Holes: Next entry should be contiguous unless
4540 * this is the end of the region.
4541 */
4542 if (((entry->vme_end < end) &&
4543 ((entry->vme_next == vm_map_to_entry(map)) ||
4544 (entry->vme_next->vme_start > entry->vme_end)))) {
4545
4546 if (!user_wire)
4547 panic("vm_map_unwire: non-contiguous region");
4548 entry = entry->vme_next;
4549 continue;
4550 }
4551
4552 subtract_wire_counts(map, entry, user_wire);
4553
4554 if (entry->wired_count != 0) {
4555 entry = entry->vme_next;
4556 continue;
4557 }
4558
4559 if(entry->zero_wired_pages) {
4560 entry->zero_wired_pages = FALSE;
4561 }
4562
4563 entry->in_transition = TRUE;
4564 tmp_entry = *entry; /* see comment in vm_map_wire() */
4565
4566 /*
4567 * We can unlock the map now. The in_transition state
4568 * guarantees existence of the entry.
4569 */
4570 vm_map_unlock(map);
4571 if(map_pmap) {
4572 vm_fault_unwire(map,
4573 &tmp_entry, FALSE, map_pmap, pmap_addr);
4574 } else {
4575 vm_fault_unwire(map,
4576 &tmp_entry, FALSE, map->pmap,
4577 tmp_entry.vme_start);
4578 }
4579 vm_map_lock(map);
4580
4581 if (last_timestamp+1 != map->timestamp) {
4582 /*
4583 * Find the entry again. It could have been clipped
4584 * or deleted after we unlocked the map.
4585 */
4586 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4587 &first_entry)) {
4588 if (!user_wire)
4589 panic("vm_map_unwire: re-lookup failed");
4590 entry = first_entry->vme_next;
4591 } else
4592 entry = first_entry;
4593 }
4594 last_timestamp = map->timestamp;
4595
4596 /*
4597 * clear transition bit for all constituent entries that
4598 * were in the original entry (saved in tmp_entry). Also
4599 * check for waiters.
4600 */
4601 while ((entry != vm_map_to_entry(map)) &&
4602 (entry->vme_start < tmp_entry.vme_end)) {
4603 assert(entry->in_transition);
4604 entry->in_transition = FALSE;
4605 if (entry->needs_wakeup) {
4606 entry->needs_wakeup = FALSE;
4607 need_wakeup = TRUE;
4608 }
4609 entry = entry->vme_next;
4610 }
4611 }
4612
4613 /*
4614 * We might have fragmented the address space when we wired this
4615 * range of addresses. Attempt to re-coalesce these VM map entries
4616 * with their neighbors now that they're no longer wired.
4617 * Under some circumstances, address space fragmentation can
4618 * prevent VM object shadow chain collapsing, which can cause
4619 * swap space leaks.
4620 */
4621 vm_map_simplify_range(map, start, end);
4622
4623 vm_map_unlock(map);
4624 /*
4625 * wake up anybody waiting on entries that we have unwired.
4626 */
4627 if (need_wakeup)
4628 vm_map_entry_wakeup(map);
4629 return(KERN_SUCCESS);
4630
4631 }
4632
4633 kern_return_t
4634 vm_map_unwire(
4635 register vm_map_t map,
4636 register vm_map_offset_t start,
4637 register vm_map_offset_t end,
4638 boolean_t user_wire)
4639 {
4640 return vm_map_unwire_nested(map, start, end,
4641 user_wire, (pmap_t)NULL, 0);
4642 }
4643
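/*
 * Illustrative sketch (not part of the original source): a typical
 * user-wire cycle pins a page-aligned range before an I/O operation
 * and releases it afterwards.  The helper name, the chosen protections
 * and the map argument are hypothetical; only vm_map_wire() and
 * vm_map_unwire() above are real interfaces.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_wire_for_io(
	vm_map_t	map,		/* e.g. current_map() */
	vm_map_offset_t	addr,
	vm_map_size_t	len)
{
	vm_map_offset_t	start = vm_map_trunc_page(addr);
	vm_map_offset_t	end = vm_map_round_page(addr + len);
	kern_return_t	kr;

	/* user_wire == TRUE: charged against the task's wired limit */
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the now-resident pages ... */

	/* the user_wire flag must match the one used for wiring */
	return vm_map_unwire(map, start, end, TRUE);
}
#endif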
4644
4645 /*
4646 * vm_map_entry_delete: [ internal use only ]
4647 *
4648 * Deallocate the given entry from the target map.
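 * The map must be locked by the caller; this routine unlocks it
 * before releasing the backing object or submap and returns with
 * the map unlocked.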
4649 */
4650 static void
4651 vm_map_entry_delete(
4652 register vm_map_t map,
4653 register vm_map_entry_t entry)
4654 {
4655 register vm_map_offset_t s, e;
4656 register vm_object_t object;
4657 register vm_map_t submap;
4658
4659 s = entry->vme_start;
4660 e = entry->vme_end;
4661 assert(page_aligned(s));
4662 assert(page_aligned(e));
4663 assert(entry->wired_count == 0);
4664 assert(entry->user_wired_count == 0);
4665 assert(!entry->permanent);
4666
4667 if (entry->is_sub_map) {
4668 object = NULL;
4669 submap = entry->object.sub_map;
4670 } else {
4671 submap = NULL;
4672 object = entry->object.vm_object;
4673 }
4674
4675 vm_map_entry_unlink(map, entry);
4676 map->size -= e - s;
4677
4678 vm_map_entry_dispose(map, entry);
4679
4680 vm_map_unlock(map);
4681 /*
4682 * Deallocate the object only after removing all
4683 * pmap entries pointing to its pages.
4684 */
4685 if (submap)
4686 vm_map_deallocate(submap);
4687 else
4688 vm_object_deallocate(object);
4689
4690 }
4691
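/*
 *	vm_map_submap_pmap_clean:
 *
 *	Remove the physical mappings backing the range [start, end) of
 *	"map" where that range is implemented by "sub_map" beginning at
 *	"offset".  Nested submaps are handled recursively; if "map" is
 *	itself mapped elsewhere and still referenced, the backing object
 *	is protected to VM_PROT_NONE instead of calling pmap_remove()
 *	directly.
 */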
4692 void
4693 vm_map_submap_pmap_clean(
4694 vm_map_t map,
4695 vm_map_offset_t start,
4696 vm_map_offset_t end,
4697 vm_map_t sub_map,
4698 vm_map_offset_t offset)
4699 {
4700 vm_map_offset_t submap_start;
4701 vm_map_offset_t submap_end;
4702 vm_map_size_t remove_size;
4703 vm_map_entry_t entry;
4704
4705 submap_end = offset + (end - start);
4706 submap_start = offset;
4707
4708 vm_map_lock_read(sub_map);
4709 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4710
4711 remove_size = (entry->vme_end - entry->vme_start);
4712 if(offset > entry->vme_start)
4713 remove_size -= offset - entry->vme_start;
4714
4715
4716 if(submap_end < entry->vme_end) {
4717 remove_size -=
4718 entry->vme_end - submap_end;
4719 }
4720 if(entry->is_sub_map) {
4721 vm_map_submap_pmap_clean(
4722 sub_map,
4723 start,
4724 start + remove_size,
4725 entry->object.sub_map,
4726 entry->offset);
4727 } else {
4728
4729 if((map->mapped) && (map->ref_count)
4730 && (entry->object.vm_object != NULL)) {
4731 vm_object_pmap_protect(
4732 entry->object.vm_object,
4733 entry->offset,
4734 remove_size,
4735 PMAP_NULL,
4736 entry->vme_start,
4737 VM_PROT_NONE);
4738 } else {
4739 pmap_remove(map->pmap,
4740 (addr64_t)start,
4741 (addr64_t)(start + remove_size));
4742 }
4743 }
4744 }
4745
4746 entry = entry->vme_next;
4747
4748 while((entry != vm_map_to_entry(sub_map))
4749 && (entry->vme_start < submap_end)) {
4750 remove_size = (entry->vme_end - entry->vme_start);
4751 if(submap_end < entry->vme_end) {
4752 remove_size -= entry->vme_end - submap_end;
4753 }
4754 if(entry->is_sub_map) {
4755 vm_map_submap_pmap_clean(
4756 sub_map,
4757 (start + entry->vme_start) - offset,
4758 ((start + entry->vme_start) - offset) + remove_size,
4759 entry->object.sub_map,
4760 entry->offset);
4761 } else {
4762 if((map->mapped) && (map->ref_count)
4763 && (entry->object.vm_object != NULL)) {
4764 vm_object_pmap_protect(
4765 entry->object.vm_object,
4766 entry->offset,
4767 remove_size,
4768 PMAP_NULL,
4769 entry->vme_start,
4770 VM_PROT_NONE);
4771 } else {
4772 pmap_remove(map->pmap,
4773 (addr64_t)((start + entry->vme_start)
4774 - offset),
4775 (addr64_t)(((start + entry->vme_start)
4776 - offset) + remove_size));
4777 }
4778 }
4779 entry = entry->vme_next;
4780 }
4781 vm_map_unlock_read(sub_map);
4782 return;
4783 }
4784
4785 /*
4786 * vm_map_delete: [ internal use only ]
4787 *
4788 * Deallocates the given address range from the target map.
4789 * Removes all user wirings. Unwires one kernel wiring if
4790 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4791 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4792 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4793 *
4794 * This routine is called with map locked and leaves map locked.
4795 */
4796 static kern_return_t
4797 vm_map_delete(
4798 vm_map_t map,
4799 vm_map_offset_t start,
4800 vm_map_offset_t end,
4801 int flags,
4802 vm_map_t zap_map)
4803 {
4804 vm_map_entry_t entry, next;
4805 struct vm_map_entry *first_entry, tmp_entry;
4806 register vm_map_offset_t s;
4807 register vm_object_t object;
4808 boolean_t need_wakeup;
4809 unsigned int last_timestamp = ~0; /* unlikely value */
4810 int interruptible;
4811
4812 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4813 THREAD_ABORTSAFE : THREAD_UNINT;
4814
4815 /*
4816 * All our DMA I/O operations in IOKit are currently done by
4817 * wiring through the map entries of the task requesting the I/O.
4818 * Because of this, we must always wait for kernel wirings
4819 * to go away on the entries before deleting them.
4820 *
4821 * Any caller who wants to actually remove a kernel wiring
4822 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4823 * properly remove one wiring instead of blasting through
4824 * them all.
4825 */
4826 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4827
4828 while(1) {
4829 /*
4830 * Find the start of the region, and clip it
4831 */
4832 if (vm_map_lookup_entry(map, start, &first_entry)) {
4833 entry = first_entry;
4834 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4835 start = SUPERPAGE_ROUND_DOWN(start);
4836 continue;
4837 }
4838 if (start == entry->vme_start) {
4839 /*
4840 * No need to clip. We don't want to cause
4841 * any unnecessary unnesting in this case...
4842 */
4843 } else {
4844 vm_map_clip_start(map, entry, start);
4845 }
4846
4847 /*
4848 * Fix the lookup hint now, rather than each
4849 * time through the loop.
4850 */
4851 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4852 } else {
4853 entry = first_entry->vme_next;
4854 }
4855 break;
4856 }
4857 if (entry->superpage_size)
4858 end = SUPERPAGE_ROUND_UP(end);
4859
4860 need_wakeup = FALSE;
4861 /*
4862 * Step through all entries in this region
4863 */
4864 s = entry->vme_start;
4865 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4866 /*
4867 * At this point, we have deleted all the memory entries
4868 * between "start" and "s". We still need to delete
4869 * all memory entries between "s" and "end".
4870 * While we were blocked and the map was unlocked, some
4871 * new memory entries could have been re-allocated between
4872 * "start" and "s" and we don't want to mess with those.
4873 * Some of those entries could even have been re-assembled
4874 * with an entry after "s" (in vm_map_simplify_entry()), so
4875 * we may have to vm_map_clip_start() again.
4876 */
4877
4878 if (entry->vme_start >= s) {
4879 /*
4880 * This entry starts on or after "s"
4881 * so no need to clip its start.
4882 */
4883 } else {
4884 /*
4885 * This entry has been re-assembled by a
4886 * vm_map_simplify_entry(). We need to
4887 * re-clip its start.
4888 */
4889 vm_map_clip_start(map, entry, s);
4890 }
4891 if (entry->vme_end <= end) {
4892 /*
4893 * This entry is going away completely, so no need
4894 * to clip and possibly cause an unnecessary unnesting.
4895 */
4896 } else {
4897 vm_map_clip_end(map, entry, end);
4898 }
4899
4900 if (entry->permanent) {
4901 panic("attempt to remove permanent VM map entry "
4902 "%p [0x%llx:0x%llx]\n",
4903 entry, (uint64_t) s, (uint64_t) end);
4904 }
4905
4906
4907 if (entry->in_transition) {
4908 wait_result_t wait_result;
4909
4910 /*
4911 * Another thread is wiring/unwiring this entry.
4912 * Let the other thread know we are waiting.
4913 */
4914 assert(s == entry->vme_start);
4915 entry->needs_wakeup = TRUE;
4916
4917 /*
4918 * wake up anybody waiting on entries that we have
4919 * already unwired/deleted.
4920 */
4921 if (need_wakeup) {
4922 vm_map_entry_wakeup(map);
4923 need_wakeup = FALSE;
4924 }
4925
4926 wait_result = vm_map_entry_wait(map, interruptible);
4927
4928 if (interruptible &&
4929 wait_result == THREAD_INTERRUPTED) {
4930 /*
4931 * We do not clear the needs_wakeup flag,
4932 * since we cannot tell if we were the only one.
4933 */
4934 vm_map_unlock(map);
4935 return KERN_ABORTED;
4936 }
4937
4938 /*
4939 * The entry could have been clipped or it
4940 * may not exist anymore. Look it up again.
4941 */
4942 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4943 assert((map != kernel_map) &&
4944 (!entry->is_sub_map));
4945 /*
4946 * User: use the next entry
4947 */
4948 entry = first_entry->vme_next;
4949 s = entry->vme_start;
4950 } else {
4951 entry = first_entry;
4952 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4953 }
4954 last_timestamp = map->timestamp;
4955 continue;
4956 } /* end in_transition */
4957
4958 if (entry->wired_count) {
4959 boolean_t user_wire;
4960
4961 user_wire = entry->user_wired_count > 0;
4962
4963 /*
4964 * Remove a kernel wiring if requested
4965 */
4966 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4967 entry->wired_count--;
4968 }
4969
4970 /*
4971 * Remove all user wirings for proper accounting
4972 */
4973 if (entry->user_wired_count > 0) {
4974 while (entry->user_wired_count)
4975 subtract_wire_counts(map, entry, user_wire);
4976 }
4977
4978 if (entry->wired_count != 0) {
4979 assert(map != kernel_map);
4980 /*
4981 * Cannot continue. Typical case is when
4982 * a user thread has physical I/O pending
4983 * on this page. Either wait for the
4984 * kernel wiring to go away or return an
4985 * error.
4986 */
4987 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4988 wait_result_t wait_result;
4989
4990 assert(s == entry->vme_start);
4991 entry->needs_wakeup = TRUE;
4992 wait_result = vm_map_entry_wait(map,
4993 interruptible);
4994
4995 if (interruptible &&
4996 wait_result == THREAD_INTERRUPTED) {
4997 /*
4998 * We do not clear the
4999 * needs_wakeup flag, since we
5000 * cannot tell if we were the
5001 * only one.
5002 */
5003 vm_map_unlock(map);
5004 return KERN_ABORTED;
5005 }
5006
5007 /*
5008 * The entry could have been clipped or
5009 * it may not exist anymore. Look it
5010 * up again.
5011 */
5012 if (!vm_map_lookup_entry(map, s,
5013 &first_entry)) {
5014 assert(map != kernel_map);
5015 /*
5016 * User: use the next entry
5017 */
5018 entry = first_entry->vme_next;
5019 s = entry->vme_start;
5020 } else {
5021 entry = first_entry;
5022 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5023 }
5024 last_timestamp = map->timestamp;
5025 continue;
5026 }
5027 else {
5028 return KERN_FAILURE;
5029 }
5030 }
5031
5032 entry->in_transition = TRUE;
5033 /*
5034 * copy current entry. see comment in vm_map_wire()
5035 */
5036 tmp_entry = *entry;
5037 assert(s == entry->vme_start);
5038
5039 /*
5040 * We can unlock the map now. The in_transition
5041 * state guarantees existence of the entry.
5042 */
5043 vm_map_unlock(map);
5044
5045 if (tmp_entry.is_sub_map) {
5046 vm_map_t sub_map;
5047 vm_map_offset_t sub_start, sub_end;
5048 pmap_t pmap;
5049 vm_map_offset_t pmap_addr;
5050
5051
5052 sub_map = tmp_entry.object.sub_map;
5053 sub_start = tmp_entry.offset;
5054 sub_end = sub_start + (tmp_entry.vme_end -
5055 tmp_entry.vme_start);
5056 if (tmp_entry.use_pmap) {
5057 pmap = sub_map->pmap;
5058 pmap_addr = tmp_entry.vme_start;
5059 } else {
5060 pmap = map->pmap;
5061 pmap_addr = tmp_entry.vme_start;
5062 }
5063 (void) vm_map_unwire_nested(sub_map,
5064 sub_start, sub_end,
5065 user_wire,
5066 pmap, pmap_addr);
5067 } else {
5068
5069 vm_fault_unwire(map, &tmp_entry,
5070 tmp_entry.object.vm_object == kernel_object,
5071 map->pmap, tmp_entry.vme_start);
5072 }
5073
5074 vm_map_lock(map);
5075
5076 if (last_timestamp+1 != map->timestamp) {
5077 /*
5078 * Find the entry again. It could have
5079 * been clipped after we unlocked the map.
5080 */
5081 if (!vm_map_lookup_entry(map, s, &first_entry)){
5082 assert((map != kernel_map) &&
5083 (!entry->is_sub_map));
5084 first_entry = first_entry->vme_next;
5085 s = first_entry->vme_start;
5086 } else {
5087 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5088 }
5089 } else {
5090 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5091 first_entry = entry;
5092 }
5093
5094 last_timestamp = map->timestamp;
5095
5096 entry = first_entry;
5097 while ((entry != vm_map_to_entry(map)) &&
5098 (entry->vme_start < tmp_entry.vme_end)) {
5099 assert(entry->in_transition);
5100 entry->in_transition = FALSE;
5101 if (entry->needs_wakeup) {
5102 entry->needs_wakeup = FALSE;
5103 need_wakeup = TRUE;
5104 }
5105 entry = entry->vme_next;
5106 }
5107 /*
5108 * We have unwired the entry(s). Go back and
5109 * delete them.
5110 */
5111 entry = first_entry;
5112 continue;
5113 }
5114
5115 /* entry is unwired */
5116 assert(entry->wired_count == 0);
5117 assert(entry->user_wired_count == 0);
5118
5119 assert(s == entry->vme_start);
5120
5121 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5122 /*
5123 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5124 * vm_map_delete(), some map entries might have been
5125 * transferred to a "zap_map", which doesn't have a
5126 * pmap. The original pmap has already been flushed
5127 * in the vm_map_delete() call targeting the original
5128 * map, but when we get to destroying the "zap_map",
5129 * we don't have any pmap to flush, so let's just skip
5130 * all this.
5131 */
5132 } else if (entry->is_sub_map) {
5133 if (entry->use_pmap) {
5134 #ifndef NO_NESTED_PMAP
5135 pmap_unnest(map->pmap,
5136 (addr64_t)entry->vme_start,
5137 entry->vme_end - entry->vme_start);
5138 #endif /* NO_NESTED_PMAP */
5139 if ((map->mapped) && (map->ref_count)) {
5140 /* clean up parent map/maps */
5141 vm_map_submap_pmap_clean(
5142 map, entry->vme_start,
5143 entry->vme_end,
5144 entry->object.sub_map,
5145 entry->offset);
5146 }
5147 } else {
5148 vm_map_submap_pmap_clean(
5149 map, entry->vme_start, entry->vme_end,
5150 entry->object.sub_map,
5151 entry->offset);
5152 }
5153 } else if (entry->object.vm_object != kernel_object) {
5154 object = entry->object.vm_object;
5155 if((map->mapped) && (map->ref_count)) {
5156 vm_object_pmap_protect(
5157 object, entry->offset,
5158 entry->vme_end - entry->vme_start,
5159 PMAP_NULL,
5160 entry->vme_start,
5161 VM_PROT_NONE);
5162 } else {
5163 pmap_remove(map->pmap,
5164 (addr64_t)entry->vme_start,
5165 (addr64_t)entry->vme_end);
5166 }
5167 }
5168
5169 /*
5170 * All pmap mappings for this map entry must have been
5171 * cleared by now.
5172 */
5173 assert(vm_map_pmap_is_empty(map,
5174 entry->vme_start,
5175 entry->vme_end));
5176
5177 next = entry->vme_next;
5178 s = next->vme_start;
5179 last_timestamp = map->timestamp;
5180
5181 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5182 zap_map != VM_MAP_NULL) {
5183 vm_map_size_t entry_size;
5184 /*
5185 * The caller wants to save the affected VM map entries
5186 * into the "zap_map". The caller will take care of
5187 * these entries.
5188 */
5189 /* unlink the entry from "map" ... */
5190 vm_map_entry_unlink(map, entry);
5191 /* ... and add it to the end of the "zap_map" */
5192 vm_map_entry_link(zap_map,
5193 vm_map_last_entry(zap_map),
5194 entry);
5195 entry_size = entry->vme_end - entry->vme_start;
5196 map->size -= entry_size;
5197 zap_map->size += entry_size;
5198 /* we didn't unlock the map, so no timestamp increase */
5199 last_timestamp--;
5200 } else {
5201 vm_map_entry_delete(map, entry);
5202 /* vm_map_entry_delete unlocks the map */
5203 vm_map_lock(map);
5204 }
5205
5206 entry = next;
5207
5208 if(entry == vm_map_to_entry(map)) {
5209 break;
5210 }
5211 if (last_timestamp+1 != map->timestamp) {
5212 /*
5213 * We are responsible for deleting everything
5214 * from the given space.  If someone has interfered,
5215 * we pick up where we left off; back-fills should
5216 * be all right for anyone except vm_map_delete, and
5217 * we have to assume that the task has been fully
5218 * disabled before we get here.
5219 */
5220 if (!vm_map_lookup_entry(map, s, &entry)){
5221 entry = entry->vme_next;
5222 s = entry->vme_start;
5223 } else {
5224 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5225 }
5226 /*
5227 * Others can not only allocate behind us; we can
5228 * also see coalescing while we don't have the map lock.
5229 */
5230 if(entry == vm_map_to_entry(map)) {
5231 break;
5232 }
5233 }
5234 last_timestamp = map->timestamp;
5235 }
5236
5237 if (map->wait_for_space)
5238 thread_wakeup((event_t) map);
5239 /*
5240 * wake up anybody waiting on entries that we have already deleted.
5241 */
5242 if (need_wakeup)
5243 vm_map_entry_wakeup(map);
5244
5245 return KERN_SUCCESS;
5246 }
5247
5248 /*
5249 * vm_map_remove:
5250 *
5251 * Remove the given address range from the target map.
5252 * This is the exported form of vm_map_delete.
5253 */
5254 kern_return_t
5255 vm_map_remove(
5256 register vm_map_t map,
5257 register vm_map_offset_t start,
5258 register vm_map_offset_t end,
5259 register boolean_t flags)
5260 {
5261 register kern_return_t result;
5262
5263 vm_map_lock(map);
5264 VM_MAP_RANGE_CHECK(map, start, end);
5265 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5266 vm_map_unlock(map);
5267
5268 return(result);
5269 }
5270
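/*
 * Illustrative sketch (not part of the original source): a caller
 * tearing down a wired kernel allocation (a kmem_free()-style path)
 * passes VM_MAP_REMOVE_KUNWIRE so that vm_map_delete() drops the single
 * kernel wiring instead of waiting for it; an ordinary deallocation
 * passes VM_MAP_NO_FLAGS.  The helper name and range are hypothetical.
 */
#if 0	/* example only, not compiled */
static void
example_remove_wired_kernel_range(
	vm_map_offset_t	addr,
	vm_map_size_t	size)
{
	(void) vm_map_remove(kernel_map,
			     vm_map_trunc_page(addr),
			     vm_map_round_page(addr + size),
			     VM_MAP_REMOVE_KUNWIRE);
}
#endif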
5271
5272 /*
5273 * Routine: vm_map_copy_discard
5274 *
5275 * Description:
5276 * Dispose of a map copy object (returned by
5277 * vm_map_copyin).
5278 */
5279 void
5280 vm_map_copy_discard(
5281 vm_map_copy_t copy)
5282 {
5283 if (copy == VM_MAP_COPY_NULL)
5284 return;
5285
5286 switch (copy->type) {
5287 case VM_MAP_COPY_ENTRY_LIST:
5288 while (vm_map_copy_first_entry(copy) !=
5289 vm_map_copy_to_entry(copy)) {
5290 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5291
5292 vm_map_copy_entry_unlink(copy, entry);
5293 vm_object_deallocate(entry->object.vm_object);
5294 vm_map_copy_entry_dispose(copy, entry);
5295 }
5296 break;
5297 case VM_MAP_COPY_OBJECT:
5298 vm_object_deallocate(copy->cpy_object);
5299 break;
5300 case VM_MAP_COPY_KERNEL_BUFFER:
5301
5302 /*
5303 * The vm_map_copy_t and possibly the data buffer were
5304 * allocated by a single call to kalloc(), i.e. the
5305 * vm_map_copy_t was not allocated out of the zone.
5306 */
5307 kfree(copy, copy->cpy_kalloc_size);
5308 return;
5309 }
5310 zfree(vm_map_copy_zone, copy);
5311 }
5312
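/*
 * Illustrative sketch (not part of the original source): the usual
 * pairing is a vm_map_copyin() whose result is either consumed by
 * vm_map_copyout()/vm_map_copy_overwrite() or, on any error path,
 * released with vm_map_copy_discard().  The helper name is
 * hypothetical.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_move_range(
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* out */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* copy was not consumed */
	return kr;
}
#endif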
5313 /*
5314 * Routine: vm_map_copy_copy
5315 *
5316 * Description:
5317 * Move the information in a map copy object to
5318 * a new map copy object, leaving the old one
5319 * empty.
5320 *
5321 * This is used by kernel routines that need
5322 * to look at out-of-line data (in copyin form)
5323 * before deciding whether to return SUCCESS.
5324 * If the routine returns FAILURE, the original
5325 * copy object will be deallocated; therefore,
5326 * these routines must make a copy of the copy
5327 * object and leave the original empty so that
5328 * deallocation will not fail.
5329 */
5330 vm_map_copy_t
5331 vm_map_copy_copy(
5332 vm_map_copy_t copy)
5333 {
5334 vm_map_copy_t new_copy;
5335
5336 if (copy == VM_MAP_COPY_NULL)
5337 return VM_MAP_COPY_NULL;
5338
5339 /*
5340 * Allocate a new copy object, and copy the information
5341 * from the old one into it.
5342 */
5343
5344 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5345 *new_copy = *copy;
5346
5347 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5348 /*
5349 * The links in the entry chain must be
5350 * changed to point to the new copy object.
5351 */
5352 vm_map_copy_first_entry(copy)->vme_prev
5353 = vm_map_copy_to_entry(new_copy);
5354 vm_map_copy_last_entry(copy)->vme_next
5355 = vm_map_copy_to_entry(new_copy);
5356 }
5357
5358 /*
5359 * Change the old copy object into one that contains
5360 * nothing to be deallocated.
5361 */
5362 copy->type = VM_MAP_COPY_OBJECT;
5363 copy->cpy_object = VM_OBJECT_NULL;
5364
5365 /*
5366 * Return the new object.
5367 */
5368 return new_copy;
5369 }
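/*
 * Illustrative sketch (not part of the original source): a routine
 * that must inspect copyin data before it can commit to success
 * detaches the contents first, so that a failure return (after which
 * the caller discards the original copy object) cannot free data the
 * routine is still holding.  The helper and the validation step are
 * hypothetical.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_peek_then_consume(
	vm_map_t		dst_map,
	vm_map_copy_t		copy,		/* caller's copy object */
	vm_map_address_t	*dst_addr)	/* out */
{
	vm_map_copy_t	working;
	kern_return_t	kr;

	working = vm_map_copy_copy(copy);	/* "copy" is now empty */

	/* ... examine/validate the out-of-line data via "working" ... */

	kr = vm_map_copyout(dst_map, dst_addr, working);
	if (kr != KERN_SUCCESS) {
		/*
		 * We own "working"; a later discard of the (now empty)
		 * original "copy" by our caller is harmless.
		 */
		vm_map_copy_discard(working);
	}
	return kr;
}
#endif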
5370
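/*
 *	vm_map_overwrite_submap_recurse:
 *
 *	Verify that the destination range [dst_addr, dst_addr + dst_size)
 *	of "dst_map" is entirely writeable and contiguous, descending into
 *	any nested submaps.  Called by vm_map_copy_overwrite_nested() below
 *	before it commits to overwriting a destination that crosses a
 *	submap boundary.
 */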
5371 static kern_return_t
5372 vm_map_overwrite_submap_recurse(
5373 vm_map_t dst_map,
5374 vm_map_offset_t dst_addr,
5375 vm_map_size_t dst_size)
5376 {
5377 vm_map_offset_t dst_end;
5378 vm_map_entry_t tmp_entry;
5379 vm_map_entry_t entry;
5380 kern_return_t result;
5381 boolean_t encountered_sub_map = FALSE;
5382
5383
5384
5385 /*
5386 * Verify that the destination is all writeable
5387 * initially. We have to trunc the destination
5388 * address and round the copy size or we'll end up
5389 * splitting entries in strange ways.
5390 */
5391
5392 dst_end = vm_map_round_page(dst_addr + dst_size);
5393 vm_map_lock(dst_map);
5394
5395 start_pass_1:
5396 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5397 vm_map_unlock(dst_map);
5398 return(KERN_INVALID_ADDRESS);
5399 }
5400
5401 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5402 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5403
5404 for (entry = tmp_entry;;) {
5405 vm_map_entry_t next;
5406
5407 next = entry->vme_next;
5408 while(entry->is_sub_map) {
5409 vm_map_offset_t sub_start;
5410 vm_map_offset_t sub_end;
5411 vm_map_offset_t local_end;
5412
5413 if (entry->in_transition) {
5414 /*
5415 * Say that we are waiting, and wait for entry.
5416 */
5417 entry->needs_wakeup = TRUE;
5418 vm_map_entry_wait(dst_map, THREAD_UNINT);
5419
5420 goto start_pass_1;
5421 }
5422
5423 encountered_sub_map = TRUE;
5424 sub_start = entry->offset;
5425
5426 if(entry->vme_end < dst_end)
5427 sub_end = entry->vme_end;
5428 else
5429 sub_end = dst_end;
5430 sub_end -= entry->vme_start;
5431 sub_end += entry->offset;
5432 local_end = entry->vme_end;
5433 vm_map_unlock(dst_map);
5434
5435 result = vm_map_overwrite_submap_recurse(
5436 entry->object.sub_map,
5437 sub_start,
5438 sub_end - sub_start);
5439
5440 if(result != KERN_SUCCESS)
5441 return result;
5442 if (dst_end <= entry->vme_end)
5443 return KERN_SUCCESS;
5444 vm_map_lock(dst_map);
5445 if(!vm_map_lookup_entry(dst_map, local_end,
5446 &tmp_entry)) {
5447 vm_map_unlock(dst_map);
5448 return(KERN_INVALID_ADDRESS);
5449 }
5450 entry = tmp_entry;
5451 next = entry->vme_next;
5452 }
5453
5454 if ( ! (entry->protection & VM_PROT_WRITE)) {
5455 vm_map_unlock(dst_map);
5456 return(KERN_PROTECTION_FAILURE);
5457 }
5458
5459 /*
5460 * If the entry is in transition, we must wait
5461 * for it to exit that state. Anything could happen
5462 * when we unlock the map, so start over.
5463 */
5464 if (entry->in_transition) {
5465
5466 /*
5467 * Say that we are waiting, and wait for entry.
5468 */
5469 entry->needs_wakeup = TRUE;
5470 vm_map_entry_wait(dst_map, THREAD_UNINT);
5471
5472 goto start_pass_1;
5473 }
5474
5475 /*
5476 * our range is contained completely within this map entry
5477 */
5478 if (dst_end <= entry->vme_end) {
5479 vm_map_unlock(dst_map);
5480 return KERN_SUCCESS;
5481 }
5482 /*
5483 * check that range specified is contiguous region
5484 */
5485 if ((next == vm_map_to_entry(dst_map)) ||
5486 (next->vme_start != entry->vme_end)) {
5487 vm_map_unlock(dst_map);
5488 return(KERN_INVALID_ADDRESS);
5489 }
5490
5491 /*
5492 * Check for permanent objects in the destination.
5493 */
5494 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5495 ((!entry->object.vm_object->internal) ||
5496 (entry->object.vm_object->true_share))) {
5497 if(encountered_sub_map) {
5498 vm_map_unlock(dst_map);
5499 return(KERN_FAILURE);
5500 }
5501 }
5502
5503
5504 entry = next;
5505 }/* for */
5506 vm_map_unlock(dst_map);
5507 return(KERN_SUCCESS);
5508 }
5509
5510 /*
5511 * Routine: vm_map_copy_overwrite
5512 *
5513 * Description:
5514 * Copy the memory described by the map copy
5515 * object (copy; returned by vm_map_copyin) onto
5516 * the specified destination region (dst_map, dst_addr).
5517 * The destination must be writeable.
5518 *
5519 * Unlike vm_map_copyout, this routine actually
5520 * writes over previously-mapped memory. If the
5521 * previous mapping was to a permanent (user-supplied)
5522 * memory object, it is preserved.
5523 *
5524 * The attributes (protection and inheritance) of the
5525 * destination region are preserved.
5526 *
5527 * If successful, consumes the copy object.
5528 * Otherwise, the caller is responsible for it.
5529 *
5530 * Implementation notes:
5531 * To overwrite aligned temporary virtual memory, it is
5532 * sufficient to remove the previous mapping and insert
5533 * the new copy. This replacement is done either on
5534 * the whole region (if no permanent virtual memory
5535 * objects are embedded in the destination region) or
5536 * in individual map entries.
5537 *
5538 * To overwrite permanent virtual memory, it is necessary
5539 * to copy each page, as the external memory management
5540 * interface currently does not provide any optimizations.
5541 *
5542 * Unaligned memory also has to be copied. It is possible
5543 * to use 'vm_trickery' to copy the aligned data. This is
5544 * not done but not hard to implement.
5545 *
5546 * Once a page of permanent memory has been overwritten,
5547 * it is impossible to interrupt this function; otherwise,
5548 * the call would be neither atomic nor location-independent.
5549 * The kernel-state portion of a user thread must be
5550 * interruptible.
5551 *
5552 * It may be expensive to forward all requests that might
5553 * overwrite permanent memory (vm_write, vm_copy) to
5554 * uninterruptible kernel threads. This routine may be
5555 * called by interruptible threads; however, success is
5556 * not guaranteed -- if the request cannot be performed
5557 * atomically and interruptibly, an error indication is
5558 * returned.
5559 */
5560
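/*
 * Illustrative sketch (not part of the original source): the classic
 * vm_write()-style path pairs a vm_map_copyin() of the source with a
 * vm_map_copy_overwrite() onto an existing, writeable destination
 * mapping.  The helper name is hypothetical; on success the copy
 * object is consumed, on failure the caller must discard it.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_overwrite_range(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif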
5561 static kern_return_t
5562 vm_map_copy_overwrite_nested(
5563 vm_map_t dst_map,
5564 vm_map_address_t dst_addr,
5565 vm_map_copy_t copy,
5566 boolean_t interruptible,
5567 pmap_t pmap)
5568 {
5569 vm_map_offset_t dst_end;
5570 vm_map_entry_t tmp_entry;
5571 vm_map_entry_t entry;
5572 kern_return_t kr;
5573 boolean_t aligned = TRUE;
5574 boolean_t contains_permanent_objects = FALSE;
5575 boolean_t encountered_sub_map = FALSE;
5576 vm_map_offset_t base_addr;
5577 vm_map_size_t copy_size;
5578 vm_map_size_t total_size;
5579
5580
5581 /*
5582 * Check for null copy object.
5583 */
5584
5585 if (copy == VM_MAP_COPY_NULL)
5586 return(KERN_SUCCESS);
5587
5588 /*
5589 * Check for special kernel buffer allocated
5590 * by new_ipc_kmsg_copyin.
5591 */
5592
5593 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5594 return(vm_map_copyout_kernel_buffer(
5595 dst_map, &dst_addr,
5596 copy, TRUE));
5597 }
5598
5599 /*
5600 * Only works for entry lists at the moment. Will
5601 * support page lists later.
5602 */
5603
5604 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5605
5606 if (copy->size == 0) {
5607 vm_map_copy_discard(copy);
5608 return(KERN_SUCCESS);
5609 }
5610
5611 /*
5612 * Verify that the destination is all writeable
5613 * initially. We have to trunc the destination
5614 * address and round the copy size or we'll end up
5615 * splitting entries in strange ways.
5616 */
5617
5618 if (!page_aligned(copy->size) ||
5619 !page_aligned (copy->offset) ||
5620 !page_aligned (dst_addr))
5621 {
5622 aligned = FALSE;
5623 dst_end = vm_map_round_page(dst_addr + copy->size);
5624 } else {
5625 dst_end = dst_addr + copy->size;
5626 }
5627
5628 vm_map_lock(dst_map);
5629
5630 /* LP64todo - remove this check when vm_map_commpage64()
5631 * no longer has to stuff in a map_entry for the commpage
5632 * above the map's max_offset.
5633 */
5634 if (dst_addr >= dst_map->max_offset) {
5635 vm_map_unlock(dst_map);
5636 return(KERN_INVALID_ADDRESS);
5637 }
5638
5639 start_pass_1:
5640 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5641 vm_map_unlock(dst_map);
5642 return(KERN_INVALID_ADDRESS);
5643 }
5644 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5645 for (entry = tmp_entry;;) {
5646 vm_map_entry_t next = entry->vme_next;
5647
5648 while(entry->is_sub_map) {
5649 vm_map_offset_t sub_start;
5650 vm_map_offset_t sub_end;
5651 vm_map_offset_t local_end;
5652
5653 if (entry->in_transition) {
5654
5655 /*
5656 * Say that we are waiting, and wait for entry.
5657 */
5658 entry->needs_wakeup = TRUE;
5659 vm_map_entry_wait(dst_map, THREAD_UNINT);
5660
5661 goto start_pass_1;
5662 }
5663
5664 local_end = entry->vme_end;
5665 if (!(entry->needs_copy)) {
5666 /* if needs_copy we are a COW submap */
5667 /* in such a case we just replace so */
5668 /* there is no need for the */
5669 /* following check. */
5670 encountered_sub_map = TRUE;
5671 sub_start = entry->offset;
5672
5673 if(entry->vme_end < dst_end)
5674 sub_end = entry->vme_end;
5675 else
5676 sub_end = dst_end;
5677 sub_end -= entry->vme_start;
5678 sub_end += entry->offset;
5679 vm_map_unlock(dst_map);
5680
5681 kr = vm_map_overwrite_submap_recurse(
5682 entry->object.sub_map,
5683 sub_start,
5684 sub_end - sub_start);
5685 if(kr != KERN_SUCCESS)
5686 return kr;
5687 vm_map_lock(dst_map);
5688 }
5689
5690 if (dst_end <= entry->vme_end)
5691 goto start_overwrite;
5692 if(!vm_map_lookup_entry(dst_map, local_end,
5693 &entry)) {
5694 vm_map_unlock(dst_map);
5695 return(KERN_INVALID_ADDRESS);
5696 }
5697 next = entry->vme_next;
5698 }
5699
5700 if ( ! (entry->protection & VM_PROT_WRITE)) {
5701 vm_map_unlock(dst_map);
5702 return(KERN_PROTECTION_FAILURE);
5703 }
5704
5705 /*
5706 * If the entry is in transition, we must wait
5707 * for it to exit that state. Anything could happen
5708 * when we unlock the map, so start over.
5709 */
5710 if (entry->in_transition) {
5711
5712 /*
5713 * Say that we are waiting, and wait for entry.
5714 */
5715 entry->needs_wakeup = TRUE;
5716 vm_map_entry_wait(dst_map, THREAD_UNINT);
5717
5718 goto start_pass_1;
5719 }
5720
5721 /*
5722 * our range is contained completely within this map entry
5723 */
5724 if (dst_end <= entry->vme_end)
5725 break;
5726 /*
5727 * check that range specified is contiguous region
5728 */
5729 if ((next == vm_map_to_entry(dst_map)) ||
5730 (next->vme_start != entry->vme_end)) {
5731 vm_map_unlock(dst_map);
5732 return(KERN_INVALID_ADDRESS);
5733 }
5734
5735
5736 /*
5737 * Check for permanent objects in the destination.
5738 */
5739 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5740 ((!entry->object.vm_object->internal) ||
5741 (entry->object.vm_object->true_share))) {
5742 contains_permanent_objects = TRUE;
5743 }
5744
5745 entry = next;
5746 }/* for */
5747
5748 start_overwrite:
5749 /*
5750 * If there are permanent objects in the destination, then
5751 * the copy cannot be interrupted.
5752 */
5753
5754 if (interruptible && contains_permanent_objects) {
5755 vm_map_unlock(dst_map);
5756 return(KERN_FAILURE); /* XXX */
5757 }
5758
5759 /*
5760 *
5761 * Make a second pass, overwriting the data
5762 * At the beginning of each loop iteration,
5763 * the next entry to be overwritten is "tmp_entry"
5764 * (initially, the value returned from the lookup above),
5765 * and the starting address expected in that entry
5766 * is "start".
5767 */
5768
5769 total_size = copy->size;
5770 if(encountered_sub_map) {
5771 copy_size = 0;
5772 /* re-calculate tmp_entry since we've had the map */
5773 /* unlocked */
5774 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5775 vm_map_unlock(dst_map);
5776 return(KERN_INVALID_ADDRESS);
5777 }
5778 } else {
5779 copy_size = copy->size;
5780 }
5781
5782 base_addr = dst_addr;
5783 while(TRUE) {
5784 /* deconstruct the copy object and do in parts */
5785 /* only in sub_map, interruptible case */
5786 vm_map_entry_t copy_entry;
5787 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5788 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5789 int nentries;
5790 int remaining_entries = 0;
5791 vm_map_offset_t new_offset = 0;
5792
5793 for (entry = tmp_entry; copy_size == 0;) {
5794 vm_map_entry_t next;
5795
5796 next = entry->vme_next;
5797
5798 /* tmp_entry and base address are moved along */
5799 /* each time we encounter a sub-map. Otherwise */
5800 /* entry can outpace tmp_entry, and the copy_size */
5801 /* may reflect the distance between them. */
5802 /* If the current entry is found to be in transition */
5803 /* we will start over at the beginning or the last */
5804 /* encounter of a submap as dictated by base_addr */
5805 /* we will zero copy_size accordingly. */
5806 if (entry->in_transition) {
5807 /*
5808 * Say that we are waiting, and wait for entry.
5809 */
5810 entry->needs_wakeup = TRUE;
5811 vm_map_entry_wait(dst_map, THREAD_UNINT);
5812
5813 if(!vm_map_lookup_entry(dst_map, base_addr,
5814 &tmp_entry)) {
5815 vm_map_unlock(dst_map);
5816 return(KERN_INVALID_ADDRESS);
5817 }
5818 copy_size = 0;
5819 entry = tmp_entry;
5820 continue;
5821 }
5822 if(entry->is_sub_map) {
5823 vm_map_offset_t sub_start;
5824 vm_map_offset_t sub_end;
5825 vm_map_offset_t local_end;
5826
5827 if (entry->needs_copy) {
5828 /* if this is a COW submap */
5829 /* just back the range with an */
5830 /* anonymous entry */
5831 if(entry->vme_end < dst_end)
5832 sub_end = entry->vme_end;
5833 else
5834 sub_end = dst_end;
5835 if(entry->vme_start < base_addr)
5836 sub_start = base_addr;
5837 else
5838 sub_start = entry->vme_start;
5839 vm_map_clip_end(
5840 dst_map, entry, sub_end);
5841 vm_map_clip_start(
5842 dst_map, entry, sub_start);
5843 assert(!entry->use_pmap);
5844 entry->is_sub_map = FALSE;
5845 vm_map_deallocate(
5846 entry->object.sub_map);
5847 entry->object.sub_map = NULL;
5848 entry->is_shared = FALSE;
5849 entry->needs_copy = FALSE;
5850 entry->offset = 0;
5851 /*
5852 * XXX FBDP
5853 * We should propagate the protections
5854 * of the submap entry here instead
5855 * of forcing them to VM_PROT_ALL...
5856 * Or better yet, we should inherit
5857 * the protection of the copy_entry.
5858 */
5859 entry->protection = VM_PROT_ALL;
5860 entry->max_protection = VM_PROT_ALL;
5861 entry->wired_count = 0;
5862 entry->user_wired_count = 0;
5863 if(entry->inheritance
5864 == VM_INHERIT_SHARE)
5865 entry->inheritance = VM_INHERIT_COPY;
5866 continue;
5867 }
5868 /* first take care of any non-sub_map */
5869 /* entries to send */
5870 if(base_addr < entry->vme_start) {
5871 /* stuff to send */
5872 copy_size =
5873 entry->vme_start - base_addr;
5874 break;
5875 }
5876 sub_start = entry->offset;
5877
5878 if(entry->vme_end < dst_end)
5879 sub_end = entry->vme_end;
5880 else
5881 sub_end = dst_end;
5882 sub_end -= entry->vme_start;
5883 sub_end += entry->offset;
5884 local_end = entry->vme_end;
5885 vm_map_unlock(dst_map);
5886 copy_size = sub_end - sub_start;
5887
5888 /* adjust the copy object */
5889 if (total_size > copy_size) {
5890 vm_map_size_t local_size = 0;
5891 vm_map_size_t entry_size;
5892
5893 nentries = 1;
5894 new_offset = copy->offset;
5895 copy_entry = vm_map_copy_first_entry(copy);
5896 while(copy_entry !=
5897 vm_map_copy_to_entry(copy)){
5898 entry_size = copy_entry->vme_end -
5899 copy_entry->vme_start;
5900 if((local_size < copy_size) &&
5901 ((local_size + entry_size)
5902 >= copy_size)) {
5903 vm_map_copy_clip_end(copy,
5904 copy_entry,
5905 copy_entry->vme_start +
5906 (copy_size - local_size));
5907 entry_size = copy_entry->vme_end -
5908 copy_entry->vme_start;
5909 local_size += entry_size;
5910 new_offset += entry_size;
5911 }
5912 if(local_size >= copy_size) {
5913 next_copy = copy_entry->vme_next;
5914 copy_entry->vme_next =
5915 vm_map_copy_to_entry(copy);
5916 previous_prev =
5917 copy->cpy_hdr.links.prev;
5918 copy->cpy_hdr.links.prev = copy_entry;
5919 copy->size = copy_size;
5920 remaining_entries =
5921 copy->cpy_hdr.nentries;
5922 remaining_entries -= nentries;
5923 copy->cpy_hdr.nentries = nentries;
5924 break;
5925 } else {
5926 local_size += entry_size;
5927 new_offset += entry_size;
5928 nentries++;
5929 }
5930 copy_entry = copy_entry->vme_next;
5931 }
5932 }
5933
5934 if((entry->use_pmap) && (pmap == NULL)) {
5935 kr = vm_map_copy_overwrite_nested(
5936 entry->object.sub_map,
5937 sub_start,
5938 copy,
5939 interruptible,
5940 entry->object.sub_map->pmap);
5941 } else if (pmap != NULL) {
5942 kr = vm_map_copy_overwrite_nested(
5943 entry->object.sub_map,
5944 sub_start,
5945 copy,
5946 interruptible, pmap);
5947 } else {
5948 kr = vm_map_copy_overwrite_nested(
5949 entry->object.sub_map,
5950 sub_start,
5951 copy,
5952 interruptible,
5953 dst_map->pmap);
5954 }
5955 if(kr != KERN_SUCCESS) {
5956 if(next_copy != NULL) {
5957 copy->cpy_hdr.nentries +=
5958 remaining_entries;
5959 copy->cpy_hdr.links.prev->vme_next =
5960 next_copy;
5961 copy->cpy_hdr.links.prev
5962 = previous_prev;
5963 copy->size = total_size;
5964 }
5965 return kr;
5966 }
5967 if (dst_end <= local_end) {
5968 return(KERN_SUCCESS);
5969 }
5970 /* otherwise the copy no longer exists; it was */
5971 /* destroyed after a successful copy_overwrite */
5972 copy = (vm_map_copy_t)
5973 zalloc(vm_map_copy_zone);
5974 vm_map_copy_first_entry(copy) =
5975 vm_map_copy_last_entry(copy) =
5976 vm_map_copy_to_entry(copy);
5977 copy->type = VM_MAP_COPY_ENTRY_LIST;
5978 copy->offset = new_offset;
5979
5980 total_size -= copy_size;
5981 copy_size = 0;
5982 /* put back remainder of copy in container */
5983 if(next_copy != NULL) {
5984 copy->cpy_hdr.nentries = remaining_entries;
5985 copy->cpy_hdr.links.next = next_copy;
5986 copy->cpy_hdr.links.prev = previous_prev;
5987 copy->size = total_size;
5988 next_copy->vme_prev =
5989 vm_map_copy_to_entry(copy);
5990 next_copy = NULL;
5991 }
5992 base_addr = local_end;
5993 vm_map_lock(dst_map);
5994 if(!vm_map_lookup_entry(dst_map,
5995 local_end, &tmp_entry)) {
5996 vm_map_unlock(dst_map);
5997 return(KERN_INVALID_ADDRESS);
5998 }
5999 entry = tmp_entry;
6000 continue;
6001 }
6002 if (dst_end <= entry->vme_end) {
6003 copy_size = dst_end - base_addr;
6004 break;
6005 }
6006
6007 if ((next == vm_map_to_entry(dst_map)) ||
6008 (next->vme_start != entry->vme_end)) {
6009 vm_map_unlock(dst_map);
6010 return(KERN_INVALID_ADDRESS);
6011 }
6012
6013 entry = next;
6014 }/* for */
6015
6016 next_copy = NULL;
6017 nentries = 1;
6018
6019 /* adjust the copy object */
6020 if (total_size > copy_size) {
6021 vm_map_size_t local_size = 0;
6022 vm_map_size_t entry_size;
6023
6024 new_offset = copy->offset;
6025 copy_entry = vm_map_copy_first_entry(copy);
6026 while(copy_entry != vm_map_copy_to_entry(copy)) {
6027 entry_size = copy_entry->vme_end -
6028 copy_entry->vme_start;
6029 if((local_size < copy_size) &&
6030 ((local_size + entry_size)
6031 >= copy_size)) {
6032 vm_map_copy_clip_end(copy, copy_entry,
6033 copy_entry->vme_start +
6034 (copy_size - local_size));
6035 entry_size = copy_entry->vme_end -
6036 copy_entry->vme_start;
6037 local_size += entry_size;
6038 new_offset += entry_size;
6039 }
6040 if(local_size >= copy_size) {
6041 next_copy = copy_entry->vme_next;
6042 copy_entry->vme_next =
6043 vm_map_copy_to_entry(copy);
6044 previous_prev =
6045 copy->cpy_hdr.links.prev;
6046 copy->cpy_hdr.links.prev = copy_entry;
6047 copy->size = copy_size;
6048 remaining_entries =
6049 copy->cpy_hdr.nentries;
6050 remaining_entries -= nentries;
6051 copy->cpy_hdr.nentries = nentries;
6052 break;
6053 } else {
6054 local_size += entry_size;
6055 new_offset += entry_size;
6056 nentries++;
6057 }
6058 copy_entry = copy_entry->vme_next;
6059 }
6060 }
6061
6062 if (aligned) {
6063 pmap_t local_pmap;
6064
6065 if(pmap)
6066 local_pmap = pmap;
6067 else
6068 local_pmap = dst_map->pmap;
6069
6070 if ((kr = vm_map_copy_overwrite_aligned(
6071 dst_map, tmp_entry, copy,
6072 base_addr, local_pmap)) != KERN_SUCCESS) {
6073 if(next_copy != NULL) {
6074 copy->cpy_hdr.nentries +=
6075 remaining_entries;
6076 copy->cpy_hdr.links.prev->vme_next =
6077 next_copy;
6078 copy->cpy_hdr.links.prev =
6079 previous_prev;
6080 copy->size += copy_size;
6081 }
6082 return kr;
6083 }
6084 vm_map_unlock(dst_map);
6085 } else {
6086 /*
6087 * Performance gain:
6088 *
6089 * If the copy and dst addresses are misaligned but share the same
6090 * offset within the page, we can copy the misaligned parts
6091 * unaligned and copy the rest aligned. If they are aligned but
6092 * len is unaligned, we simply need to copy the trailing bit
6093 * unaligned. We'll need to split the misaligned parts of the
6094 * region in that case!
6095 */
6096 /* ALWAYS UNLOCKS THE dst_map MAP */
6097 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6098 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6099 if(next_copy != NULL) {
6100 copy->cpy_hdr.nentries +=
6101 remaining_entries;
6102 copy->cpy_hdr.links.prev->vme_next =
6103 next_copy;
6104 copy->cpy_hdr.links.prev =
6105 previous_prev;
6106 copy->size += copy_size;
6107 }
6108 return kr;
6109 }
6110 }
6111 total_size -= copy_size;
6112 if(total_size == 0)
6113 break;
6114 base_addr += copy_size;
6115 copy_size = 0;
6116 copy->offset = new_offset;
6117 if(next_copy != NULL) {
6118 copy->cpy_hdr.nentries = remaining_entries;
6119 copy->cpy_hdr.links.next = next_copy;
6120 copy->cpy_hdr.links.prev = previous_prev;
6121 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6122 copy->size = total_size;
6123 }
6124 vm_map_lock(dst_map);
6125 while(TRUE) {
6126 if (!vm_map_lookup_entry(dst_map,
6127 base_addr, &tmp_entry)) {
6128 vm_map_unlock(dst_map);
6129 return(KERN_INVALID_ADDRESS);
6130 }
6131 if (tmp_entry->in_transition) {
6132 entry->needs_wakeup = TRUE;
6133 vm_map_entry_wait(dst_map, THREAD_UNINT);
6134 } else {
6135 break;
6136 }
6137 }
6138 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6139
6140 entry = tmp_entry;
6141 } /* while */
6142
6143 /*
6144 * Throw away the vm_map_copy object
6145 */
6146 vm_map_copy_discard(copy);
6147
6148 return(KERN_SUCCESS);
6149 }/* vm_map_copy_overwrite */
6150
6151 kern_return_t
6152 vm_map_copy_overwrite(
6153 vm_map_t dst_map,
6154 vm_map_offset_t dst_addr,
6155 vm_map_copy_t copy,
6156 boolean_t interruptible)
6157 {
6158 return vm_map_copy_overwrite_nested(
6159 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
6160 }
6161
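/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * helper that overwrites an already-mapped destination range with data
 * captured from a source map.  The helper's name and parameters are
 * made up; vm_map_copyin, vm_map_copy_overwrite and vm_map_copy_discard
 * are the real routines from this module.
 */
static __unused kern_return_t
vm_map_example_overwrite_region(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source pages; FALSE: leave the source mapped */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the existing destination mapping, non-interruptibly */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * On failure the (possibly partially consumed) copy is
		 * left to the caller; dispose of it here.
		 */
		vm_map_copy_discard(copy);
	}
	return kr;
}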
6162
6163 /*
6164 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6165 *
6166 * Description:
6167 * Physically copy unaligned data
6168 *
6169 * Implementation:
6170 * Unaligned parts of pages have to be physically copied. We use
6171 * a modified form of vm_fault_copy (which understands non-aligned
6172 * page offsets and sizes) to do the copy. We attempt to copy as
6173 * much memory in one go as possible; however, vm_fault_copy copies
6174 * within a single memory object, so we have to use the smallest of
6175 * "amount left", "source object data size" and "target object data
6176 * size". With unaligned data we don't need to split regions, so the
6177 * source (copy) object should be a single map entry; the target
6178 * range, however, may be split over multiple map entries. In any
6179 * event we are pessimistic about these assumptions.
6180 *
6181 * Assumptions:
6182 * dst_map is locked on entry and is returned locked on success,
6183 * unlocked on error.
6184 */
6185
6186 static kern_return_t
6187 vm_map_copy_overwrite_unaligned(
6188 vm_map_t dst_map,
6189 vm_map_entry_t entry,
6190 vm_map_copy_t copy,
6191 vm_map_offset_t start)
6192 {
6193 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6194 vm_map_version_t version;
6195 vm_object_t dst_object;
6196 vm_object_offset_t dst_offset;
6197 vm_object_offset_t src_offset;
6198 vm_object_offset_t entry_offset;
6199 vm_map_offset_t entry_end;
6200 vm_map_size_t src_size,
6201 dst_size,
6202 copy_size,
6203 amount_left;
6204 kern_return_t kr = KERN_SUCCESS;
6205
6206 vm_map_lock_write_to_read(dst_map);
6207
6208 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6209 amount_left = copy->size;
6210 /*
6211 * The copy is unaligned, so we never clipped this entry; we need
6212 * the offset into the vm_object, not just into the data.
6213 */
6214 while (amount_left > 0) {
6215
6216 if (entry == vm_map_to_entry(dst_map)) {
6217 vm_map_unlock_read(dst_map);
6218 return KERN_INVALID_ADDRESS;
6219 }
6220
6221 /* "start" must be within the current map entry */
6222 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6223
6224 dst_offset = start - entry->vme_start;
6225
6226 dst_size = entry->vme_end - start;
6227
6228 src_size = copy_entry->vme_end -
6229 (copy_entry->vme_start + src_offset);
6230
6231 if (dst_size < src_size) {
6232 /*
6233 * we can only copy dst_size bytes before
6234 * we have to get the next destination entry
6235 */
6236 copy_size = dst_size;
6237 } else {
6238 /*
6239 * we can only copy src_size bytes before
6240 * we have to get the next source copy entry
6241 */
6242 copy_size = src_size;
6243 }
6244
6245 if (copy_size > amount_left) {
6246 copy_size = amount_left;
6247 }
6248 /*
6249 * Entry needs copy: create a shadow object for the
6250 * copy-on-write region.
6251 */
6252 if (entry->needs_copy &&
6253 ((entry->protection & VM_PROT_WRITE) != 0))
6254 {
6255 if (vm_map_lock_read_to_write(dst_map)) {
6256 vm_map_lock_read(dst_map);
6257 goto RetryLookup;
6258 }
6259 vm_object_shadow(&entry->object.vm_object,
6260 &entry->offset,
6261 (vm_map_size_t)(entry->vme_end
6262 - entry->vme_start));
6263 entry->needs_copy = FALSE;
6264 vm_map_lock_write_to_read(dst_map);
6265 }
6266 dst_object = entry->object.vm_object;
6267 /*
6268 * Unlike with the virtual (aligned) copy, we're going
6269 * to fault on the destination, so we need a target object.
6270 */
6271 if (dst_object == VM_OBJECT_NULL) {
6272 if (vm_map_lock_read_to_write(dst_map)) {
6273 vm_map_lock_read(dst_map);
6274 goto RetryLookup;
6275 }
6276 dst_object = vm_object_allocate((vm_map_size_t)
6277 entry->vme_end - entry->vme_start);
6278 entry->object.vm_object = dst_object;
6279 entry->offset = 0;
6280 vm_map_lock_write_to_read(dst_map);
6281 }
6282 /*
6283 * Take an object reference and unlock map. The "entry" may
6284 * disappear or change when the map is unlocked.
6285 */
6286 vm_object_reference(dst_object);
6287 version.main_timestamp = dst_map->timestamp;
6288 entry_offset = entry->offset;
6289 entry_end = entry->vme_end;
6290 vm_map_unlock_read(dst_map);
6291 /*
6292 * Copy as much as possible in one pass
6293 */
6294 kr = vm_fault_copy(
6295 copy_entry->object.vm_object,
6296 copy_entry->offset + src_offset,
6297 &copy_size,
6298 dst_object,
6299 entry_offset + dst_offset,
6300 dst_map,
6301 &version,
6302 THREAD_UNINT );
6303
6304 start += copy_size;
6305 src_offset += copy_size;
6306 amount_left -= copy_size;
6307 /*
6308 * Release the object reference
6309 */
6310 vm_object_deallocate(dst_object);
6311 /*
6312 * If a hard error occurred, return it now
6313 */
6314 if (kr != KERN_SUCCESS)
6315 return kr;
6316
6317 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6318 || amount_left == 0)
6319 {
6320 /*
6321 * all done with this copy entry, dispose.
6322 */
6323 vm_map_copy_entry_unlink(copy, copy_entry);
6324 vm_object_deallocate(copy_entry->object.vm_object);
6325 vm_map_copy_entry_dispose(copy, copy_entry);
6326
6327 if ((copy_entry = vm_map_copy_first_entry(copy))
6328 == vm_map_copy_to_entry(copy) && amount_left) {
6329 /*
6330 * not finished copying, but we ran out of source
6331 */
6332 return KERN_INVALID_ADDRESS;
6333 }
6334 src_offset = 0;
6335 }
6336
6337 if (amount_left == 0)
6338 return KERN_SUCCESS;
6339
6340 vm_map_lock_read(dst_map);
6341 if (version.main_timestamp == dst_map->timestamp) {
6342 if (start == entry_end) {
6343 /*
6344 * destination region is split. Use the version
6345 * information to avoid a lookup in the normal
6346 * case.
6347 */
6348 entry = entry->vme_next;
6349 /*
6350 * should be contiguous. Fail if we encounter
6351 * a hole in the destination.
6352 */
6353 if (start != entry->vme_start) {
6354 vm_map_unlock_read(dst_map);
6355 return KERN_INVALID_ADDRESS ;
6356 }
6357 }
6358 } else {
6359 /*
6360 * Map version check failed.
6361 * we must look up the entry because somebody
6362 * might have changed the map behind our backs.
6363 */
6364 RetryLookup:
6365 if (!vm_map_lookup_entry(dst_map, start, &entry))
6366 {
6367 vm_map_unlock_read(dst_map);
6368 return KERN_INVALID_ADDRESS ;
6369 }
6370 }
6371 }/* while */
6372
6373 return KERN_SUCCESS;
6374 }/* vm_map_copy_overwrite_unaligned */
6375
6376 /*
6377 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6378 *
6379 * Description:
6380 * Does all the vm_trickery possible for whole pages.
6381 *
6382 * Implementation:
6383 *
6384 * If there are no permanent objects in the destination,
6385 * and the source and destination map entry zones match,
6386 * and the destination map entry is not shared,
6387 * then the map entries can be deleted and replaced
6388 * with those from the copy. The following code is the
6389 * basic idea of what to do, but there are lots of annoying
6390 * little details about getting protection and inheritance
6391 * right. Should add protection, inheritance, and sharing checks
6392 * to the above pass and make sure that no wiring is involved.
6393 */
6394
6395 static kern_return_t
6396 vm_map_copy_overwrite_aligned(
6397 vm_map_t dst_map,
6398 vm_map_entry_t tmp_entry,
6399 vm_map_copy_t copy,
6400 vm_map_offset_t start,
6401 __unused pmap_t pmap)
6402 {
6403 vm_object_t object;
6404 vm_map_entry_t copy_entry;
6405 vm_map_size_t copy_size;
6406 vm_map_size_t size;
6407 vm_map_entry_t entry;
6408
6409 while ((copy_entry = vm_map_copy_first_entry(copy))
6410 != vm_map_copy_to_entry(copy))
6411 {
6412 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6413
6414 entry = tmp_entry;
6415 assert(!entry->use_pmap); /* unnested when clipped earlier */
6416 if (entry == vm_map_to_entry(dst_map)) {
6417 vm_map_unlock(dst_map);
6418 return KERN_INVALID_ADDRESS;
6419 }
6420 size = (entry->vme_end - entry->vme_start);
6421 /*
6422 * Make sure that no holes popped up in the
6423 * address map, and that the protection is
6424 * still valid, in case the map was unlocked
6425 * earlier.
6426 */
6427
6428 if ((entry->vme_start != start) || ((entry->is_sub_map)
6429 && !entry->needs_copy)) {
6430 vm_map_unlock(dst_map);
6431 return(KERN_INVALID_ADDRESS);
6432 }
6433 assert(entry != vm_map_to_entry(dst_map));
6434
6435 /*
6436 * Check protection again
6437 */
6438
6439 if ( ! (entry->protection & VM_PROT_WRITE)) {
6440 vm_map_unlock(dst_map);
6441 return(KERN_PROTECTION_FAILURE);
6442 }
6443
6444 /*
6445 * Adjust to source size first
6446 */
6447
6448 if (copy_size < size) {
6449 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6450 size = copy_size;
6451 }
6452
6453 /*
6454 * Adjust to destination size
6455 */
6456
6457 if (size < copy_size) {
6458 vm_map_copy_clip_end(copy, copy_entry,
6459 copy_entry->vme_start + size);
6460 copy_size = size;
6461 }
6462
6463 assert((entry->vme_end - entry->vme_start) == size);
6464 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6465 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6466
6467 /*
6468 * If the destination contains temporary unshared memory,
6469 * we can perform the copy by throwing it away and
6470 * installing the source data.
6471 */
6472
6473 object = entry->object.vm_object;
6474 if ((!entry->is_shared &&
6475 ((object == VM_OBJECT_NULL) ||
6476 (object->internal && !object->true_share))) ||
6477 entry->needs_copy) {
6478 vm_object_t old_object = entry->object.vm_object;
6479 vm_object_offset_t old_offset = entry->offset;
6480 vm_object_offset_t offset;
6481
6482 /*
6483 * Ensure that the source and destination aren't
6484 * identical
6485 */
6486 if (old_object == copy_entry->object.vm_object &&
6487 old_offset == copy_entry->offset) {
6488 vm_map_copy_entry_unlink(copy, copy_entry);
6489 vm_map_copy_entry_dispose(copy, copy_entry);
6490
6491 if (old_object != VM_OBJECT_NULL)
6492 vm_object_deallocate(old_object);
6493
6494 start = tmp_entry->vme_end;
6495 tmp_entry = tmp_entry->vme_next;
6496 continue;
6497 }
6498
6499 if (old_object != VM_OBJECT_NULL) {
6500 if(entry->is_sub_map) {
6501 if(entry->use_pmap) {
6502 #ifndef NO_NESTED_PMAP
6503 pmap_unnest(dst_map->pmap,
6504 (addr64_t)entry->vme_start,
6505 entry->vme_end - entry->vme_start);
6506 #endif /* NO_NESTED_PMAP */
6507 if(dst_map->mapped) {
6508 /* clean up parent */
6509 /* map/maps */
6510 vm_map_submap_pmap_clean(
6511 dst_map, entry->vme_start,
6512 entry->vme_end,
6513 entry->object.sub_map,
6514 entry->offset);
6515 }
6516 } else {
6517 vm_map_submap_pmap_clean(
6518 dst_map, entry->vme_start,
6519 entry->vme_end,
6520 entry->object.sub_map,
6521 entry->offset);
6522 }
6523 vm_map_deallocate(
6524 entry->object.sub_map);
6525 } else {
6526 if(dst_map->mapped) {
6527 vm_object_pmap_protect(
6528 entry->object.vm_object,
6529 entry->offset,
6530 entry->vme_end
6531 - entry->vme_start,
6532 PMAP_NULL,
6533 entry->vme_start,
6534 VM_PROT_NONE);
6535 } else {
6536 pmap_remove(dst_map->pmap,
6537 (addr64_t)(entry->vme_start),
6538 (addr64_t)(entry->vme_end));
6539 }
6540 vm_object_deallocate(old_object);
6541 }
6542 }
6543
6544 entry->is_sub_map = FALSE;
6545 entry->object = copy_entry->object;
6546 object = entry->object.vm_object;
6547 entry->needs_copy = copy_entry->needs_copy;
6548 entry->wired_count = 0;
6549 entry->user_wired_count = 0;
6550 offset = entry->offset = copy_entry->offset;
6551
6552 vm_map_copy_entry_unlink(copy, copy_entry);
6553 vm_map_copy_entry_dispose(copy, copy_entry);
6554
6555 /*
6556 * We could try to push pages into the pmap at this point, BUT
6557 * this optimization only saved on average 2 us per page when ALL
6558 * the pages in the source were currently mapped
6559 * and ALL the pages in the dest were touched. If fewer than
6560 * 2/3 of the pages were touched, this optimization actually cost more cycles.
6561 * It also puts a lot of pressure on the pmap layer with respect to mapping structures.
6562 */
6563
6564 /*
6565 * Set up for the next iteration. The map
6566 * has not been unlocked, so the next
6567 * address should be at the end of this
6568 * entry, and the next map entry should be
6569 * the one following it.
6570 */
6571
6572 start = tmp_entry->vme_end;
6573 tmp_entry = tmp_entry->vme_next;
6574 } else {
6575 vm_map_version_t version;
6576 vm_object_t dst_object = entry->object.vm_object;
6577 vm_object_offset_t dst_offset = entry->offset;
6578 kern_return_t r;
6579
6580 /*
6581 * Take an object reference, and record
6582 * the map version information so that the
6583 * map can be safely unlocked.
6584 */
6585
6586 vm_object_reference(dst_object);
6587
6588 /* account for unlock bumping up timestamp */
6589 version.main_timestamp = dst_map->timestamp + 1;
6590
6591 vm_map_unlock(dst_map);
6592
6593 /*
6594 * Copy as much as possible in one pass
6595 */
6596
6597 copy_size = size;
6598 r = vm_fault_copy(
6599 copy_entry->object.vm_object,
6600 copy_entry->offset,
6601 &copy_size,
6602 dst_object,
6603 dst_offset,
6604 dst_map,
6605 &version,
6606 THREAD_UNINT );
6607
6608 /*
6609 * Release the object reference
6610 */
6611
6612 vm_object_deallocate(dst_object);
6613
6614 /*
6615 * If a hard error occurred, return it now
6616 */
6617
6618 if (r != KERN_SUCCESS)
6619 return(r);
6620
6621 if (copy_size != 0) {
6622 /*
6623 * Dispose of the copied region
6624 */
6625
6626 vm_map_copy_clip_end(copy, copy_entry,
6627 copy_entry->vme_start + copy_size);
6628 vm_map_copy_entry_unlink(copy, copy_entry);
6629 vm_object_deallocate(copy_entry->object.vm_object);
6630 vm_map_copy_entry_dispose(copy, copy_entry);
6631 }
6632
6633 /*
6634 * Pick up in the destination map where we left off.
6635 *
6636 * Use the version information to avoid a lookup
6637 * in the normal case.
6638 */
6639
6640 start += copy_size;
6641 vm_map_lock(dst_map);
6642 if (version.main_timestamp == dst_map->timestamp) {
6643 /* We can safely use saved tmp_entry value */
6644
6645 vm_map_clip_end(dst_map, tmp_entry, start);
6646 tmp_entry = tmp_entry->vme_next;
6647 } else {
6648 /* Must do lookup of tmp_entry */
6649
6650 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6651 vm_map_unlock(dst_map);
6652 return(KERN_INVALID_ADDRESS);
6653 }
6654 vm_map_clip_start(dst_map, tmp_entry, start);
6655 }
6656 }
6657 }/* while */
6658
6659 return(KERN_SUCCESS);
6660 }/* vm_map_copy_overwrite_aligned */
6661
6662 /*
6663 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6664 *
6665 * Description:
6666 * Copy in data to a kernel buffer from space in the
6667 * source map. The original space may be optionally
6668 * deallocated.
6669 *
6670 * If successful, returns a new copy object.
6671 */
6672 static kern_return_t
6673 vm_map_copyin_kernel_buffer(
6674 vm_map_t src_map,
6675 vm_map_offset_t src_addr,
6676 vm_map_size_t len,
6677 boolean_t src_destroy,
6678 vm_map_copy_t *copy_result)
6679 {
6680 kern_return_t kr;
6681 vm_map_copy_t copy;
6682 vm_size_t kalloc_size;
6683
6684 if ((vm_size_t) len != len) {
6685 /* "len" is too big and doesn't fit in a "vm_size_t" */
6686 return KERN_RESOURCE_SHORTAGE;
6687 }
6688 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6689 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6690
6691 copy = (vm_map_copy_t) kalloc(kalloc_size);
6692 if (copy == VM_MAP_COPY_NULL) {
6693 return KERN_RESOURCE_SHORTAGE;
6694 }
6695 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6696 copy->size = len;
6697 copy->offset = 0;
6698 copy->cpy_kdata = (void *) (copy + 1);
6699 copy->cpy_kalloc_size = kalloc_size;
6700
6701 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6702 if (kr != KERN_SUCCESS) {
6703 kfree(copy, kalloc_size);
6704 return kr;
6705 }
6706 if (src_destroy) {
6707 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6708 vm_map_round_page(src_addr + len),
6709 VM_MAP_REMOVE_INTERRUPTIBLE |
6710 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6711 (src_map == kernel_map) ?
6712 VM_MAP_REMOVE_KUNWIRE : 0);
6713 }
6714 *copy_result = copy;
6715 return KERN_SUCCESS;
6716 }
6717
6718 /*
6719 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6720 *
6721 * Description:
6722 * Copy out data from a kernel buffer into space in the
6723 * destination map. The space may be optionally dynamically
6724 * allocated.
6725 *
6726 * If successful, consumes the copy object.
6727 * Otherwise, the caller is responsible for it.
6728 */
6729 static int vm_map_copyout_kernel_buffer_failures = 0;
6730 static kern_return_t
6731 vm_map_copyout_kernel_buffer(
6732 vm_map_t map,
6733 vm_map_address_t *addr, /* IN/OUT */
6734 vm_map_copy_t copy,
6735 boolean_t overwrite)
6736 {
6737 kern_return_t kr = KERN_SUCCESS;
6738 thread_t thread = current_thread();
6739
6740 if (!overwrite) {
6741
6742 /*
6743 * Allocate space in the target map for the data
6744 */
6745 *addr = 0;
6746 kr = vm_map_enter(map,
6747 addr,
6748 vm_map_round_page(copy->size),
6749 (vm_map_offset_t) 0,
6750 VM_FLAGS_ANYWHERE,
6751 VM_OBJECT_NULL,
6752 (vm_object_offset_t) 0,
6753 FALSE,
6754 VM_PROT_DEFAULT,
6755 VM_PROT_ALL,
6756 VM_INHERIT_DEFAULT);
6757 if (kr != KERN_SUCCESS)
6758 return kr;
6759 }
6760
6761 /*
6762 * Copyout the data from the kernel buffer to the target map.
6763 */
6764 if (thread->map == map) {
6765
6766 /*
6767 * If the target map is the current map, just do
6768 * the copy.
6769 */
6770 assert((vm_size_t) copy->size == copy->size);
6771 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6772 kr = KERN_INVALID_ADDRESS;
6773 }
6774 }
6775 else {
6776 vm_map_t oldmap;
6777
6778 /*
6779 * If the target map is another map, assume the
6780 * target's address space identity for the duration
6781 * of the copy.
6782 */
6783 vm_map_reference(map);
6784 oldmap = vm_map_switch(map);
6785
6786 assert((vm_size_t) copy->size == copy->size);
6787 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6788 vm_map_copyout_kernel_buffer_failures++;
6789 kr = KERN_INVALID_ADDRESS;
6790 }
6791
6792 (void) vm_map_switch(oldmap);
6793 vm_map_deallocate(map);
6794 }
6795
6796 if (kr != KERN_SUCCESS) {
6797 /* the copy failed, clean up */
6798 if (!overwrite) {
6799 /*
6800 * Deallocate the space we allocated in the target map.
6801 */
6802 (void) vm_map_remove(map,
6803 vm_map_trunc_page(*addr),
6804 vm_map_round_page(*addr +
6805 vm_map_round_page(copy->size)),
6806 VM_MAP_NO_FLAGS);
6807 *addr = 0;
6808 }
6809 } else {
6810 /* copy was successful, discard the copy structure */
6811 kfree(copy, copy->cpy_kalloc_size);
6812 }
6813
6814 return kr;
6815 }
6816
6817 /*
6818 * Macro: vm_map_copy_insert
6819 *
6820 * Description:
6821 * Link a copy chain ("copy") into a map at the
6822 * specified location (after "where").
6823 * Side effects:
6824 * The copy chain is destroyed.
6825 * Warning:
6826 * The arguments are evaluated multiple times.
6827 */
6828 #define vm_map_copy_insert(map, where, copy) \
6829 MACRO_BEGIN \
6830 vm_map_t VMCI_map; \
6831 vm_map_entry_t VMCI_where; \
6832 vm_map_copy_t VMCI_copy; \
6833 VMCI_map = (map); \
6834 VMCI_where = (where); \
6835 VMCI_copy = (copy); \
6836 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
6837 ->vme_next = (VMCI_where->vme_next); \
6838 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
6839 ->vme_prev = VMCI_where; \
6840 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
6841 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
6842 zfree(vm_map_copy_zone, VMCI_copy); \
6843 MACRO_END
6844
6845 /*
6846 * Routine: vm_map_copyout
6847 *
6848 * Description:
6849 * Copy out a copy chain ("copy") into newly-allocated
6850 * space in the destination map.
6851 *
6852 * If successful, consumes the copy object.
6853 * Otherwise, the caller is responsible for it.
6854 */
6855 kern_return_t
6856 vm_map_copyout(
6857 vm_map_t dst_map,
6858 vm_map_address_t *dst_addr, /* OUT */
6859 vm_map_copy_t copy)
6860 {
6861 vm_map_size_t size;
6862 vm_map_size_t adjustment;
6863 vm_map_offset_t start;
6864 vm_object_offset_t vm_copy_start;
6865 vm_map_entry_t last;
6866 register
6867 vm_map_entry_t entry;
6868
6869 /*
6870 * Check for null copy object.
6871 */
6872
6873 if (copy == VM_MAP_COPY_NULL) {
6874 *dst_addr = 0;
6875 return(KERN_SUCCESS);
6876 }
6877
6878 /*
6879 * Check for special copy object, created
6880 * by vm_map_copyin_object.
6881 */
6882
6883 if (copy->type == VM_MAP_COPY_OBJECT) {
6884 vm_object_t object = copy->cpy_object;
6885 kern_return_t kr;
6886 vm_object_offset_t offset;
6887
6888 offset = vm_object_trunc_page(copy->offset);
6889 size = vm_map_round_page(copy->size +
6890 (vm_map_size_t)(copy->offset - offset));
6891 *dst_addr = 0;
6892 kr = vm_map_enter(dst_map, dst_addr, size,
6893 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
6894 object, offset, FALSE,
6895 VM_PROT_DEFAULT, VM_PROT_ALL,
6896 VM_INHERIT_DEFAULT);
6897 if (kr != KERN_SUCCESS)
6898 return(kr);
6899 /* Account for non-page-aligned copy object */
6900 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
6901 zfree(vm_map_copy_zone, copy);
6902 return(KERN_SUCCESS);
6903 }
6904
6905 /*
6906 * Check for special kernel buffer allocated
6907 * by new_ipc_kmsg_copyin.
6908 */
6909
6910 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6911 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
6912 copy, FALSE));
6913 }
6914
6915 /*
6916 * Find space for the data
6917 */
6918
6919 vm_copy_start = vm_object_trunc_page(copy->offset);
6920 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
6921 - vm_copy_start;
6922
6923 StartAgain: ;
6924
6925 vm_map_lock(dst_map);
6926 assert(first_free_is_valid(dst_map));
6927 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
6928 vm_map_min(dst_map) : last->vme_end;
6929
6930 while (TRUE) {
6931 vm_map_entry_t next = last->vme_next;
6932 vm_map_offset_t end = start + size;
6933
6934 if ((end > dst_map->max_offset) || (end < start)) {
6935 if (dst_map->wait_for_space) {
6936 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
6937 assert_wait((event_t) dst_map,
6938 THREAD_INTERRUPTIBLE);
6939 vm_map_unlock(dst_map);
6940 thread_block(THREAD_CONTINUE_NULL);
6941 goto StartAgain;
6942 }
6943 }
6944 vm_map_unlock(dst_map);
6945 return(KERN_NO_SPACE);
6946 }
6947
6948 if ((next == vm_map_to_entry(dst_map)) ||
6949 (next->vme_start >= end))
6950 break;
6951
6952 last = next;
6953 start = last->vme_end;
6954 }
6955
6956 /*
6957 * Since we're going to just drop the map
6958 * entries from the copy into the destination
6959 * map, they must come from the same pool.
6960 */
6961
6962 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
6963 /*
6964 * Mismatches occur when dealing with the default
6965 * pager.
6966 */
6967 zone_t old_zone;
6968 vm_map_entry_t next, new;
6969
6970 /*
6971 * Find the zone that the copies were allocated from
6972 */
6973 old_zone = (copy->cpy_hdr.entries_pageable)
6974 ? vm_map_entry_zone
6975 : vm_map_kentry_zone;
6976 entry = vm_map_copy_first_entry(copy);
6977
6978 /*
6979 * Reinitialize the copy so that vm_map_copy_entry_link
6980 * will work.
6981 */
6982 copy->cpy_hdr.nentries = 0;
6983 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
6984 vm_map_copy_first_entry(copy) =
6985 vm_map_copy_last_entry(copy) =
6986 vm_map_copy_to_entry(copy);
6987
6988 /*
6989 * Copy each entry.
6990 */
6991 while (entry != vm_map_copy_to_entry(copy)) {
6992 new = vm_map_copy_entry_create(copy);
6993 vm_map_entry_copy_full(new, entry);
6994 new->use_pmap = FALSE; /* clr address space specifics */
6995 vm_map_copy_entry_link(copy,
6996 vm_map_copy_last_entry(copy),
6997 new);
6998 next = entry->vme_next;
6999 zfree(old_zone, entry);
7000 entry = next;
7001 }
7002 }
7003
7004 /*
7005 * Adjust the addresses in the copy chain, and
7006 * reset the region attributes.
7007 */
7008
7009 adjustment = start - vm_copy_start;
7010 for (entry = vm_map_copy_first_entry(copy);
7011 entry != vm_map_copy_to_entry(copy);
7012 entry = entry->vme_next) {
7013 entry->vme_start += adjustment;
7014 entry->vme_end += adjustment;
7015
7016 entry->inheritance = VM_INHERIT_DEFAULT;
7017 entry->protection = VM_PROT_DEFAULT;
7018 entry->max_protection = VM_PROT_ALL;
7019 entry->behavior = VM_BEHAVIOR_DEFAULT;
7020
7021 /*
7022 * If the entry is now wired,
7023 * map the pages into the destination map.
7024 */
7025 if (entry->wired_count != 0) {
7026 register vm_map_offset_t va;
7027 vm_object_offset_t offset;
7028 register vm_object_t object;
7029 vm_prot_t prot;
7030 int type_of_fault;
7031
7032 object = entry->object.vm_object;
7033 offset = entry->offset;
7034 va = entry->vme_start;
7035
7036 pmap_pageable(dst_map->pmap,
7037 entry->vme_start,
7038 entry->vme_end,
7039 TRUE);
7040
7041 while (va < entry->vme_end) {
7042 register vm_page_t m;
7043
7044 /*
7045 * Look up the page in the object.
7046 * Assert that the page will be found in the
7047 * top object:
7048 * either
7049 * the object was newly created by
7050 * vm_object_copy_slowly, and has
7051 * copies of all of the pages from
7052 * the source object
7053 * or
7054 * the object was moved from the old
7055 * map entry; because the old map
7056 * entry was wired, all of the pages
7057 * were in the top-level object.
7058 * (XXX not true if we wire pages for
7059 * reading)
7060 */
7061 vm_object_lock(object);
7062
7063 m = vm_page_lookup(object, offset);
7064 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7065 m->absent)
7066 panic("vm_map_copyout: wiring %p", m);
7067
7068 /*
7069 * ENCRYPTED SWAP:
7070 * The page is assumed to be wired here, so it
7071 * shouldn't be encrypted. Otherwise, we
7072 * couldn't enter it in the page table, since
7073 * we don't want the user to see the encrypted
7074 * data.
7075 */
7076 ASSERT_PAGE_DECRYPTED(m);
7077
7078 prot = entry->protection;
7079
7080 if (override_nx(dst_map, entry->alias) && prot)
7081 prot |= VM_PROT_EXECUTE;
7082
7083 type_of_fault = DBG_CACHE_HIT_FAULT;
7084
7085 vm_fault_enter(m, dst_map->pmap, va, prot,
7086 VM_PAGE_WIRED(m), FALSE, FALSE,
7087 &type_of_fault);
7088
7089 vm_object_unlock(object);
7090
7091 offset += PAGE_SIZE_64;
7092 va += PAGE_SIZE;
7093 }
7094 }
7095 }
7096
7097 /*
7098 * Correct the page alignment for the result
7099 */
7100
7101 *dst_addr = start + (copy->offset - vm_copy_start);
7102
7103 /*
7104 * Update the hints and the map size
7105 */
7106
7107 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7108
7109 dst_map->size += size;
7110
7111 /*
7112 * Link in the copy
7113 */
7114
7115 vm_map_copy_insert(dst_map, last, copy);
7116
7117 vm_map_unlock(dst_map);
7118
7119 /*
7120 * XXX If wiring_required, call vm_map_pageable
7121 */
7122
7123 return(KERN_SUCCESS);
7124 }
7125
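/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): move a region from one map into freshly allocated space in
 * another map using the vm_map_copyin / vm_map_copyout pair described
 * above.  Only the helper's name and parameters are invented.
 */
static __unused kern_return_t
vm_map_example_transfer_region(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* TRUE: deallocate the region from the source map once captured */
	kr = vm_map_copyin(src_map, src_addr, len, TRUE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* the destination map picks the address; consumes "copy" on success */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* copyout left it to us */
	return kr;
}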
7126 /*
7127 * Routine: vm_map_copyin
7128 *
7129 * Description:
7130 * see vm_map_copyin_common. Exported via Unsupported.exports.
7131 *
7132 */
7133
7134 #undef vm_map_copyin
7135
7136 kern_return_t
7137 vm_map_copyin(
7138 vm_map_t src_map,
7139 vm_map_address_t src_addr,
7140 vm_map_size_t len,
7141 boolean_t src_destroy,
7142 vm_map_copy_t *copy_result) /* OUT */
7143 {
7144 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7145 FALSE, copy_result, FALSE));
7146 }
7147
7148 /*
7149 * Routine: vm_map_copyin_common
7150 *
7151 * Description:
7152 * Copy the specified region (src_addr, len) from the
7153 * source address space (src_map), possibly removing
7154 * the region from the source address space (src_destroy).
7155 *
7156 * Returns:
7157 * A vm_map_copy_t object (copy_result), suitable for
7158 * insertion into another address space (using vm_map_copyout),
7159 * copying over another address space region (using
7160 * vm_map_copy_overwrite). If the copy is unused, it
7161 * should be destroyed (using vm_map_copy_discard).
7162 *
7163 * In/out conditions:
7164 * The source map should not be locked on entry.
7165 */
7166
7167 typedef struct submap_map {
7168 vm_map_t parent_map;
7169 vm_map_offset_t base_start;
7170 vm_map_offset_t base_end;
7171 vm_map_size_t base_len;
7172 struct submap_map *next;
7173 } submap_map_t;
7174
7175 kern_return_t
7176 vm_map_copyin_common(
7177 vm_map_t src_map,
7178 vm_map_address_t src_addr,
7179 vm_map_size_t len,
7180 boolean_t src_destroy,
7181 __unused boolean_t src_volatile,
7182 vm_map_copy_t *copy_result, /* OUT */
7183 boolean_t use_maxprot)
7184 {
7185 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7186 * in multi-level lookup, this
7187 * entry contains the actual
7188 * vm_object/offset.
7189 */
7190 register
7191 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7192
7193 vm_map_offset_t src_start; /* Start of current entry --
7194 * where copy is taking place now
7195 */
7196 vm_map_offset_t src_end; /* End of entire region to be
7197 * copied */
7198 vm_map_offset_t src_base;
7199 vm_map_t base_map = src_map;
7200 boolean_t map_share=FALSE;
7201 submap_map_t *parent_maps = NULL;
7202
7203 register
7204 vm_map_copy_t copy; /* Resulting copy */
7205 vm_map_address_t copy_addr;
7206
7207 /*
7208 * Check for copies of zero bytes.
7209 */
7210
7211 if (len == 0) {
7212 *copy_result = VM_MAP_COPY_NULL;
7213 return(KERN_SUCCESS);
7214 }
7215
7216 /*
7217 * Check that the end address doesn't overflow
7218 */
7219 src_end = src_addr + len;
7220 if (src_end < src_addr)
7221 return KERN_INVALID_ADDRESS;
7222
7223 /*
7224 * If the copy is sufficiently small, use a kernel buffer instead
7225 * of making a virtual copy. The theory being that the cost of
7226 * setting up VM (and taking C-O-W faults) dominates the copy costs
7227 * for small regions.
7228 */
7229 if ((len < msg_ool_size_small) && !use_maxprot)
7230 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7231 src_destroy, copy_result);
7232
7233 /*
7234 * Compute (page aligned) start and end of region
7235 */
7236 src_start = vm_map_trunc_page(src_addr);
7237 src_end = vm_map_round_page(src_end);
7238
7239 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7240
7241 /*
7242 * Allocate a header element for the list.
7243 *
7244 * Use the start and end in the header to
7245 * remember the endpoints prior to rounding.
7246 */
7247
7248 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7249 vm_map_copy_first_entry(copy) =
7250 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7251 copy->type = VM_MAP_COPY_ENTRY_LIST;
7252 copy->cpy_hdr.nentries = 0;
7253 copy->cpy_hdr.entries_pageable = TRUE;
7254
7255 copy->offset = src_addr;
7256 copy->size = len;
7257
7258 new_entry = vm_map_copy_entry_create(copy);
7259
7260 #define RETURN(x) \
7261 MACRO_BEGIN \
7262 vm_map_unlock(src_map); \
7263 if(src_map != base_map) \
7264 vm_map_deallocate(src_map); \
7265 if (new_entry != VM_MAP_ENTRY_NULL) \
7266 vm_map_copy_entry_dispose(copy,new_entry); \
7267 vm_map_copy_discard(copy); \
7268 { \
7269 submap_map_t *_ptr; \
7270 \
7271 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7272 parent_maps=parent_maps->next; \
7273 if (_ptr->parent_map != base_map) \
7274 vm_map_deallocate(_ptr->parent_map); \
7275 kfree(_ptr, sizeof(submap_map_t)); \
7276 } \
7277 } \
7278 MACRO_RETURN(x); \
7279 MACRO_END
7280
7281 /*
7282 * Find the beginning of the region.
7283 */
7284
7285 vm_map_lock(src_map);
7286
7287 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7288 RETURN(KERN_INVALID_ADDRESS);
7289 if(!tmp_entry->is_sub_map) {
7290 vm_map_clip_start(src_map, tmp_entry, src_start);
7291 }
7292 /* set for later submap fix-up */
7293 copy_addr = src_start;
7294
7295 /*
7296 * Go through entries until we get to the end.
7297 */
7298
7299 while (TRUE) {
7300 register
7301 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7302 vm_map_size_t src_size; /* Size of source
7303 * map entry (in both
7304 * maps)
7305 */
7306
7307 register
7308 vm_object_t src_object; /* Object to copy */
7309 vm_object_offset_t src_offset;
7310
7311 boolean_t src_needs_copy; /* Should source map
7312 * be made read-only
7313 * for copy-on-write?
7314 */
7315
7316 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7317
7318 boolean_t was_wired; /* Was source wired? */
7319 vm_map_version_t version; /* Version before locks
7320 * dropped to make copy
7321 */
7322 kern_return_t result; /* Return value from
7323 * copy_strategically.
7324 */
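/*
 * Descriptive note (added): descend through any nested submaps
 * covering src_start, recording each parent map (and taking a
 * reference on each submap) so that the traversal can be resumed
 * in the parent once this submap range has been copied.
 */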
7325 while(tmp_entry->is_sub_map) {
7326 vm_map_size_t submap_len;
7327 submap_map_t *ptr;
7328
7329 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7330 ptr->next = parent_maps;
7331 parent_maps = ptr;
7332 ptr->parent_map = src_map;
7333 ptr->base_start = src_start;
7334 ptr->base_end = src_end;
7335 submap_len = tmp_entry->vme_end - src_start;
7336 if(submap_len > (src_end-src_start))
7337 submap_len = src_end-src_start;
7338 ptr->base_len = submap_len;
7339
7340 src_start -= tmp_entry->vme_start;
7341 src_start += tmp_entry->offset;
7342 src_end = src_start + submap_len;
7343 src_map = tmp_entry->object.sub_map;
7344 vm_map_lock(src_map);
7345 /* keep an outstanding reference on all maps in */
7346 /* the parent tree except the base map */
7347 vm_map_reference(src_map);
7348 vm_map_unlock(ptr->parent_map);
7349 if (!vm_map_lookup_entry(
7350 src_map, src_start, &tmp_entry))
7351 RETURN(KERN_INVALID_ADDRESS);
7352 map_share = TRUE;
7353 if(!tmp_entry->is_sub_map)
7354 vm_map_clip_start(src_map, tmp_entry, src_start);
7355 src_entry = tmp_entry;
7356 }
7357 /* we are now in the lowest level submap... */
7358
7359 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7360 (tmp_entry->object.vm_object->phys_contiguous)) {
7361 /* This is not supported for now. In the future */
7362 /* we will need to detect the phys_contig */
7363 /* condition and then upgrade copy_slowly */
7364 /* to do a physical copy from the device-memory- */
7365 /* based object. We can piggy-back off of */
7366 /* the was_wired boolean to set up the */
7367 /* proper handling. */
7368 RETURN(KERN_PROTECTION_FAILURE);
7369 }
7370 /*
7371 * Create a new address map entry to hold the result.
7372 * Fill in the fields from the appropriate source entries.
7373 * We must unlock the source map to do this if we need
7374 * to allocate a map entry.
7375 */
7376 if (new_entry == VM_MAP_ENTRY_NULL) {
7377 version.main_timestamp = src_map->timestamp;
7378 vm_map_unlock(src_map);
7379
7380 new_entry = vm_map_copy_entry_create(copy);
7381
7382 vm_map_lock(src_map);
7383 if ((version.main_timestamp + 1) != src_map->timestamp) {
7384 if (!vm_map_lookup_entry(src_map, src_start,
7385 &tmp_entry)) {
7386 RETURN(KERN_INVALID_ADDRESS);
7387 }
7388 if (!tmp_entry->is_sub_map)
7389 vm_map_clip_start(src_map, tmp_entry, src_start);
7390 continue; /* restart w/ new tmp_entry */
7391 }
7392 }
7393
7394 /*
7395 * Verify that the region can be read.
7396 */
7397 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7398 !use_maxprot) ||
7399 (src_entry->max_protection & VM_PROT_READ) == 0)
7400 RETURN(KERN_PROTECTION_FAILURE);
7401
7402 /*
7403 * Clip against the endpoints of the entire region.
7404 */
7405
7406 vm_map_clip_end(src_map, src_entry, src_end);
7407
7408 src_size = src_entry->vme_end - src_start;
7409 src_object = src_entry->object.vm_object;
7410 src_offset = src_entry->offset;
7411 was_wired = (src_entry->wired_count != 0);
7412
7413 vm_map_entry_copy(new_entry, src_entry);
7414 new_entry->use_pmap = FALSE; /* clr address space specifics */
7415
7416 /*
7417 * Attempt non-blocking copy-on-write optimizations.
7418 */
7419
7420 if (src_destroy &&
7421 (src_object == VM_OBJECT_NULL ||
7422 (src_object->internal && !src_object->true_share
7423 && !map_share))) {
7424 /*
7425 * If we are destroying the source, and the object
7426 * is internal, we can move the object reference
7427 * from the source to the copy. The copy is
7428 * copy-on-write only if the source is.
7429 * We make another reference to the object, because
7430 * destroying the source entry will deallocate it.
7431 */
7432 vm_object_reference(src_object);
7433
7434 /*
7435 * The copy is always unwired; vm_map_copy_entry
7436 * sets its wired count to zero.
7437 */
7438
7439 goto CopySuccessful;
7440 }
7441
7442
7443 RestartCopy:
7444 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7445 src_object, new_entry, new_entry->object.vm_object,
7446 was_wired, 0);
7447 if ((src_object == VM_OBJECT_NULL ||
7448 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7449 vm_object_copy_quickly(
7450 &new_entry->object.vm_object,
7451 src_offset,
7452 src_size,
7453 &src_needs_copy,
7454 &new_entry_needs_copy)) {
7455
7456 new_entry->needs_copy = new_entry_needs_copy;
7457
7458 /*
7459 * Handle copy-on-write obligations
7460 */
7461
7462 if (src_needs_copy && !tmp_entry->needs_copy) {
7463 vm_prot_t prot;
7464
7465 prot = src_entry->protection & ~VM_PROT_WRITE;
7466
7467 if (override_nx(src_map, src_entry->alias) && prot)
7468 prot |= VM_PROT_EXECUTE;
7469
7470 vm_object_pmap_protect(
7471 src_object,
7472 src_offset,
7473 src_size,
7474 (src_entry->is_shared ?
7475 PMAP_NULL
7476 : src_map->pmap),
7477 src_entry->vme_start,
7478 prot);
7479
7480 tmp_entry->needs_copy = TRUE;
7481 }
7482
7483 /*
7484 * The map has never been unlocked, so it's safe
7485 * to move to the next entry rather than doing
7486 * another lookup.
7487 */
7488
7489 goto CopySuccessful;
7490 }
7491
7492 /*
7493 * Take an object reference, so that we may
7494 * release the map lock(s).
7495 */
7496
7497 assert(src_object != VM_OBJECT_NULL);
7498 vm_object_reference(src_object);
7499
7500 /*
7501 * Record the timestamp for later verification.
7502 * Unlock the map.
7503 */
7504
7505 version.main_timestamp = src_map->timestamp;
7506 vm_map_unlock(src_map); /* Increments timestamp once! */
7507
7508 /*
7509 * Perform the copy
7510 */
7511
7512 if (was_wired) {
7513 CopySlowly:
7514 vm_object_lock(src_object);
7515 result = vm_object_copy_slowly(
7516 src_object,
7517 src_offset,
7518 src_size,
7519 THREAD_UNINT,
7520 &new_entry->object.vm_object);
7521 new_entry->offset = 0;
7522 new_entry->needs_copy = FALSE;
7523
7524 }
7525 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7526 (tmp_entry->is_shared || map_share)) {
7527 vm_object_t new_object;
7528
7529 vm_object_lock_shared(src_object);
7530 new_object = vm_object_copy_delayed(
7531 src_object,
7532 src_offset,
7533 src_size,
7534 TRUE);
7535 if (new_object == VM_OBJECT_NULL)
7536 goto CopySlowly;
7537
7538 new_entry->object.vm_object = new_object;
7539 new_entry->needs_copy = TRUE;
7540 result = KERN_SUCCESS;
7541
7542 } else {
7543 result = vm_object_copy_strategically(src_object,
7544 src_offset,
7545 src_size,
7546 &new_entry->object.vm_object,
7547 &new_entry->offset,
7548 &new_entry_needs_copy);
7549
7550 new_entry->needs_copy = new_entry_needs_copy;
7551 }
7552
7553 if (result != KERN_SUCCESS &&
7554 result != KERN_MEMORY_RESTART_COPY) {
7555 vm_map_lock(src_map);
7556 RETURN(result);
7557 }
7558
7559 /*
7560 * Throw away the extra reference
7561 */
7562
7563 vm_object_deallocate(src_object);
7564
7565 /*
7566 * Verify that the map has not substantially
7567 * changed while the copy was being made.
7568 */
7569
7570 vm_map_lock(src_map);
7571
7572 if ((version.main_timestamp + 1) == src_map->timestamp)
7573 goto VerificationSuccessful;
7574
7575 /*
7576 * Simple version comparison failed.
7577 *
7578 * Retry the lookup and verify that the
7579 * same object/offset are still present.
7580 *
7581 * [Note: a memory manager that colludes with
7582 * the calling task can detect that we have
7583 * cheated. While the map was unlocked, the
7584 * mapping could have been changed and restored.]
7585 */
7586
7587 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7588 RETURN(KERN_INVALID_ADDRESS);
7589 }
7590
7591 src_entry = tmp_entry;
7592 vm_map_clip_start(src_map, src_entry, src_start);
7593
7594 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7595 !use_maxprot) ||
7596 ((src_entry->max_protection & VM_PROT_READ) == 0))
7597 goto VerificationFailed;
7598
7599 if (src_entry->vme_end < new_entry->vme_end)
7600 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7601
7602 if ((src_entry->object.vm_object != src_object) ||
7603 (src_entry->offset != src_offset) ) {
7604
7605 /*
7606 * Verification failed.
7607 *
7608 * Start over with this top-level entry.
7609 */
7610
7611 VerificationFailed: ;
7612
7613 vm_object_deallocate(new_entry->object.vm_object);
7614 tmp_entry = src_entry;
7615 continue;
7616 }
7617
7618 /*
7619 * Verification succeeded.
7620 */
7621
7622 VerificationSuccessful: ;
7623
7624 if (result == KERN_MEMORY_RESTART_COPY)
7625 goto RestartCopy;
7626
7627 /*
7628 * Copy succeeded.
7629 */
7630
7631 CopySuccessful: ;
7632
7633 /*
7634 * Link in the new copy entry.
7635 */
7636
7637 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7638 new_entry);
7639
7640 /*
7641 * Determine whether the entire region
7642 * has been copied.
7643 */
7644 src_base = src_start;
7645 src_start = new_entry->vme_end;
7646 new_entry = VM_MAP_ENTRY_NULL;
7647 while ((src_start >= src_end) && (src_end != 0)) {
7648 if (src_map != base_map) {
7649 submap_map_t *ptr;
7650
7651 ptr = parent_maps;
7652 assert(ptr != NULL);
7653 parent_maps = parent_maps->next;
7654
7655 /* fix up the damage we did in that submap */
7656 vm_map_simplify_range(src_map,
7657 src_base,
7658 src_end);
7659
7660 vm_map_unlock(src_map);
7661 vm_map_deallocate(src_map);
7662 vm_map_lock(ptr->parent_map);
7663 src_map = ptr->parent_map;
7664 src_base = ptr->base_start;
7665 src_start = ptr->base_start + ptr->base_len;
7666 src_end = ptr->base_end;
7667 if ((src_end > src_start) &&
7668 !vm_map_lookup_entry(
7669 src_map, src_start, &tmp_entry))
7670 RETURN(KERN_INVALID_ADDRESS);
7671 kfree(ptr, sizeof(submap_map_t));
7672 if(parent_maps == NULL)
7673 map_share = FALSE;
7674 src_entry = tmp_entry->vme_prev;
7675 } else
7676 break;
7677 }
7678 if ((src_start >= src_end) && (src_end != 0))
7679 break;
7680
7681 /*
7682 * Verify that there are no gaps in the region
7683 */
7684
7685 tmp_entry = src_entry->vme_next;
7686 if ((tmp_entry->vme_start != src_start) ||
7687 (tmp_entry == vm_map_to_entry(src_map)))
7688 RETURN(KERN_INVALID_ADDRESS);
7689 }
7690
7691 /*
7692 * If the source should be destroyed, do it now, since the
7693 * copy was successful.
7694 */
7695 if (src_destroy) {
7696 (void) vm_map_delete(src_map,
7697 vm_map_trunc_page(src_addr),
7698 src_end,
7699 (src_map == kernel_map) ?
7700 VM_MAP_REMOVE_KUNWIRE :
7701 VM_MAP_NO_FLAGS,
7702 VM_MAP_NULL);
7703 } else {
7704 /* fix up the damage we did in the base map */
7705 vm_map_simplify_range(src_map,
7706 vm_map_trunc_page(src_addr),
7707 vm_map_round_page(src_end));
7708 }
7709
7710 vm_map_unlock(src_map);
7711
7712 /* Fix-up start and end points in copy. This is necessary */
7713 /* when the various entries in the copy object were picked */
7714 /* up from different sub-maps */
7715
7716 tmp_entry = vm_map_copy_first_entry(copy);
7717 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7718 tmp_entry->vme_end = copy_addr +
7719 (tmp_entry->vme_end - tmp_entry->vme_start);
7720 tmp_entry->vme_start = copy_addr;
7721 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7722 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7723 }
7724
7725 *copy_result = copy;
7726 return(KERN_SUCCESS);
7727
7728 #undef RETURN
7729 }
7730
7731 /*
7732 * vm_map_copyin_object:
7733 *
7734 * Create a copy object from an object.
7735 * Our caller donates an object reference.
7736 */
7737
7738 kern_return_t
7739 vm_map_copyin_object(
7740 vm_object_t object,
7741 vm_object_offset_t offset, /* offset of region in object */
7742 vm_object_size_t size, /* size of region in object */
7743 vm_map_copy_t *copy_result) /* OUT */
7744 {
7745 vm_map_copy_t copy; /* Resulting copy */
7746
7747 /*
7748 * We drop the object into a special copy object
7749 * that contains the object directly.
7750 */
7751
7752 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7753 copy->type = VM_MAP_COPY_OBJECT;
7754 copy->cpy_object = object;
7755 copy->offset = offset;
7756 copy->size = size;
7757
7758 *copy_result = copy;
7759 return(KERN_SUCCESS);
7760 }
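
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): wrap a freshly allocated VM object in a copy object and map
 * it into a destination map.  The helper name is invented; it only
 * strings together vm_object_allocate, vm_map_copyin_object and
 * vm_map_copyout as documented above.
 */
static __unused kern_return_t
vm_map_example_map_new_object(
	vm_map_t		dst_map,
	vm_map_size_t		size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	object = vm_object_allocate(size);

	/* donates our object reference to the new copy object */
	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return kr;
	}

	/* consumes the copy object on success */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}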
7761
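/*
 * Routine: vm_map_fork_share [internal use only]
 *
 * Description:
 * Share the given parent-map entry with the child map being built
 * by vm_map_fork (used for VM_INHERIT_SHARE entries): make sure the
 * entry's object (or submap) is in a state that can safely be
 * shared, clone the entry into the child map, and mark both the
 * parent's and the child's entries as shared.
 */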
7762 static void
7763 vm_map_fork_share(
7764 vm_map_t old_map,
7765 vm_map_entry_t old_entry,
7766 vm_map_t new_map)
7767 {
7768 vm_object_t object;
7769 vm_map_entry_t new_entry;
7770
7771 /*
7772 * New sharing code. New map entry
7773 * references original object. Internal
7774 * objects use asynchronous copy algorithm for
7775 * future copies. First make sure we have
7776 * the right object. If we need a shadow,
7777 * or someone else already has one, then
7778 * make a new shadow and share it.
7779 */
7780
7781 object = old_entry->object.vm_object;
7782 if (old_entry->is_sub_map) {
7783 assert(old_entry->wired_count == 0);
7784 #ifndef NO_NESTED_PMAP
7785 if(old_entry->use_pmap) {
7786 kern_return_t result;
7787
7788 result = pmap_nest(new_map->pmap,
7789 (old_entry->object.sub_map)->pmap,
7790 (addr64_t)old_entry->vme_start,
7791 (addr64_t)old_entry->vme_start,
7792 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7793 if(result)
7794 panic("vm_map_fork_share: pmap_nest failed!");
7795 }
7796 #endif /* NO_NESTED_PMAP */
7797 } else if (object == VM_OBJECT_NULL) {
7798 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7799 old_entry->vme_start));
7800 old_entry->offset = 0;
7801 old_entry->object.vm_object = object;
7802 assert(!old_entry->needs_copy);
7803 } else if (object->copy_strategy !=
7804 MEMORY_OBJECT_COPY_SYMMETRIC) {
7805
7806 /*
7807 * We are already using an asymmetric
7808 * copy, and therefore we already have
7809 * the right object.
7810 */
7811
7812 assert(! old_entry->needs_copy);
7813 }
7814 else if (old_entry->needs_copy || /* case 1 */
7815 object->shadowed || /* case 2 */
7816 (!object->true_share && /* case 3 */
7817 !old_entry->is_shared &&
7818 (object->size >
7819 (vm_map_size_t)(old_entry->vme_end -
7820 old_entry->vme_start)))) {
7821
7822 /*
7823 * We need to create a shadow.
7824 * There are three cases here.
7825 * In the first case, we need to
7826 * complete a deferred symmetrical
7827 * copy that we participated in.
7828 * In the second and third cases,
7829 * we need to create the shadow so
7830 * that changes that we make to the
7831 * object do not interfere with
7832 * any symmetrical copies which
7833 * have occurred (case 2) or which
7834 * might occur (case 3).
7835 *
7836 * The first case is when we had
7837 * deferred shadow object creation
7838 * via the entry->needs_copy mechanism.
7839 * This mechanism only works when
7840 * only one entry points to the source
7841 * object, and we are about to create
7842 * a second entry pointing to the
7843 * same object. The problem is that
7844 * there is no way of mapping from
7845 * an object to the entries pointing
7846 * to it. (Deferred shadow creation
7847 * works with one entry because it occurs
7848 * at fault time, and we walk from the
7849 * entry to the object when handling
7850 * the fault.)
7851 *
7852 * The second case is when the object
7853 * to be shared has already been copied
7854 * with a symmetric copy, but we point
7855 * directly to the object without
7856 * needs_copy set in our entry. (This
7857 * can happen because different ranges
7858 * of an object can be pointed to by
7859 * different entries. In particular,
7860 * a single entry pointing to an object
7861 * can be split by a call to vm_inherit,
7862 * which, combined with task_create, can
7863 * result in the different entries
7864 * having different needs_copy values.)
7865 * The shadowed flag in the object allows
7866 * us to detect this case. The problem
7867 * with this case is that if this object
7868 * has or will have shadows, then we
7869 * must not perform an asymmetric copy
7870 * of this object, since such a copy
7871 * allows the object to be changed, which
7872 * will break the previous symmetrical
7873 * copies (which rely upon the object
7874 * not changing). In a sense, the shadowed
7875 * flag says "don't change this object".
7876 * We fix this by creating a shadow
7877 * object for this object, and sharing
7878 * that. This works because we are free
7879 * to change the shadow object (and thus
7880 * to use an asymmetric copy strategy);
7881 * this is also semantically correct,
7882 * since this object is temporary, and
7883 * therefore a copy of the object is
7884 * as good as the object itself. (This
7885 * is not true for permanent objects,
7886 * since the pager needs to see changes,
7887 * which won't happen if the changes
7888 * are made to a copy.)
7889 *
7890 * The third case is when the object
7891 * to be shared has parts sticking
7892 * outside of the entry we're working
7893 * with, and thus may in the future
7894 * be subject to a symmetrical copy.
7895 * (This is a preemptive version of
7896 * case 2.)
7897 */
7898
7899 vm_object_shadow(&old_entry->object.vm_object,
7900 &old_entry->offset,
7901 (vm_map_size_t) (old_entry->vme_end -
7902 old_entry->vme_start));
7903
7904 /*
7905 * If we're making a shadow for other than
7906 * copy on write reasons, then we have
7907 * to remove write permission.
7908 */
7909
7910 if (!old_entry->needs_copy &&
7911 (old_entry->protection & VM_PROT_WRITE)) {
7912 vm_prot_t prot;
7913
7914 prot = old_entry->protection & ~VM_PROT_WRITE;
7915
7916 if (override_nx(old_map, old_entry->alias) && prot)
7917 prot |= VM_PROT_EXECUTE;
7918
7919 if (old_map->mapped) {
7920 vm_object_pmap_protect(
7921 old_entry->object.vm_object,
7922 old_entry->offset,
7923 (old_entry->vme_end -
7924 old_entry->vme_start),
7925 PMAP_NULL,
7926 old_entry->vme_start,
7927 prot);
7928 } else {
7929 pmap_protect(old_map->pmap,
7930 old_entry->vme_start,
7931 old_entry->vme_end,
7932 prot);
7933 }
7934 }
7935
7936 old_entry->needs_copy = FALSE;
7937 object = old_entry->object.vm_object;
7938 }
7939
7940 /*
7941 * If object was using a symmetric copy strategy,
7942 * change its copy strategy to the default
7943 * asymmetric copy strategy, which is copy_delay
7944 * in the non-norma case and copy_call in the
7945 * norma case. Bump the reference count for the
7946 * new entry.
7947 */
7948
7949 if(old_entry->is_sub_map) {
7950 vm_map_lock(old_entry->object.sub_map);
7951 vm_map_reference(old_entry->object.sub_map);
7952 vm_map_unlock(old_entry->object.sub_map);
7953 } else {
7954 vm_object_lock(object);
7955 vm_object_reference_locked(object);
7956 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7957 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7958 }
7959 vm_object_unlock(object);
7960 }
7961
7962 /*
7963 * Clone the entry, using object ref from above.
7964 * Mark both entries as shared.
7965 */
7966
7967 new_entry = vm_map_entry_create(new_map);
7968 vm_map_entry_copy(new_entry, old_entry);
7969 old_entry->is_shared = TRUE;
7970 new_entry->is_shared = TRUE;
7971
7972 /*
7973 * Insert the entry into the new map -- we
7974 * know we're inserting at the end of the new
7975 * map.
7976 */
7977
7978 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
7979
7980 /*
7981 * Update the physical map
7982 */
7983
7984 if (old_entry->is_sub_map) {
7985 /* Bill Angell pmap support goes here */
7986 } else {
7987 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
7988 old_entry->vme_end - old_entry->vme_start,
7989 old_entry->vme_start);
7990 }
7991 }
7992
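/*
 * Routine: vm_map_fork_copy [internal use only]
 *
 * Description:
 * Copy the given parent-map entry into the child map being built by
 * vm_map_fork, using the maximum-protection form of vm_map_copyin so
 * that memory which could ever be accessed is inherited.  Returns
 * TRUE and advances *old_entry_p past the copied region on success;
 * returns FALSE if the region could not be copied, with *old_entry_p
 * advanced past any blank or permanently unreadable region.
 */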
7993 static boolean_t
7994 vm_map_fork_copy(
7995 vm_map_t old_map,
7996 vm_map_entry_t *old_entry_p,
7997 vm_map_t new_map)
7998 {
7999 vm_map_entry_t old_entry = *old_entry_p;
8000 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8001 vm_map_offset_t start = old_entry->vme_start;
8002 vm_map_copy_t copy;
8003 vm_map_entry_t last = vm_map_last_entry(new_map);
8004
8005 vm_map_unlock(old_map);
8006 /*
8007 * Use maxprot version of copyin because we
8008 * care about whether this memory can ever
8009 * be accessed, not just whether it's accessible
8010 * right now.
8011 */
8012 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8013 != KERN_SUCCESS) {
8014 /*
8015 * The map might have changed while it
8016 * was unlocked, check it again. Skip
8017 * any blank space or permanently
8018 * unreadable region.
8019 */
8020 vm_map_lock(old_map);
8021 if (!vm_map_lookup_entry(old_map, start, &last) ||
8022 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8023 last = last->vme_next;
8024 }
8025 *old_entry_p = last;
8026
8027 /*
8028 * XXX For some error returns, want to
8029 * XXX skip to the next element. Note
8030 * that INVALID_ADDRESS and
8031 * PROTECTION_FAILURE are handled above.
8032 */
8033
8034 return FALSE;
8035 }
8036
8037 /*
8038 * Insert the copy into the new map
8039 */
8040
8041 vm_map_copy_insert(new_map, last, copy);
8042
8043 /*
8044 * Pick up the traversal at the end of
8045 * the copied region.
8046 */
8047
8048 vm_map_lock(old_map);
8049 start += entry_size;
8050 if (! vm_map_lookup_entry(old_map, start, &last)) {
8051 last = last->vme_next;
8052 } else {
8053 if (last->vme_start == start) {
8054 /*
8055 * No need to clip here and we don't
8056 * want to cause any unnecessary
8057 * unnesting...
8058 */
8059 } else {
8060 vm_map_clip_start(old_map, last, start);
8061 }
8062 }
8063 *old_entry_p = last;
8064
8065 return TRUE;
8066 }
8067
8068 /*
8069 * vm_map_fork:
8070 *
8071 * Create and return a new map based on the old
8072 * map, according to the inheritance values on the
8073 * regions in that map.
8074 *
8075 * The source map must not be locked.
8076 */
8077 vm_map_t
8078 vm_map_fork(
8079 vm_map_t old_map)
8080 {
8081 pmap_t new_pmap;
8082 vm_map_t new_map;
8083 vm_map_entry_t old_entry;
8084 vm_map_size_t new_size = 0, entry_size;
8085 vm_map_entry_t new_entry;
8086 boolean_t src_needs_copy;
8087 boolean_t new_entry_needs_copy;
8088
8089 new_pmap = pmap_create((vm_map_size_t) 0,
8090 #if defined(__i386__) || defined(__x86_64__)
8091 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8092 #else
8093 0
8094 #endif
8095 );
8096 #if defined(__i386__)
8097 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8098 pmap_set_4GB_pagezero(new_pmap);
8099 #endif
8100
8101 vm_map_reference_swap(old_map);
8102 vm_map_lock(old_map);
8103
8104 new_map = vm_map_create(new_pmap,
8105 old_map->min_offset,
8106 old_map->max_offset,
8107 old_map->hdr.entries_pageable);
8108
8109 for (
8110 old_entry = vm_map_first_entry(old_map);
8111 old_entry != vm_map_to_entry(old_map);
8112 ) {
8113
8114 entry_size = old_entry->vme_end - old_entry->vme_start;
8115
8116 switch (old_entry->inheritance) {
8117 case VM_INHERIT_NONE:
8118 break;
8119
8120 case VM_INHERIT_SHARE:
8121 vm_map_fork_share(old_map, old_entry, new_map);
8122 new_size += entry_size;
8123 break;
8124
8125 case VM_INHERIT_COPY:
8126
8127 /*
8128 * Inline the copy_quickly case;
8129 * upon failure, fall back on call
8130 * to vm_map_fork_copy.
8131 */
8132
8133 if(old_entry->is_sub_map)
8134 break;
8135 if ((old_entry->wired_count != 0) ||
8136 ((old_entry->object.vm_object != NULL) &&
8137 (old_entry->object.vm_object->true_share))) {
8138 goto slow_vm_map_fork_copy;
8139 }
8140
8141 new_entry = vm_map_entry_create(new_map);
8142 vm_map_entry_copy(new_entry, old_entry);
8143 /* clear address space specifics */
8144 new_entry->use_pmap = FALSE;
8145
8146 if (! vm_object_copy_quickly(
8147 &new_entry->object.vm_object,
8148 old_entry->offset,
8149 (old_entry->vme_end -
8150 old_entry->vme_start),
8151 &src_needs_copy,
8152 &new_entry_needs_copy)) {
8153 vm_map_entry_dispose(new_map, new_entry);
8154 goto slow_vm_map_fork_copy;
8155 }
8156
8157 /*
8158 * Handle copy-on-write obligations
8159 */
8160
8161 if (src_needs_copy && !old_entry->needs_copy) {
8162 vm_prot_t prot;
8163
8164 prot = old_entry->protection & ~VM_PROT_WRITE;
8165
8166 if (override_nx(old_map, old_entry->alias) && prot)
8167 prot |= VM_PROT_EXECUTE;
8168
8169 vm_object_pmap_protect(
8170 old_entry->object.vm_object,
8171 old_entry->offset,
8172 (old_entry->vme_end -
8173 old_entry->vme_start),
8174 ((old_entry->is_shared
8175 || old_map->mapped)
8176 ? PMAP_NULL :
8177 old_map->pmap),
8178 old_entry->vme_start,
8179 prot);
8180
8181 old_entry->needs_copy = TRUE;
8182 }
8183 new_entry->needs_copy = new_entry_needs_copy;
8184
8185 /*
8186 * Insert the entry at the end
8187 * of the map.
8188 */
8189
8190 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
8191 new_entry);
8192 new_size += entry_size;
8193 break;
8194
8195 slow_vm_map_fork_copy:
8196 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8197 new_size += entry_size;
8198 }
8199 continue;
8200 }
8201 old_entry = old_entry->vme_next;
8202 }
8203
8204 new_map->size = new_size;
8205 vm_map_unlock(old_map);
8206 vm_map_deallocate(old_map);
8207
8208 return(new_map);
8209 }
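/*
 * Illustrative sketch, for exposition only: the inheritance switch above
 * is driven by each entry's VM_INHERIT_* setting, which user space can
 * arrange ahead of fork() with the Mach vm_inherit() call.  The wrapper
 * below is hypothetical and assumes the user-space Mach VM interface.
 */
#if 0	/* example only -- never compiled */
#include <mach/mach.h>
#include <unistd.h>

static void
fork_inheritance_sketch(vm_address_t addr, vm_size_t size)
{
	/* take the vm_map_fork_share() path for this region at fork()... */
	(void) vm_inherit(mach_task_self(), addr, size, VM_INHERIT_SHARE);

	/* ...so the child sees the parent's pages instead of a copy */
	if (fork() == 0) {
		_exit(0);
	}
}
#endif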
8210
8211 /*
8212 * vm_map_exec:
8213 *
8214 * Setup the "new_map" with the proper execution environment according
8215 * to the type of executable (platform, 64bit, chroot environment).
8216 * Map the comm page and shared region, etc...
8217 */
8218 kern_return_t
8219 vm_map_exec(
8220 vm_map_t new_map,
8221 task_t task,
8222 void *fsroot,
8223 cpu_type_t cpu)
8224 {
8225 SHARED_REGION_TRACE_DEBUG(
8226 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8227 current_task(), new_map, task, fsroot, cpu));
8228 (void) vm_commpage_enter(new_map, task);
8229 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8230 SHARED_REGION_TRACE_DEBUG(
8231 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8232 current_task(), new_map, task, fsroot, cpu));
8233 return KERN_SUCCESS;
8234 }
8235
8236 /*
8237 * vm_map_lookup_locked:
8238 *
8239 * Finds the VM object, offset, and
8240 * protection for a given virtual address in the
8241 * specified map, assuming a page fault of the
8242 * type specified.
8243 *
8244 * Returns the (object, offset, protection) for
8245 * this address, whether it is wired down, and whether
8246 * this map has the only reference to the data in question.
8247 * In order to later verify this lookup, a "version"
8248 * is returned.
8249 *
8250 * The map MUST be locked by the caller and WILL be
8251 * locked on exit. In order to guarantee the
8252 * existence of the returned object, it is returned
8253 * locked.
8254 *
8255 * If a lookup is requested with "write protection"
8256 * specified, the map may be changed to perform virtual
8257 * copying operations, although the data referenced will
8258 * remain the same.
8259 */
8260 kern_return_t
8261 vm_map_lookup_locked(
8262 vm_map_t *var_map, /* IN/OUT */
8263 vm_map_offset_t vaddr,
8264 vm_prot_t fault_type,
8265 int object_lock_type,
8266 vm_map_version_t *out_version, /* OUT */
8267 vm_object_t *object, /* OUT */
8268 vm_object_offset_t *offset, /* OUT */
8269 vm_prot_t *out_prot, /* OUT */
8270 boolean_t *wired, /* OUT */
8271 vm_object_fault_info_t fault_info, /* OUT */
8272 vm_map_t *real_map)
8273 {
8274 vm_map_entry_t entry;
8275 register vm_map_t map = *var_map;
8276 vm_map_t old_map = *var_map;
8277 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8278 vm_map_offset_t cow_parent_vaddr = 0;
8279 vm_map_offset_t old_start = 0;
8280 vm_map_offset_t old_end = 0;
8281 register vm_prot_t prot;
8282
8283 *real_map = map;
8284 RetryLookup: ;
8285
8286 /*
8287 * If the map has an interesting hint, try it before calling
8288 * full blown lookup routine.
8289 */
8290 entry = map->hint;
8291
8292 if ((entry == vm_map_to_entry(map)) ||
8293 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8294 vm_map_entry_t tmp_entry;
8295
8296 /*
8297 * Entry was either not a valid hint, or the vaddr
8298 * was not contained in the entry, so do a full lookup.
8299 */
8300 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8301 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8302 vm_map_unlock(cow_sub_map_parent);
8303 if((*real_map != map)
8304 && (*real_map != cow_sub_map_parent))
8305 vm_map_unlock(*real_map);
8306 return KERN_INVALID_ADDRESS;
8307 }
8308
8309 entry = tmp_entry;
8310 }
8311 if(map == old_map) {
8312 old_start = entry->vme_start;
8313 old_end = entry->vme_end;
8314 }
8315
8316 /*
8317 * Handle submaps. Drop lock on upper map, submap is
8318 * returned locked.
8319 */
8320
8321 submap_recurse:
8322 if (entry->is_sub_map) {
8323 vm_map_offset_t local_vaddr;
8324 vm_map_offset_t end_delta;
8325 vm_map_offset_t start_delta;
8326 vm_map_entry_t submap_entry;
8327 boolean_t mapped_needs_copy=FALSE;
8328
8329 local_vaddr = vaddr;
8330
8331 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8332 /* if real_map equals map we unlock below */
8333 if ((*real_map != map) &&
8334 (*real_map != cow_sub_map_parent))
8335 vm_map_unlock(*real_map);
8336 *real_map = entry->object.sub_map;
8337 }
8338
8339 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8340 if (!mapped_needs_copy) {
8341 if (vm_map_lock_read_to_write(map)) {
8342 vm_map_lock_read(map);
8343 /* XXX FBDP: entry still valid ? */
8344 if(*real_map == entry->object.sub_map)
8345 *real_map = map;
8346 goto RetryLookup;
8347 }
8348 vm_map_lock_read(entry->object.sub_map);
8349 cow_sub_map_parent = map;
8350 /* reset base to map before cow object */
8351 /* this is the map which will accept */
8352 /* the new cow object */
8353 old_start = entry->vme_start;
8354 old_end = entry->vme_end;
8355 cow_parent_vaddr = vaddr;
8356 mapped_needs_copy = TRUE;
8357 } else {
8358 vm_map_lock_read(entry->object.sub_map);
8359 if((cow_sub_map_parent != map) &&
8360 (*real_map != map))
8361 vm_map_unlock(map);
8362 }
8363 } else {
8364 vm_map_lock_read(entry->object.sub_map);
8365 /* leave the map locked if it is a target */
8366 /* cow sub_map above; otherwise, just */
8367 /* follow the maps down to the object. */
8368 /* Here we unlock knowing we are not */
8369 /* revisiting the map. */
8370 if((*real_map != map) && (map != cow_sub_map_parent))
8371 vm_map_unlock_read(map);
8372 }
8373
8374 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8375 *var_map = map = entry->object.sub_map;
8376
8377 /* calculate the offset in the submap for vaddr */
8378 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8379
8380 RetrySubMap:
8381 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8382 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8383 vm_map_unlock(cow_sub_map_parent);
8384 }
8385 if((*real_map != map)
8386 && (*real_map != cow_sub_map_parent)) {
8387 vm_map_unlock(*real_map);
8388 }
8389 *real_map = map;
8390 return KERN_INVALID_ADDRESS;
8391 }
8392
8393 /* find the attenuated shadow of the underlying object */
8394 /* on our target map */
8395
8396 /* In plain English: the submap object may extend beyond the */
8397 /* region mapped by the entry, or may only fill a portion */
8398 /* of it. For our purposes, we only care if the object */
8399 /* doesn't fill it. In that case the area which will */
8400 /* ultimately be clipped in the top map will only need */
8401 /* to be as big as the portion of the underlying entry */
8402 /* which is mapped */
8403 start_delta = submap_entry->vme_start > entry->offset ?
8404 submap_entry->vme_start - entry->offset : 0;
8405
8406 end_delta =
8407 (entry->offset + start_delta + (old_end - old_start)) <=
8408 submap_entry->vme_end ?
8409 0 : (entry->offset +
8410 (old_end - old_start))
8411 - submap_entry->vme_end;
8412
8413 old_start += start_delta;
8414 old_end -= end_delta;
8415
8416 if(submap_entry->is_sub_map) {
8417 entry = submap_entry;
8418 vaddr = local_vaddr;
8419 goto submap_recurse;
8420 }
8421
8422 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8423
8424 vm_object_t sub_object, copy_object;
8425 vm_object_offset_t copy_offset;
8426 vm_map_offset_t local_start;
8427 vm_map_offset_t local_end;
8428 boolean_t copied_slowly = FALSE;
8429
8430 if (vm_map_lock_read_to_write(map)) {
8431 vm_map_lock_read(map);
8432 old_start -= start_delta;
8433 old_end += end_delta;
8434 goto RetrySubMap;
8435 }
8436
8437
8438 sub_object = submap_entry->object.vm_object;
8439 if (sub_object == VM_OBJECT_NULL) {
8440 sub_object =
8441 vm_object_allocate(
8442 (vm_map_size_t)
8443 (submap_entry->vme_end -
8444 submap_entry->vme_start));
8445 submap_entry->object.vm_object = sub_object;
8446 submap_entry->offset = 0;
8447 }
8448 local_start = local_vaddr -
8449 (cow_parent_vaddr - old_start);
8450 local_end = local_vaddr +
8451 (old_end - cow_parent_vaddr);
8452 vm_map_clip_start(map, submap_entry, local_start);
8453 vm_map_clip_end(map, submap_entry, local_end);
8454 /* unnesting was done in vm_map_clip_start/end() */
8455 assert(!submap_entry->use_pmap);
8456
8457 /* This is the COW case; let's connect */
8458 /* an entry in our space to the underlying */
8459 /* object in the submap, bypassing the */
8460 /* submap. */
8461
8462
8463 if(submap_entry->wired_count != 0 ||
8464 (sub_object->copy_strategy ==
8465 MEMORY_OBJECT_COPY_NONE)) {
8466 vm_object_lock(sub_object);
8467 vm_object_copy_slowly(sub_object,
8468 submap_entry->offset,
8469 (submap_entry->vme_end -
8470 submap_entry->vme_start),
8471 FALSE,
8472 &copy_object);
8473 copied_slowly = TRUE;
8474 } else {
8475
8476 /* set up shadow object */
8477 copy_object = sub_object;
8478 vm_object_reference(copy_object);
8479 sub_object->shadowed = TRUE;
8480 submap_entry->needs_copy = TRUE;
8481
8482 prot = submap_entry->protection & ~VM_PROT_WRITE;
8483
8484 if (override_nx(map, submap_entry->alias) && prot)
8485 prot |= VM_PROT_EXECUTE;
8486
8487 vm_object_pmap_protect(
8488 sub_object,
8489 submap_entry->offset,
8490 submap_entry->vme_end -
8491 submap_entry->vme_start,
8492 (submap_entry->is_shared
8493 || map->mapped) ?
8494 PMAP_NULL : map->pmap,
8495 submap_entry->vme_start,
8496 prot);
8497 }
8498
8499 /*
8500 * Adjust the fault offset to the submap entry.
8501 */
8502 copy_offset = (local_vaddr -
8503 submap_entry->vme_start +
8504 submap_entry->offset);
8505
8506 /* This works differently from the */
8507 /* normal submap case. We go back */
8508 /* to the parent of the cow map and */
8509 /* clip out the target portion of */
8510 /* the sub_map, substituting the */
8511 /* new copy object. */
8512
8513 vm_map_unlock(map);
8514 local_start = old_start;
8515 local_end = old_end;
8516 map = cow_sub_map_parent;
8517 *var_map = cow_sub_map_parent;
8518 vaddr = cow_parent_vaddr;
8519 cow_sub_map_parent = NULL;
8520
8521 if(!vm_map_lookup_entry(map,
8522 vaddr, &entry)) {
8523 vm_object_deallocate(
8524 copy_object);
8525 vm_map_lock_write_to_read(map);
8526 return KERN_INVALID_ADDRESS;
8527 }
8528
8529 /* clip out the portion of space */
8530 /* mapped by the sub map which */
8531 /* corresponds to the underlying */
8532 /* object */
8533
8534 /*
8535 * Clip (and unnest) the smallest nested chunk
8536 * possible around the faulting address...
8537 */
8538 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8539 local_end = local_start + pmap_nesting_size_min;
8540 /*
8541 * ... but don't go beyond the "old_start" to "old_end"
8542 * range, to avoid spanning over another VM region
8543 * with a possibly different VM object and/or offset.
8544 */
8545 if (local_start < old_start) {
8546 local_start = old_start;
8547 }
8548 if (local_end > old_end) {
8549 local_end = old_end;
8550 }
8551 /*
8552 * Adjust copy_offset to the start of the range.
8553 */
8554 copy_offset -= (vaddr - local_start);
8555
8556 vm_map_clip_start(map, entry, local_start);
8557 vm_map_clip_end(map, entry, local_end);
8558 /* unnesting was done in vm_map_clip_start/end() */
8559 assert(!entry->use_pmap);
8560
8561 /* substitute copy object for */
8562 /* shared map entry */
8563 vm_map_deallocate(entry->object.sub_map);
8564 entry->is_sub_map = FALSE;
8565 entry->object.vm_object = copy_object;
8566
8567 /* propagate the submap entry's protections */
8568 entry->protection |= submap_entry->protection;
8569 entry->max_protection |= submap_entry->max_protection;
8570
8571 if(copied_slowly) {
8572 entry->offset = local_start - old_start;
8573 entry->needs_copy = FALSE;
8574 entry->is_shared = FALSE;
8575 } else {
8576 entry->offset = copy_offset;
8577 entry->needs_copy = TRUE;
8578 if(entry->inheritance == VM_INHERIT_SHARE)
8579 entry->inheritance = VM_INHERIT_COPY;
8580 if (map != old_map)
8581 entry->is_shared = TRUE;
8582 }
8583 if(entry->inheritance == VM_INHERIT_SHARE)
8584 entry->inheritance = VM_INHERIT_COPY;
8585
8586 vm_map_lock_write_to_read(map);
8587 } else {
8588 if((cow_sub_map_parent)
8589 && (cow_sub_map_parent != *real_map)
8590 && (cow_sub_map_parent != map)) {
8591 vm_map_unlock(cow_sub_map_parent);
8592 }
8593 entry = submap_entry;
8594 vaddr = local_vaddr;
8595 }
8596 }
8597
8598 /*
8599 * Check whether this task is allowed to have
8600 * this page.
8601 */
8602
8603 prot = entry->protection;
8604
8605 if (override_nx(map, entry->alias) && prot) {
8606 /*
8607 * HACK -- if not a stack, then allow execution
8608 */
8609 prot |= VM_PROT_EXECUTE;
8610 }
8611
8612 if ((fault_type & (prot)) != fault_type) {
8613 if (*real_map != map) {
8614 vm_map_unlock(*real_map);
8615 }
8616 *real_map = map;
8617
8618 if ((fault_type & VM_PROT_EXECUTE) && prot)
8619 log_stack_execution_failure((addr64_t)vaddr, prot);
8620
8621 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8622 return KERN_PROTECTION_FAILURE;
8623 }
8624
8625 /*
8626 * If this page is not pageable, we have to get
8627 * it for all possible accesses.
8628 */
8629
8630 *wired = (entry->wired_count != 0);
8631 if (*wired)
8632 fault_type = prot;
8633
8634 /*
8635 * If the entry was copy-on-write, we either shadow it now or demote the permissions.
8636 */
8637
8638 if (entry->needs_copy) {
8639 /*
8640 * If we want to write the page, we may as well
8641 * handle that now since we've got the map locked.
8642 *
8643 * If we don't need to write the page, we just
8644 * demote the permissions allowed.
8645 */
8646
8647 if ((fault_type & VM_PROT_WRITE) || *wired) {
8648 /*
8649 * Make a new object, and place it in the
8650 * object chain. Note that no new references
8651 * have appeared -- one just moved from the
8652 * map to the new object.
8653 */
8654
8655 if (vm_map_lock_read_to_write(map)) {
8656 vm_map_lock_read(map);
8657 goto RetryLookup;
8658 }
8659 vm_object_shadow(&entry->object.vm_object,
8660 &entry->offset,
8661 (vm_map_size_t) (entry->vme_end -
8662 entry->vme_start));
8663
8664 entry->object.vm_object->shadowed = TRUE;
8665 entry->needs_copy = FALSE;
8666 vm_map_lock_write_to_read(map);
8667 }
8668 else {
8669 /*
8670 * We're attempting to read a copy-on-write
8671 * page -- don't allow writes.
8672 */
8673
8674 prot &= (~VM_PROT_WRITE);
8675 }
8676 }
8677
8678 /*
8679 * Create an object if necessary.
8680 */
8681 if (entry->object.vm_object == VM_OBJECT_NULL) {
8682
8683 if (vm_map_lock_read_to_write(map)) {
8684 vm_map_lock_read(map);
8685 goto RetryLookup;
8686 }
8687
8688 entry->object.vm_object = vm_object_allocate(
8689 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8690 entry->offset = 0;
8691 vm_map_lock_write_to_read(map);
8692 }
8693
8694 /*
8695 * Return the object/offset from this entry. If the entry
8696 * was copy-on-write or empty, it has been fixed up. Also
8697 * return the protection.
8698 */
8699
8700 *offset = (vaddr - entry->vme_start) + entry->offset;
8701 *object = entry->object.vm_object;
8702 *out_prot = prot;
8703
8704 if (fault_info) {
8705 fault_info->interruptible = THREAD_UNINT; /* for now... */
8706 /* ... the caller will change "interruptible" if needed */
8707 fault_info->cluster_size = 0;
8708 fault_info->user_tag = entry->alias;
8709 fault_info->behavior = entry->behavior;
8710 fault_info->lo_offset = entry->offset;
8711 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8712 fault_info->no_cache = entry->no_cache;
8713 fault_info->stealth = FALSE;
8714 }
8715
8716 /*
8717 * Lock the object to prevent it from disappearing
8718 */
8719 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8720 vm_object_lock(*object);
8721 else
8722 vm_object_lock_shared(*object);
8723
8724 /*
8725 * Save the version number
8726 */
8727
8728 out_version->main_timestamp = map->timestamp;
8729
8730 return KERN_SUCCESS;
8731 }
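/*
 * Illustrative caller sketch, for exposition only: a minimal use of the
 * contract documented for vm_map_lookup_locked() -- the map is read-locked
 * by the caller, the object comes back locked, and "version" could later
 * be re-checked with vm_map_verify().  The function name is hypothetical.
 */
#if 0	/* example only -- never compiled */
static kern_return_t
example_lookup_object(
	vm_map_t	map,
	vm_map_offset_t	vaddr)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE,
				  &version, &object, &offset,
				  &prot, &wired,
				  NULL,		/* no fault_info needed */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	/* ... inspect "object" at "offset" while it is locked ... */
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif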
8732
8733
8734 /*
8735 * vm_map_verify:
8736 *
8737 * Verifies that the map in question has not changed
8738 * since the given version. If successful, the map
8739 * will not change until vm_map_verify_done() is called.
8740 */
8741 boolean_t
8742 vm_map_verify(
8743 register vm_map_t map,
8744 register vm_map_version_t *version) /* REF */
8745 {
8746 boolean_t result;
8747
8748 vm_map_lock_read(map);
8749 result = (map->timestamp == version->main_timestamp);
8750
8751 if (!result)
8752 vm_map_unlock_read(map);
8753
8754 return(result);
8755 }
8756
8757 /*
8758 * vm_map_verify_done:
8759 *
8760 * Releases locks acquired by a vm_map_verify.
8761 *
8762 * This is now a macro in vm/vm_map.h. It does a
8763 * vm_map_unlock_read on the map.
8764 */
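/*
 * Illustrative sketch, for exposition only, of the lookup/verify pattern:
 * save the version returned by vm_map_lookup_locked(), drop the map lock
 * to do blocking work, then re-validate before trusting the stale lookup.
 * The function name is hypothetical.
 */
#if 0	/* example only -- never compiled */
static boolean_t
example_revalidate(
	vm_map_t		map,
	vm_map_version_t	*saved_version)
{
	if (!vm_map_verify(map, saved_version)) {
		/* timestamp changed: the earlier lookup may be stale */
		return FALSE;
	}
	/* ... rely on the earlier lookup here, map read-locked ... */
	vm_map_verify_done(map, saved_version);	/* vm_map_unlock_read() */
	return TRUE;
}
#endif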
8765
8766
8767 /*
8768 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8769 * Goes away after regular vm_region_recurse function migrates to
8770 * 64 bits
8771 * vm_region_recurse: A form of vm_region which follows the
8772 * submaps in a target map
8773 *
8774 */
8775
8776 kern_return_t
8777 vm_map_region_recurse_64(
8778 vm_map_t map,
8779 vm_map_offset_t *address, /* IN/OUT */
8780 vm_map_size_t *size, /* OUT */
8781 natural_t *nesting_depth, /* IN/OUT */
8782 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8783 mach_msg_type_number_t *count) /* IN/OUT */
8784 {
8785 vm_region_extended_info_data_t extended;
8786 vm_map_entry_t tmp_entry;
8787 vm_map_offset_t user_address;
8788 unsigned int user_max_depth;
8789
8790 /*
8791 * "curr_entry" is the VM map entry preceding or including the
8792 * address we're looking for.
8793 * "curr_map" is the map or sub-map containing "curr_entry".
8794 * "curr_offset" is the cumulated offset of "curr_map" in the
8795 * target task's address space.
8796 * "curr_depth" is the depth of "curr_map" in the chain of
8797 * sub-maps.
8798 * "curr_max_offset" is the maximum offset we should take into
8799 * account in the current map. It may be smaller than the current
8800 * map's "max_offset" because we might not have mapped it all in
8801 * the upper level map.
8802 */
8803 vm_map_entry_t curr_entry;
8804 vm_map_offset_t curr_offset;
8805 vm_map_t curr_map;
8806 unsigned int curr_depth;
8807 vm_map_offset_t curr_max_offset;
8808
8809 /*
8810 * "next_" is the same as "curr_" but for the VM region immediately
8811 * after the address we're looking for. We need to keep track of this
8812 * too because we want to return info about that region if the
8813 * address we're looking for is not mapped.
8814 */
8815 vm_map_entry_t next_entry;
8816 vm_map_offset_t next_offset;
8817 vm_map_t next_map;
8818 unsigned int next_depth;
8819 vm_map_offset_t next_max_offset;
8820
8821 boolean_t look_for_pages;
8822 vm_region_submap_short_info_64_t short_info;
8823
8824 if (map == VM_MAP_NULL) {
8825 /* no address space to work on */
8826 return KERN_INVALID_ARGUMENT;
8827 }
8828
8829 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8830 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8831 /*
8832 * "info" structure is not big enough and
8833 * would overflow
8834 */
8835 return KERN_INVALID_ARGUMENT;
8836 } else {
8837 look_for_pages = FALSE;
8838 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8839 short_info = (vm_region_submap_short_info_64_t) submap_info;
8840 submap_info = NULL;
8841 }
8842 } else {
8843 look_for_pages = TRUE;
8844 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8845 short_info = NULL;
8846 }
8847
8848
8849 user_address = *address;
8850 user_max_depth = *nesting_depth;
8851
8852 curr_entry = NULL;
8853 curr_map = map;
8854 curr_offset = 0;
8855 curr_depth = 0;
8856 curr_max_offset = curr_map->max_offset;
8857
8858 next_entry = NULL;
8859 next_map = NULL;
8860 next_offset = 0;
8861 next_depth = 0;
8862 next_max_offset = curr_max_offset;
8863
8864 if (not_in_kdp) {
8865 vm_map_lock_read(curr_map);
8866 }
8867
8868 for (;;) {
8869 if (vm_map_lookup_entry(curr_map,
8870 user_address - curr_offset,
8871 &tmp_entry)) {
8872 /* tmp_entry contains the address we're looking for */
8873 curr_entry = tmp_entry;
8874 } else {
8875 /*
8876 * The address is not mapped. "tmp_entry" is the
8877 * map entry preceding the address. We want the next
8878 * one, if it exists.
8879 */
8880 curr_entry = tmp_entry->vme_next;
8881 if (curr_entry == vm_map_to_entry(curr_map) ||
8882 curr_entry->vme_start >= curr_max_offset) {
8883 /* no next entry at this level: stop looking */
8884 if (not_in_kdp) {
8885 vm_map_unlock_read(curr_map);
8886 }
8887 curr_entry = NULL;
8888 curr_map = NULL;
8889 curr_offset = 0;
8890 curr_depth = 0;
8891 curr_max_offset = 0;
8892 break;
8893 }
8894 }
8895
8896 /*
8897 * Is the next entry at this level closer to the address (or
8898 * deeper in the submap chain) than the one we had
8899 * so far?
8900 */
8901 tmp_entry = curr_entry->vme_next;
8902 if (tmp_entry == vm_map_to_entry(curr_map)) {
8903 /* no next entry at this level */
8904 } else if (tmp_entry->vme_start >= curr_max_offset) {
8905 /*
8906 * tmp_entry is beyond the scope of what we mapped of
8907 * this submap in the upper level: ignore it.
8908 */
8909 } else if ((next_entry == NULL) ||
8910 (tmp_entry->vme_start + curr_offset <=
8911 next_entry->vme_start + next_offset)) {
8912 /*
8913 * We didn't have a "next_entry" or this one is
8914 * closer to the address we're looking for:
8915 * use this "tmp_entry" as the new "next_entry".
8916 */
8917 if (next_entry != NULL) {
8918 /* unlock the last "next_map" */
8919 if (next_map != curr_map && not_in_kdp) {
8920 vm_map_unlock_read(next_map);
8921 }
8922 }
8923 next_entry = tmp_entry;
8924 next_map = curr_map;
8925 next_offset = curr_offset;
8926 next_depth = curr_depth;
8927 next_max_offset = curr_max_offset;
8928 }
8929
8930 if (!curr_entry->is_sub_map ||
8931 curr_depth >= user_max_depth) {
8932 /*
8933 * We hit a leaf map or we reached the maximum depth
8934 * we could, so stop looking. Keep the current map
8935 * locked.
8936 */
8937 break;
8938 }
8939
8940 /*
8941 * Get down to the next submap level.
8942 */
8943
8944 /*
8945 * Lock the next level and unlock the current level,
8946 * unless we need to keep it locked to access the "next_entry"
8947 * later.
8948 */
8949 if (not_in_kdp) {
8950 vm_map_lock_read(curr_entry->object.sub_map);
8951 }
8952 if (curr_map == next_map) {
8953 /* keep "next_map" locked in case we need it */
8954 } else {
8955 /* release this map */
8956 if (not_in_kdp)
8957 vm_map_unlock_read(curr_map);
8958 }
8959
8960 /*
8961 * Adjust the offset. "curr_entry" maps the submap
8962 * at relative address "curr_entry->vme_start" in the
8963 * curr_map but skips the first "curr_entry->offset"
8964 * bytes of the submap.
8965 * "curr_offset" always represents the offset of a virtual
8966 * address in the curr_map relative to the absolute address
8967 * space (i.e. the top-level VM map).
8968 */
8969 curr_offset +=
8970 (curr_entry->vme_start - curr_entry->offset);
8971 /* switch to the submap */
8972 curr_map = curr_entry->object.sub_map;
8973 curr_depth++;
8974 /*
8975 * "curr_max_offset" allows us to keep track of the
8976 * portion of the submap that is actually mapped at this level:
8977 * the rest of that submap is irrelevant to us, since it's not
8978 * mapped here.
8979 * The relevant portion of the map starts at
8980 * "curr_entry->offset" up to the size of "curr_entry".
8981 */
8982 curr_max_offset =
8983 curr_entry->vme_end - curr_entry->vme_start +
8984 curr_entry->offset;
8985 curr_entry = NULL;
8986 }
8987
8988 if (curr_entry == NULL) {
8989 /* no VM region contains the address... */
8990 if (next_entry == NULL) {
8991 /* ... and no VM region follows it either */
8992 return KERN_INVALID_ADDRESS;
8993 }
8994 /* ... gather info about the next VM region */
8995 curr_entry = next_entry;
8996 curr_map = next_map; /* still locked ... */
8997 curr_offset = next_offset;
8998 curr_depth = next_depth;
8999 curr_max_offset = next_max_offset;
9000 } else {
9001 /* we won't need "next_entry" after all */
9002 if (next_entry != NULL) {
9003 /* release "next_map" */
9004 if (next_map != curr_map && not_in_kdp) {
9005 vm_map_unlock_read(next_map);
9006 }
9007 }
9008 }
9009 next_entry = NULL;
9010 next_map = NULL;
9011 next_offset = 0;
9012 next_depth = 0;
9013 next_max_offset = 0;
9014
9015 *nesting_depth = curr_depth;
9016 *size = curr_entry->vme_end - curr_entry->vme_start;
9017 *address = curr_entry->vme_start + curr_offset;
9018
9019 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9020 // so probably should be a real 32b ID vs. ptr.
9021 // Current users just check for equality
9022 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9023
9024 if (look_for_pages) {
9025 submap_info->user_tag = curr_entry->alias;
9026 submap_info->offset = curr_entry->offset;
9027 submap_info->protection = curr_entry->protection;
9028 submap_info->inheritance = curr_entry->inheritance;
9029 submap_info->max_protection = curr_entry->max_protection;
9030 submap_info->behavior = curr_entry->behavior;
9031 submap_info->user_wired_count = curr_entry->user_wired_count;
9032 submap_info->is_submap = curr_entry->is_sub_map;
9033 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9034 } else {
9035 short_info->user_tag = curr_entry->alias;
9036 short_info->offset = curr_entry->offset;
9037 short_info->protection = curr_entry->protection;
9038 short_info->inheritance = curr_entry->inheritance;
9039 short_info->max_protection = curr_entry->max_protection;
9040 short_info->behavior = curr_entry->behavior;
9041 short_info->user_wired_count = curr_entry->user_wired_count;
9042 short_info->is_submap = curr_entry->is_sub_map;
9043 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9044 }
9045
9046 extended.pages_resident = 0;
9047 extended.pages_swapped_out = 0;
9048 extended.pages_shared_now_private = 0;
9049 extended.pages_dirtied = 0;
9050 extended.external_pager = 0;
9051 extended.shadow_depth = 0;
9052
9053 if (not_in_kdp) {
9054 if (!curr_entry->is_sub_map) {
9055 vm_map_region_walk(curr_map,
9056 curr_entry->vme_start,
9057 curr_entry,
9058 curr_entry->offset,
9059 (curr_entry->vme_end -
9060 curr_entry->vme_start),
9061 &extended,
9062 look_for_pages);
9063 if (extended.external_pager &&
9064 extended.ref_count == 2 &&
9065 extended.share_mode == SM_SHARED) {
9066 extended.share_mode = SM_PRIVATE;
9067 }
9068 } else {
9069 if (curr_entry->use_pmap) {
9070 extended.share_mode = SM_TRUESHARED;
9071 } else {
9072 extended.share_mode = SM_PRIVATE;
9073 }
9074 extended.ref_count =
9075 curr_entry->object.sub_map->ref_count;
9076 }
9077 }
9078
9079 if (look_for_pages) {
9080 submap_info->pages_resident = extended.pages_resident;
9081 submap_info->pages_swapped_out = extended.pages_swapped_out;
9082 submap_info->pages_shared_now_private =
9083 extended.pages_shared_now_private;
9084 submap_info->pages_dirtied = extended.pages_dirtied;
9085 submap_info->external_pager = extended.external_pager;
9086 submap_info->shadow_depth = extended.shadow_depth;
9087 submap_info->share_mode = extended.share_mode;
9088 submap_info->ref_count = extended.ref_count;
9089 } else {
9090 short_info->external_pager = extended.external_pager;
9091 short_info->shadow_depth = extended.shadow_depth;
9092 short_info->share_mode = extended.share_mode;
9093 short_info->ref_count = extended.ref_count;
9094 }
9095
9096 if (not_in_kdp) {
9097 vm_map_unlock_read(curr_map);
9098 }
9099
9100 return KERN_SUCCESS;
9101 }
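/*
 * Illustrative user-space sketch, for exposition only: this routine backs
 * the mach_vm_region_recurse() MIG call.  The loop below walks a task's
 * regions at nesting depth 0; the user-side details are assumptions.
 */
#if 0	/* example only -- never compiled */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_regions(task_t task)
{
	mach_vm_address_t		address = 0;
	mach_vm_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 0;	/* stay at the top-level view */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(task, &address, &size, &depth,
					   (vm_region_recurse_info_t)&info,
					   &count) != KERN_SUCCESS)
			break;
		printf("0x%llx-0x%llx depth %u prot 0x%x\n",
		       address, address + size, depth, info.protection);
		address += size;	/* advance to the next region */
	}
}
#endif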
9102
9103 /*
9104 * vm_region:
9105 *
9106 * User call to obtain information about a region in
9107 * a task's address map. Currently, only one flavor is
9108 * supported.
9109 *
9110 * XXX The reserved and behavior fields cannot be filled
9111 * in until the vm merge from the IK is completed, and
9112 * vm_reserve is implemented.
9113 */
9114
9115 kern_return_t
9116 vm_map_region(
9117 vm_map_t map,
9118 vm_map_offset_t *address, /* IN/OUT */
9119 vm_map_size_t *size, /* OUT */
9120 vm_region_flavor_t flavor, /* IN */
9121 vm_region_info_t info, /* OUT */
9122 mach_msg_type_number_t *count, /* IN/OUT */
9123 mach_port_t *object_name) /* OUT */
9124 {
9125 vm_map_entry_t tmp_entry;
9126 vm_map_entry_t entry;
9127 vm_map_offset_t start;
9128
9129 if (map == VM_MAP_NULL)
9130 return(KERN_INVALID_ARGUMENT);
9131
9132 switch (flavor) {
9133
9134 case VM_REGION_BASIC_INFO:
9135 /* legacy for old 32-bit objects info */
9136 {
9137 vm_region_basic_info_t basic;
9138
9139 if (*count < VM_REGION_BASIC_INFO_COUNT)
9140 return(KERN_INVALID_ARGUMENT);
9141
9142 basic = (vm_region_basic_info_t) info;
9143 *count = VM_REGION_BASIC_INFO_COUNT;
9144
9145 vm_map_lock_read(map);
9146
9147 start = *address;
9148 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9149 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9150 vm_map_unlock_read(map);
9151 return(KERN_INVALID_ADDRESS);
9152 }
9153 } else {
9154 entry = tmp_entry;
9155 }
9156
9157 start = entry->vme_start;
9158
9159 basic->offset = (uint32_t)entry->offset;
9160 basic->protection = entry->protection;
9161 basic->inheritance = entry->inheritance;
9162 basic->max_protection = entry->max_protection;
9163 basic->behavior = entry->behavior;
9164 basic->user_wired_count = entry->user_wired_count;
9165 basic->reserved = entry->is_sub_map;
9166 *address = start;
9167 *size = (entry->vme_end - start);
9168
9169 if (object_name) *object_name = IP_NULL;
9170 if (entry->is_sub_map) {
9171 basic->shared = FALSE;
9172 } else {
9173 basic->shared = entry->is_shared;
9174 }
9175
9176 vm_map_unlock_read(map);
9177 return(KERN_SUCCESS);
9178 }
9179
9180 case VM_REGION_BASIC_INFO_64:
9181 {
9182 vm_region_basic_info_64_t basic;
9183
9184 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9185 return(KERN_INVALID_ARGUMENT);
9186
9187 basic = (vm_region_basic_info_64_t) info;
9188 *count = VM_REGION_BASIC_INFO_COUNT_64;
9189
9190 vm_map_lock_read(map);
9191
9192 start = *address;
9193 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9194 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9195 vm_map_unlock_read(map);
9196 return(KERN_INVALID_ADDRESS);
9197 }
9198 } else {
9199 entry = tmp_entry;
9200 }
9201
9202 start = entry->vme_start;
9203
9204 basic->offset = entry->offset;
9205 basic->protection = entry->protection;
9206 basic->inheritance = entry->inheritance;
9207 basic->max_protection = entry->max_protection;
9208 basic->behavior = entry->behavior;
9209 basic->user_wired_count = entry->user_wired_count;
9210 basic->reserved = entry->is_sub_map;
9211 *address = start;
9212 *size = (entry->vme_end - start);
9213
9214 if (object_name) *object_name = IP_NULL;
9215 if (entry->is_sub_map) {
9216 basic->shared = FALSE;
9217 } else {
9218 basic->shared = entry->is_shared;
9219 }
9220
9221 vm_map_unlock_read(map);
9222 return(KERN_SUCCESS);
9223 }
9224 case VM_REGION_EXTENDED_INFO:
9225 {
9226 vm_region_extended_info_t extended;
9227
9228 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9229 return(KERN_INVALID_ARGUMENT);
9230
9231 extended = (vm_region_extended_info_t) info;
9232 *count = VM_REGION_EXTENDED_INFO_COUNT;
9233
9234 vm_map_lock_read(map);
9235
9236 start = *address;
9237 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9238 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9239 vm_map_unlock_read(map);
9240 return(KERN_INVALID_ADDRESS);
9241 }
9242 } else {
9243 entry = tmp_entry;
9244 }
9245 start = entry->vme_start;
9246
9247 extended->protection = entry->protection;
9248 extended->user_tag = entry->alias;
9249 extended->pages_resident = 0;
9250 extended->pages_swapped_out = 0;
9251 extended->pages_shared_now_private = 0;
9252 extended->pages_dirtied = 0;
9253 extended->external_pager = 0;
9254 extended->shadow_depth = 0;
9255
9256 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9257
9258 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9259 extended->share_mode = SM_PRIVATE;
9260
9261 if (object_name)
9262 *object_name = IP_NULL;
9263 *address = start;
9264 *size = (entry->vme_end - start);
9265
9266 vm_map_unlock_read(map);
9267 return(KERN_SUCCESS);
9268 }
9269 case VM_REGION_TOP_INFO:
9270 {
9271 vm_region_top_info_t top;
9272
9273 if (*count < VM_REGION_TOP_INFO_COUNT)
9274 return(KERN_INVALID_ARGUMENT);
9275
9276 top = (vm_region_top_info_t) info;
9277 *count = VM_REGION_TOP_INFO_COUNT;
9278
9279 vm_map_lock_read(map);
9280
9281 start = *address;
9282 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9283 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9284 vm_map_unlock_read(map);
9285 return(KERN_INVALID_ADDRESS);
9286 }
9287 } else {
9288 entry = tmp_entry;
9289
9290 }
9291 start = entry->vme_start;
9292
9293 top->private_pages_resident = 0;
9294 top->shared_pages_resident = 0;
9295
9296 vm_map_region_top_walk(entry, top);
9297
9298 if (object_name)
9299 *object_name = IP_NULL;
9300 *address = start;
9301 *size = (entry->vme_end - start);
9302
9303 vm_map_unlock_read(map);
9304 return(KERN_SUCCESS);
9305 }
9306 default:
9307 return(KERN_INVALID_ARGUMENT);
9308 }
9309 }
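/*
 * Illustrative user-space sketch, for exposition only: requesting the
 * VM_REGION_BASIC_INFO_64 flavor handled above through mach_vm_region().
 * The user-side details are assumptions.
 */
#if 0	/* example only -- never compiled */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
region_protection_at(task_t task, mach_vm_address_t addr, vm_prot_t *prot)
{
	mach_vm_address_t		address = addr;
	mach_vm_size_t			size;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name;
	kern_return_t			kr;

	kr = mach_vm_region(task, &address, &size, VM_REGION_BASIC_INFO_64,
			    (vm_region_info_t)&info, &count, &object_name);
	if (kr == KERN_SUCCESS)
		*prot = info.protection;	/* region containing "addr" */
	return kr;
}
#endif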
9310
9311 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9312 MIN((entry_size), \
9313 ((obj)->all_reusable ? \
9314 (obj)->wired_page_count : \
9315 (obj)->resident_page_count - (obj)->reusable_page_count))
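/*
 * Worked example, for exposition only: for an 8-page entry over an object
 * with 10 resident pages, 3 of them reusable and "all_reusable" clear, the
 * macro charges MIN(8, 10 - 3) = 7 pages to this entry.
 */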
9316
9317 void
9318 vm_map_region_top_walk(
9319 vm_map_entry_t entry,
9320 vm_region_top_info_t top)
9321 {
9322
9323 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9324 top->share_mode = SM_EMPTY;
9325 top->ref_count = 0;
9326 top->obj_id = 0;
9327 return;
9328 }
9329
9330 {
9331 struct vm_object *obj, *tmp_obj;
9332 int ref_count;
9333 uint32_t entry_size;
9334
9335 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9336
9337 obj = entry->object.vm_object;
9338
9339 vm_object_lock(obj);
9340
9341 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9342 ref_count--;
9343
9344 assert(obj->reusable_page_count <= obj->resident_page_count);
9345 if (obj->shadow) {
9346 if (ref_count == 1)
9347 top->private_pages_resident =
9348 OBJ_RESIDENT_COUNT(obj, entry_size);
9349 else
9350 top->shared_pages_resident =
9351 OBJ_RESIDENT_COUNT(obj, entry_size);
9352 top->ref_count = ref_count;
9353 top->share_mode = SM_COW;
9354
9355 while ((tmp_obj = obj->shadow)) {
9356 vm_object_lock(tmp_obj);
9357 vm_object_unlock(obj);
9358 obj = tmp_obj;
9359
9360 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9361 ref_count--;
9362
9363 assert(obj->reusable_page_count <= obj->resident_page_count);
9364 top->shared_pages_resident +=
9365 OBJ_RESIDENT_COUNT(obj, entry_size);
9366 top->ref_count += ref_count - 1;
9367 }
9368 } else {
9369 if (entry->needs_copy) {
9370 top->share_mode = SM_COW;
9371 top->shared_pages_resident =
9372 OBJ_RESIDENT_COUNT(obj, entry_size);
9373 } else {
9374 if (ref_count == 1 ||
9375 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9376 top->share_mode = SM_PRIVATE;
9377 top->private_pages_resident =
9378 OBJ_RESIDENT_COUNT(obj,
9379 entry_size);
9380 } else {
9381 top->share_mode = SM_SHARED;
9382 top->shared_pages_resident =
9383 OBJ_RESIDENT_COUNT(obj,
9384 entry_size);
9385 }
9386 }
9387 top->ref_count = ref_count;
9388 }
9389 /* XXX K64: obj_id will be truncated */
9390 top->obj_id = (unsigned int) (uintptr_t)obj;
9391
9392 vm_object_unlock(obj);
9393 }
9394 }
9395
9396 void
9397 vm_map_region_walk(
9398 vm_map_t map,
9399 vm_map_offset_t va,
9400 vm_map_entry_t entry,
9401 vm_object_offset_t offset,
9402 vm_object_size_t range,
9403 vm_region_extended_info_t extended,
9404 boolean_t look_for_pages)
9405 {
9406 register struct vm_object *obj, *tmp_obj;
9407 register vm_map_offset_t last_offset;
9408 register int i;
9409 register int ref_count;
9410 struct vm_object *shadow_object;
9411 int shadow_depth;
9412
9413 if ((entry->object.vm_object == 0) ||
9414 (entry->is_sub_map) ||
9415 (entry->object.vm_object->phys_contiguous)) {
9416 extended->share_mode = SM_EMPTY;
9417 extended->ref_count = 0;
9418 return;
9419 }
9420 {
9421 obj = entry->object.vm_object;
9422
9423 vm_object_lock(obj);
9424
9425 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9426 ref_count--;
9427
9428 if (look_for_pages) {
9429 for (last_offset = offset + range;
9430 offset < last_offset;
9431 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9432 vm_map_region_look_for_page(map, va, obj,
9433 offset, ref_count,
9434 0, extended);
9435 } else {
9436 shadow_object = obj->shadow;
9437 shadow_depth = 0;
9438
9439 if ( !(obj->pager_trusted) && !(obj->internal))
9440 extended->external_pager = 1;
9441
9442 if (shadow_object != VM_OBJECT_NULL) {
9443 vm_object_lock(shadow_object);
9444 for (;
9445 shadow_object != VM_OBJECT_NULL;
9446 shadow_depth++) {
9447 vm_object_t next_shadow;
9448
9449 if ( !(shadow_object->pager_trusted) &&
9450 !(shadow_object->internal))
9451 extended->external_pager = 1;
9452
9453 next_shadow = shadow_object->shadow;
9454 if (next_shadow) {
9455 vm_object_lock(next_shadow);
9456 }
9457 vm_object_unlock(shadow_object);
9458 shadow_object = next_shadow;
9459 }
9460 }
9461 extended->shadow_depth = shadow_depth;
9462 }
9463
9464 if (extended->shadow_depth || entry->needs_copy)
9465 extended->share_mode = SM_COW;
9466 else {
9467 if (ref_count == 1)
9468 extended->share_mode = SM_PRIVATE;
9469 else {
9470 if (obj->true_share)
9471 extended->share_mode = SM_TRUESHARED;
9472 else
9473 extended->share_mode = SM_SHARED;
9474 }
9475 }
9476 extended->ref_count = ref_count - extended->shadow_depth;
9477
9478 for (i = 0; i < extended->shadow_depth; i++) {
9479 if ((tmp_obj = obj->shadow) == 0)
9480 break;
9481 vm_object_lock(tmp_obj);
9482 vm_object_unlock(obj);
9483
9484 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9485 ref_count--;
9486
9487 extended->ref_count += ref_count;
9488 obj = tmp_obj;
9489 }
9490 vm_object_unlock(obj);
9491
9492 if (extended->share_mode == SM_SHARED) {
9493 register vm_map_entry_t cur;
9494 register vm_map_entry_t last;
9495 int my_refs;
9496
9497 obj = entry->object.vm_object;
9498 last = vm_map_to_entry(map);
9499 my_refs = 0;
9500
9501 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9502 ref_count--;
9503 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9504 my_refs += vm_map_region_count_obj_refs(cur, obj);
9505
9506 if (my_refs == ref_count)
9507 extended->share_mode = SM_PRIVATE_ALIASED;
9508 else if (my_refs > 1)
9509 extended->share_mode = SM_SHARED_ALIASED;
9510 }
9511 }
9512 }
9513
9514
9515 /* object is locked on entry and locked on return */
9516
9517
9518 static void
9519 vm_map_region_look_for_page(
9520 __unused vm_map_t map,
9521 __unused vm_map_offset_t va,
9522 vm_object_t object,
9523 vm_object_offset_t offset,
9524 int max_refcnt,
9525 int depth,
9526 vm_region_extended_info_t extended)
9527 {
9528 register vm_page_t p;
9529 register vm_object_t shadow;
9530 register int ref_count;
9531 vm_object_t caller_object;
9532 #if MACH_PAGEMAP
9533 kern_return_t kr;
9534 #endif
9535 shadow = object->shadow;
9536 caller_object = object;
9537
9538
9539 while (TRUE) {
9540
9541 if ( !(object->pager_trusted) && !(object->internal))
9542 extended->external_pager = 1;
9543
9544 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9545 if (shadow && (max_refcnt == 1))
9546 extended->pages_shared_now_private++;
9547
9548 if (!p->fictitious &&
9549 (p->dirty || pmap_is_modified(p->phys_page)))
9550 extended->pages_dirtied++;
9551
9552 extended->pages_resident++;
9553
9554 if(object != caller_object)
9555 vm_object_unlock(object);
9556
9557 return;
9558 }
9559 #if MACH_PAGEMAP
9560 if (object->existence_map) {
9561 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9562
9563 extended->pages_swapped_out++;
9564
9565 if(object != caller_object)
9566 vm_object_unlock(object);
9567
9568 return;
9569 }
9570 } else if (object->internal &&
9571 object->alive &&
9572 !object->terminating &&
9573 object->pager_ready) {
9574
9575 memory_object_t pager;
9576
9577 vm_object_paging_begin(object);
9578 pager = object->pager;
9579 vm_object_unlock(object);
9580
9581 kr = memory_object_data_request(
9582 pager,
9583 offset + object->paging_offset,
9584 0, /* just poke the pager */
9585 VM_PROT_READ,
9586 NULL);
9587
9588 vm_object_lock(object);
9589 vm_object_paging_end(object);
9590
9591 if (kr == KERN_SUCCESS) {
9592 /* the pager has that page */
9593 extended->pages_swapped_out++;
9594 if (object != caller_object)
9595 vm_object_unlock(object);
9596 return;
9597 }
9598 }
9599 #endif /* MACH_PAGEMAP */
9600
9601 if (shadow) {
9602 vm_object_lock(shadow);
9603
9604 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9605 ref_count--;
9606
9607 if (++depth > extended->shadow_depth)
9608 extended->shadow_depth = depth;
9609
9610 if (ref_count > max_refcnt)
9611 max_refcnt = ref_count;
9612
9613 if(object != caller_object)
9614 vm_object_unlock(object);
9615
9616 offset = offset + object->shadow_offset;
9617 object = shadow;
9618 shadow = object->shadow;
9619 continue;
9620 }
9621 if(object != caller_object)
9622 vm_object_unlock(object);
9623 break;
9624 }
9625 }
9626
9627 static int
9628 vm_map_region_count_obj_refs(
9629 vm_map_entry_t entry,
9630 vm_object_t object)
9631 {
9632 register int ref_count;
9633 register vm_object_t chk_obj;
9634 register vm_object_t tmp_obj;
9635
9636 if (entry->object.vm_object == 0)
9637 return(0);
9638
9639 if (entry->is_sub_map)
9640 return(0);
9641 else {
9642 ref_count = 0;
9643
9644 chk_obj = entry->object.vm_object;
9645 vm_object_lock(chk_obj);
9646
9647 while (chk_obj) {
9648 if (chk_obj == object)
9649 ref_count++;
9650 tmp_obj = chk_obj->shadow;
9651 if (tmp_obj)
9652 vm_object_lock(tmp_obj);
9653 vm_object_unlock(chk_obj);
9654
9655 chk_obj = tmp_obj;
9656 }
9657 }
9658 return(ref_count);
9659 }
9660
9661
9662 /*
9663 * Routine: vm_map_simplify
9664 *
9665 * Description:
9666 * Attempt to simplify the map representation in
9667 * the vicinity of the given starting address.
9668 * Note:
9669 * This routine is intended primarily to keep the
9670 * kernel maps more compact -- they generally don't
9671 * benefit from the "expand a map entry" technology
9672 * at allocation time because the adjacent entry
9673 * is often wired down.
9674 */
9675 void
9676 vm_map_simplify_entry(
9677 vm_map_t map,
9678 vm_map_entry_t this_entry)
9679 {
9680 vm_map_entry_t prev_entry;
9681
9682 counter(c_vm_map_simplify_entry_called++);
9683
9684 prev_entry = this_entry->vme_prev;
9685
9686 if ((this_entry != vm_map_to_entry(map)) &&
9687 (prev_entry != vm_map_to_entry(map)) &&
9688
9689 (prev_entry->vme_end == this_entry->vme_start) &&
9690
9691 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9692
9693 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9694 ((prev_entry->offset + (prev_entry->vme_end -
9695 prev_entry->vme_start))
9696 == this_entry->offset) &&
9697
9698 (prev_entry->inheritance == this_entry->inheritance) &&
9699 (prev_entry->protection == this_entry->protection) &&
9700 (prev_entry->max_protection == this_entry->max_protection) &&
9701 (prev_entry->behavior == this_entry->behavior) &&
9702 (prev_entry->alias == this_entry->alias) &&
9703 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
9704 (prev_entry->no_cache == this_entry->no_cache) &&
9705 (prev_entry->wired_count == this_entry->wired_count) &&
9706 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9707
9708 (prev_entry->needs_copy == this_entry->needs_copy) &&
9709 (prev_entry->permanent == this_entry->permanent) &&
9710
9711 (prev_entry->use_pmap == FALSE) &&
9712 (this_entry->use_pmap == FALSE) &&
9713 (prev_entry->in_transition == FALSE) &&
9714 (this_entry->in_transition == FALSE) &&
9715 (prev_entry->needs_wakeup == FALSE) &&
9716 (this_entry->needs_wakeup == FALSE) &&
9717 (prev_entry->is_shared == FALSE) &&
9718 (this_entry->is_shared == FALSE)
9719 ) {
9720 _vm_map_entry_unlink(&map->hdr, prev_entry);
9721 this_entry->vme_start = prev_entry->vme_start;
9722 this_entry->offset = prev_entry->offset;
9723 if (prev_entry->is_sub_map) {
9724 vm_map_deallocate(prev_entry->object.sub_map);
9725 } else {
9726 vm_object_deallocate(prev_entry->object.vm_object);
9727 }
9728 vm_map_entry_dispose(map, prev_entry);
9729 SAVE_HINT_MAP_WRITE(map, this_entry);
9730 counter(c_vm_map_simplified++);
9731 }
9732 }
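/*
 * Illustration, for exposition only: two neighbouring entries coalesce
 * only when the second continues the first's object offset exactly and
 * every attribute compared above matches, e.g.
 *
 *	[0x1000, 0x3000) offset 0x0  +  [0x3000, 0x5000) offset 0x2000
 *	-> one entry [0x1000, 0x5000) offset 0x0
 */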
9733
9734 void
9735 vm_map_simplify(
9736 vm_map_t map,
9737 vm_map_offset_t start)
9738 {
9739 vm_map_entry_t this_entry;
9740
9741 vm_map_lock(map);
9742 if (vm_map_lookup_entry(map, start, &this_entry)) {
9743 vm_map_simplify_entry(map, this_entry);
9744 vm_map_simplify_entry(map, this_entry->vme_next);
9745 }
9746 counter(c_vm_map_simplify_called++);
9747 vm_map_unlock(map);
9748 }
9749
9750 static void
9751 vm_map_simplify_range(
9752 vm_map_t map,
9753 vm_map_offset_t start,
9754 vm_map_offset_t end)
9755 {
9756 vm_map_entry_t entry;
9757
9758 /*
9759 * The map should be locked (for "write") by the caller.
9760 */
9761
9762 if (start >= end) {
9763 /* invalid address range */
9764 return;
9765 }
9766
9767 start = vm_map_trunc_page(start);
9768 end = vm_map_round_page(end);
9769
9770 if (!vm_map_lookup_entry(map, start, &entry)) {
9771 /* "start" is not mapped and "entry" ends before "start" */
9772 if (entry == vm_map_to_entry(map)) {
9773 /* start with first entry in the map */
9774 entry = vm_map_first_entry(map);
9775 } else {
9776 /* start with next entry */
9777 entry = entry->vme_next;
9778 }
9779 }
9780
9781 while (entry != vm_map_to_entry(map) &&
9782 entry->vme_start <= end) {
9783 /* try and coalesce "entry" with its previous entry */
9784 vm_map_simplify_entry(map, entry);
9785 entry = entry->vme_next;
9786 }
9787 }
9788
9789
9790 /*
9791 * Routine: vm_map_machine_attribute
9792 * Purpose:
9793 * Provide machine-specific attributes to mappings,
9794 * such as cachability etc. for machines that provide
9795 * them. NUMA architectures and machines with big/strange
9796 * caches will use this.
9797 * Note:
9798 * Responsibilities for locking and checking are handled here,
9799 * everything else in the pmap module. If any non-volatile
9800 * information must be kept, the pmap module should handle
9801 * it itself. [This assumes that attributes do not
9802 * need to be inherited, which seems ok to me]
9803 */
9804 kern_return_t
9805 vm_map_machine_attribute(
9806 vm_map_t map,
9807 vm_map_offset_t start,
9808 vm_map_offset_t end,
9809 vm_machine_attribute_t attribute,
9810 vm_machine_attribute_val_t* value) /* IN/OUT */
9811 {
9812 kern_return_t ret;
9813 vm_map_size_t sync_size;
9814 vm_map_entry_t entry;
9815
9816 if (start < vm_map_min(map) || end > vm_map_max(map))
9817 return KERN_INVALID_ADDRESS;
9818
9819 /* Figure how much memory we need to flush (in page increments) */
9820 sync_size = end - start;
9821
9822 vm_map_lock(map);
9823
9824 if (attribute != MATTR_CACHE) {
9825 /* If we don't have to find physical addresses, we */
9826 /* don't have to do an explicit traversal here. */
9827 ret = pmap_attribute(map->pmap, start, end-start,
9828 attribute, value);
9829 vm_map_unlock(map);
9830 return ret;
9831 }
9832
9833 ret = KERN_SUCCESS; /* Assume it all worked */
9834
9835 while(sync_size) {
9836 if (vm_map_lookup_entry(map, start, &entry)) {
9837 vm_map_size_t sub_size;
9838 if((entry->vme_end - start) > sync_size) {
9839 sub_size = sync_size;
9840 sync_size = 0;
9841 } else {
9842 sub_size = entry->vme_end - start;
9843 sync_size -= sub_size;
9844 }
9845 if(entry->is_sub_map) {
9846 vm_map_offset_t sub_start;
9847 vm_map_offset_t sub_end;
9848
9849 sub_start = (start - entry->vme_start)
9850 + entry->offset;
9851 sub_end = sub_start + sub_size;
9852 vm_map_machine_attribute(
9853 entry->object.sub_map,
9854 sub_start,
9855 sub_end,
9856 attribute, value);
9857 } else {
9858 if(entry->object.vm_object) {
9859 vm_page_t m;
9860 vm_object_t object;
9861 vm_object_t base_object;
9862 vm_object_t last_object;
9863 vm_object_offset_t offset;
9864 vm_object_offset_t base_offset;
9865 vm_map_size_t range;
9866 range = sub_size;
9867 offset = (start - entry->vme_start)
9868 + entry->offset;
9869 base_offset = offset;
9870 object = entry->object.vm_object;
9871 base_object = object;
9872 last_object = NULL;
9873
9874 vm_object_lock(object);
9875
9876 while (range) {
9877 m = vm_page_lookup(
9878 object, offset);
9879
9880 if (m && !m->fictitious) {
9881 ret =
9882 pmap_attribute_cache_sync(
9883 m->phys_page,
9884 PAGE_SIZE,
9885 attribute, value);
9886
9887 } else if (object->shadow) {
9888 offset = offset + object->shadow_offset;
9889 last_object = object;
9890 object = object->shadow;
9891 vm_object_lock(last_object->shadow);
9892 vm_object_unlock(last_object);
9893 continue;
9894 }
9895 range -= PAGE_SIZE;
9896
9897 if (base_object != object) {
9898 vm_object_unlock(object);
9899 vm_object_lock(base_object);
9900 object = base_object;
9901 }
9902 /* Bump to the next page */
9903 base_offset += PAGE_SIZE;
9904 offset = base_offset;
9905 }
9906 vm_object_unlock(object);
9907 }
9908 }
9909 start += sub_size;
9910 } else {
9911 vm_map_unlock(map);
9912 return KERN_FAILURE;
9913 }
9914
9915 }
9916
9917 vm_map_unlock(map);
9918
9919 return ret;
9920 }
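/*
 * Illustrative sketch, for exposition only: a kernel-side caller asking
 * for a cache flush over a range via the MATTR_CACHE attribute handled by
 * the traversal above.  The wrapper name is hypothetical.
 */
#if 0	/* example only -- never compiled */
static kern_return_t
example_flush_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_map_machine_attribute(map, start, end,
					MATTR_CACHE, &value);
}
#endif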
9921
9922 /*
9923 * vm_map_behavior_set:
9924 *
9925 * Sets the paging reference behavior of the specified address
9926 * range in the target map. Paging reference behavior affects
9927 * how pagein operations resulting from faults on the map will be
9928 * clustered.
9929 */
9930 kern_return_t
9931 vm_map_behavior_set(
9932 vm_map_t map,
9933 vm_map_offset_t start,
9934 vm_map_offset_t end,
9935 vm_behavior_t new_behavior)
9936 {
9937 register vm_map_entry_t entry;
9938 vm_map_entry_t temp_entry;
9939
9940 XPR(XPR_VM_MAP,
9941 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
9942 map, start, end, new_behavior, 0);
9943
9944 switch (new_behavior) {
9945
9946 /*
9947 * This first block of behaviors all set a persistent state on the specified
9948 * memory range. All we have to do here is to record the desired behavior
9949 * in the vm_map_entry_t's.
9950 */
9951
9952 case VM_BEHAVIOR_DEFAULT:
9953 case VM_BEHAVIOR_RANDOM:
9954 case VM_BEHAVIOR_SEQUENTIAL:
9955 case VM_BEHAVIOR_RSEQNTL:
9956 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
9957 vm_map_lock(map);
9958
9959 /*
9960 * The entire address range must be valid for the map.
9961 * Note that vm_map_range_check() does a
9962 * vm_map_lookup_entry() internally and returns the
9963 * entry containing the start of the address range if
9964 * the entire range is valid.
9965 */
9966 if (vm_map_range_check(map, start, end, &temp_entry)) {
9967 entry = temp_entry;
9968 vm_map_clip_start(map, entry, start);
9969 }
9970 else {
9971 vm_map_unlock(map);
9972 return(KERN_INVALID_ADDRESS);
9973 }
9974
9975 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
9976 vm_map_clip_end(map, entry, end);
9977 assert(!entry->use_pmap);
9978
9979 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
9980 entry->zero_wired_pages = TRUE;
9981 } else {
9982 entry->behavior = new_behavior;
9983 }
9984 entry = entry->vme_next;
9985 }
9986
9987 vm_map_unlock(map);
9988 break;
9989
9990 /*
9991 * The rest of these are different from the above in that they cause
9992 * an immediate action to take place as opposed to setting a behavior that
9993 * affects future actions.
9994 */
9995
9996 case VM_BEHAVIOR_WILLNEED:
9997 return vm_map_willneed(map, start, end);
9998
9999 case VM_BEHAVIOR_DONTNEED:
10000 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10001
10002 case VM_BEHAVIOR_FREE:
10003 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10004
10005 case VM_BEHAVIOR_REUSABLE:
10006 return vm_map_reusable_pages(map, start, end);
10007
10008 case VM_BEHAVIOR_REUSE:
10009 return vm_map_reuse_pages(map, start, end);
10010
10011 case VM_BEHAVIOR_CAN_REUSE:
10012 return vm_map_can_reuse(map, start, end);
10013
10014 default:
10015 return(KERN_INVALID_ARGUMENT);
10016 }
10017
10018 return(KERN_SUCCESS);
10019 }
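/*
 * Illustrative sketch, for exposition only: marking a range for sequential
 * access so that later faults on it cluster accordingly.  The wrapper name
 * is hypothetical; "start"/"end" are assumed page-aligned.
 */
#if 0	/* example only -- never compiled */
static kern_return_t
example_mark_sequential(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
}
#endif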
10020
10021
10022 /*
10023 * Internals for madvise(MADV_WILLNEED) system call.
10024 *
10025 * The present implementation is to do a read-ahead if the mapping corresponds
10026 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10027 * and basically ignore the "advice" (which we are always free to do).
10028 */
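/*
 * Illustrative user-space sketch, for exposition only: the
 * madvise(MADV_WILLNEED) call mentioned above is the usual way this
 * read-ahead path is exercised.
 */
#if 0	/* example only -- never compiled */
#include <sys/mman.h>

static void
prefetch_hint(void *addr, size_t len)
{
	/* best-effort advice; failure is not fatal for the caller */
	(void) madvise(addr, len, MADV_WILLNEED);
}
#endif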
10029
10030
10031 static kern_return_t
10032 vm_map_willneed(
10033 vm_map_t map,
10034 vm_map_offset_t start,
10035 vm_map_offset_t end
10036 )
10037 {
10038 vm_map_entry_t entry;
10039 vm_object_t object;
10040 memory_object_t pager;
10041 struct vm_object_fault_info fault_info;
10042 kern_return_t kr;
10043 vm_object_size_t len;
10044 vm_object_offset_t offset;
10045
10046 /*
10047 * Fill in static values in fault_info. Several fields get ignored by the code
10048 * we call, but we'll fill them in anyway since uninitialized fields are bad
10049 * when it comes to future backwards compatibility.
10050 */
10051
10052 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10053 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10054 fault_info.no_cache = FALSE; /* ignored value */
10055 fault_info.stealth = TRUE;
10056
10057 /*
10058 * The MADV_WILLNEED operation doesn't require any changes to the
10059 * vm_map_entry_t's, so the read lock is sufficient.
10060 */
10061
10062 vm_map_lock_read(map);
10063
10064 /*
10065 * The madvise semantics require that the address range be fully
10066 * allocated with no holes. Otherwise, we're required to return
10067 * an error.
10068 */
10069
10070 if (vm_map_range_check(map, start, end, &entry)) {
10071
10072 /*
10073 * Examine each vm_map_entry_t in the range.
10074 */
10075
10076 for (; entry->vme_start < end; start += len, entry = entry->vme_next) {
10077
10078 /*
10079 * The first time through, the start address could be anywhere within the
10080 * vm_map_entry we found. So adjust the offset to correspond. After that,
10081 * the offset will always be zero to correspond to the beginning of the current
10082 * vm_map_entry.
10083 */
10084
10085 offset = (start - entry->vme_start) + entry->offset;
10086
10087 /*
10088 * Set the length so we don't go beyond the end of the map_entry or beyond the
10089 * end of the range we were given. This range could also span multiple map
10090 * entries, all of which map different files, so make sure we only do the right
10091 * amount of I/O for each object. Note that it's possible for there to be
10092 * multiple map entries all referring to the same object but with different
10093 * page permissions, but it's not worth trying to optimize that case.
10094 */
10095
10096 len = MIN(entry->vme_end - start, end - start);
10097
10098 if ((vm_size_t) len != len) {
10099 /* 32-bit overflow */
10100 len = (vm_size_t) (0 - PAGE_SIZE);
10101 }
10102 fault_info.cluster_size = (vm_size_t) len;
10103 fault_info.lo_offset = offset;
10104 fault_info.hi_offset = offset + len;
10105 fault_info.user_tag = entry->alias;
10106
10107 /*
10108 * If there's no read permission to this mapping, then just skip it.
10109 */
10110
10111 if ((entry->protection & VM_PROT_READ) == 0) {
10112 continue;
10113 }
10114
10115 /*
10116 * Find the file object backing this map entry. If there is none,
10117 * then we simply ignore the "will need" advice for this entry and
10118 * go on to the next one.
10119 */
10120
10121 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10122 continue;
10123 }
10124
10125 vm_object_paging_begin(object);
10126 pager = object->pager;
10127 vm_object_unlock(object);
10128
10129 /*
10130 * Get the data from the object asynchronously.
10131 *
10132 * Note that memory_object_data_request() places limits on the amount
10133 * of I/O it will do. Regardless of the len we specified, it won't do
10134 * more than MAX_UPL_TRANSFER and it silently truncates the len to that
10135 * size. This isn't necessarily bad since madvise shouldn't really be
10136 * used to page in unlimited amounts of data. Other Unix variants limit
10137 * the willneed case as well. If this turns out to be an issue for
10138 * developers, then we can always adjust the policy here and still be
10139 * backwards compatible since this is all just "advice".
10140 */
10141
10142 kr = memory_object_data_request(
10143 pager,
10144 offset + object->paging_offset,
10145 0, /* ignored */
10146 VM_PROT_READ,
10147 (memory_object_fault_info_t)&fault_info);
10148
10149 vm_object_lock(object);
10150 vm_object_paging_end(object);
10151 vm_object_unlock(object);
10152
10153 /*
10154 * If we couldn't do the I/O for some reason, just give up on the
10155 * madvise. We still return success to the user since madvise isn't
10156 * supposed to fail when the advice can't be taken.
10157 */
10158
10159 if (kr != KERN_SUCCESS) {
10160 break;
10161 }
10162 }
10163
10164 kr = KERN_SUCCESS;
10165 } else
10166 kr = KERN_INVALID_ADDRESS;
10167
10168 vm_map_unlock_read(map);
10169 return kr;
10170 }
10171
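/*
 * vm_map_entry_is_reusable:
 *
 * Helper for the REUSABLE/REUSE/CAN_REUSE paths below. Returns TRUE only
 * for a plain, unshared, unwired entry with default protection and
 * inheritance (no submap, not in transition, not permanent, no superpage,
 * no wired pages) whose backing object, if any, is a private internal
 * object with a single reference, no copy or shadow, default caching
 * attributes and no code signing.
 */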
10172 static boolean_t
10173 vm_map_entry_is_reusable(
10174 vm_map_entry_t entry)
10175 {
10176 vm_object_t object;
10177
10178 if (entry->is_shared ||
10179 entry->is_sub_map ||
10180 entry->in_transition ||
10181 entry->protection != VM_PROT_DEFAULT ||
10182 entry->max_protection != VM_PROT_ALL ||
10183 entry->inheritance != VM_INHERIT_DEFAULT ||
10184 entry->no_cache ||
10185 entry->permanent ||
10186 entry->superpage_size != 0 ||
10187 entry->zero_wired_pages ||
10188 entry->wired_count != 0 ||
10189 entry->user_wired_count != 0) {
10190 return FALSE;
10191 }
10192
10193 object = entry->object.vm_object;
10194 if (object == VM_OBJECT_NULL) {
10195 return TRUE;
10196 }
10197 if (object->ref_count == 1 &&
10198 object->wired_page_count == 0 &&
10199 object->copy == VM_OBJECT_NULL &&
10200 object->shadow == VM_OBJECT_NULL &&
10201 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10202 object->internal &&
10203 !object->true_share &&
10204 object->wimg_bits == VM_WIMG_DEFAULT &&
10205 !object->code_signed) {
10206 return TRUE;
10207 }
10208 return FALSE;
10209
10210
10211 }
10212
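/*
 * vm_map_reuse_pages:
 *
 * Handles VM_BEHAVIOR_REUSE from vm_map_behavior_set() above. Walks every
 * entry in [start, end), requires each to pass vm_map_entry_is_reusable(),
 * and calls vm_object_reuse_pages() on the backing object, if there is one,
 * so that pages previously marked reusable are accounted as in use again.
 * Large-malloc regions tagged VM_MEMORY_MALLOC_LARGE_REUSABLE are re-tagged
 * VM_MEMORY_MALLOC_LARGE_REUSED.
 */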
10213 static kern_return_t
10214 vm_map_reuse_pages(
10215 vm_map_t map,
10216 vm_map_offset_t start,
10217 vm_map_offset_t end)
10218 {
10219 vm_map_entry_t entry;
10220 vm_object_t object;
10221 vm_object_offset_t start_offset, end_offset;
10222
10223 /*
10224 * The MADV_REUSE operation doesn't require any changes to the
10225 * vm_map_entry_t's, so the read lock is sufficient.
10226 */
10227
10228 vm_map_lock_read(map);
10229
10230 /*
10231 * The madvise semantics require that the address range be fully
10232 * allocated with no holes. Otherwise, we're required to return
10233 * an error.
10234 */
10235
10236 if (!vm_map_range_check(map, start, end, &entry)) {
10237 vm_map_unlock_read(map);
10238 vm_page_stats_reusable.reuse_pages_failure++;
10239 return KERN_INVALID_ADDRESS;
10240 }
10241
10242 /*
10243 * Examine each vm_map_entry_t in the range.
10244 */
10245 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10246 entry = entry->vme_next) {
10247 /*
10248 * Sanity check on the VM map entry.
10249 */
10250 if (! vm_map_entry_is_reusable(entry)) {
10251 vm_map_unlock_read(map);
10252 vm_page_stats_reusable.reuse_pages_failure++;
10253 return KERN_INVALID_ADDRESS;
10254 }
10255
10256 /*
10257 * The first time through, the start address could be anywhere
10258 * within the vm_map_entry we found. So adjust the offset to
10259 * correspond.
10260 */
10261 if (entry->vme_start < start) {
10262 start_offset = start - entry->vme_start;
10263 } else {
10264 start_offset = 0;
10265 }
10266 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10267 start_offset += entry->offset;
10268 end_offset += entry->offset;
10269
10270 object = entry->object.vm_object;
10271 if (object != VM_OBJECT_NULL) {
10272 vm_object_lock(object);
10273 vm_object_reuse_pages(object, start_offset, end_offset,
10274 TRUE);
10275 vm_object_unlock(object);
10276 }
10277
10278 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10279 /*
10280 * XXX
10281 * We do not hold the VM map exclusively here.
10282 * The "alias" field is not that critical, so it's
10283 * safe to update it here, as long as it is the only
10284 * one that can be modified while holding the VM map
10285 * "shared".
10286 */
10287 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10288 }
10289 }
10290
10291 vm_map_unlock_read(map);
10292 vm_page_stats_reusable.reuse_pages_success++;
10293 return KERN_SUCCESS;
10294 }
10295
10296
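/*
 * vm_map_reusable_pages:
 *
 * Handles VM_BEHAVIOR_REUSABLE from vm_map_behavior_set() above. For each
 * entry in [start, end) that passes vm_map_entry_is_reusable(): if the
 * backing object is unshared and unshadowed, its resident pages in the
 * range are deactivated and marked reusable (their contents may be
 * discarded); otherwise the pages are merely counted as shared in
 * vm_page_stats_reusable. Large-malloc regions are re-tagged
 * VM_MEMORY_MALLOC_LARGE_REUSABLE.
 */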
10297 static kern_return_t
10298 vm_map_reusable_pages(
10299 vm_map_t map,
10300 vm_map_offset_t start,
10301 vm_map_offset_t end)
10302 {
10303 vm_map_entry_t entry;
10304 vm_object_t object;
10305 vm_object_offset_t start_offset, end_offset;
10306
10307 /*
10308 * The MADV_REUSABLE operation doesn't require any changes to the
10309 * vm_map_entry_t's, so the read lock is sufficient.
10310 */
10311
10312 vm_map_lock_read(map);
10313
10314 /*
10315 * The madvise semantics require that the address range be fully
10316 * allocated with no holes. Otherwise, we're required to return
10317 * an error.
10318 */
10319
10320 if (!vm_map_range_check(map, start, end, &entry)) {
10321 vm_map_unlock_read(map);
10322 vm_page_stats_reusable.reusable_pages_failure++;
10323 return KERN_INVALID_ADDRESS;
10324 }
10325
10326 /*
10327 * Examine each vm_map_entry_t in the range.
10328 */
10329 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10330 entry = entry->vme_next) {
10331 int kill_pages = 0;
10332
10333 /*
10334 * Sanity check on the VM map entry.
10335 */
10336 if (! vm_map_entry_is_reusable(entry)) {
10337 vm_map_unlock_read(map);
10338 vm_page_stats_reusable.reusable_pages_failure++;
10339 return KERN_INVALID_ADDRESS;
10340 }
10341
10342 /*
10343 * The first time through, the start address could be anywhere
10344 * within the vm_map_entry we found. So adjust the offset to
10345 * correspond.
10346 */
10347 if (entry->vme_start < start) {
10348 start_offset = start - entry->vme_start;
10349 } else {
10350 start_offset = 0;
10351 }
10352 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10353 start_offset += entry->offset;
10354 end_offset += entry->offset;
10355
10356 object = entry->object.vm_object;
10357 if (object == VM_OBJECT_NULL)
10358 continue;
10359
10360
10361 vm_object_lock(object);
10362 if (object->ref_count == 1 && !object->shadow)
10363 kill_pages = 1;
10364 else
10365 kill_pages = -1;
10366 if (kill_pages != -1) {
10367 vm_object_deactivate_pages(object,
10368 start_offset,
10369 end_offset - start_offset,
10370 kill_pages,
10371 TRUE /*reusable_pages*/);
10372 } else {
10373 vm_page_stats_reusable.reusable_pages_shared++;
10374 }
10375 vm_object_unlock(object);
10376
10377 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10378 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10379 /*
10380 * XXX
10381 * We do not hold the VM map exclusively here.
10382 * The "alias" field is not that critical, so it's
10383 * safe to update it here, as long as it is the only
10384 * one that can be modified while holding the VM map
10385 * "shared".
10386 */
10387 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10388 }
10389 }
10390
10391 vm_map_unlock_read(map);
10392 vm_page_stats_reusable.reusable_pages_success++;
10393 return KERN_SUCCESS;
10394 }
10395
10396
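/*
 * vm_map_can_reuse:
 *
 * Handles VM_BEHAVIOR_CAN_REUSE from vm_map_behavior_set() above. A
 * read-only check: succeeds only if every entry in [start, end) passes
 * vm_map_entry_is_reusable(); nothing is modified beyond the statistics.
 */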
10397 static kern_return_t
10398 vm_map_can_reuse(
10399 vm_map_t map,
10400 vm_map_offset_t start,
10401 vm_map_offset_t end)
10402 {
10403 vm_map_entry_t entry;
10404
10405 /*
10406 * The MADV_REUSABLE operation doesn't require any changes to the
10407 * vm_map_entry_t's, so the read lock is sufficient.
10408 */
10409
10410 vm_map_lock_read(map);
10411
10412 /*
10413 * The madvise semantics require that the address range be fully
10414 * allocated with no holes. Otherwise, we're required to return
10415 * an error.
10416 */
10417
10418 if (!vm_map_range_check(map, start, end, &entry)) {
10419 vm_map_unlock_read(map);
10420 vm_page_stats_reusable.can_reuse_failure++;
10421 return KERN_INVALID_ADDRESS;
10422 }
10423
10424 /*
10425 * Examine each vm_map_entry_t in the range.
10426 */
10427 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10428 entry = entry->vme_next) {
10429 /*
10430 * Sanity check on the VM map entry.
10431 */
10432 if (! vm_map_entry_is_reusable(entry)) {
10433 vm_map_unlock_read(map);
10434 vm_page_stats_reusable.can_reuse_failure++;
10435 return KERN_INVALID_ADDRESS;
10436 }
10437 }
10438
10439 vm_map_unlock_read(map);
10440 vm_page_stats_reusable.can_reuse_success++;
10441 return KERN_SUCCESS;
10442 }
10443
10444
10445
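/*
 * The block below (guarded by MACH_KDB) is kernel-debugger (ddb)
 * pretty-printing support; it is compiled only when MACH_KDB is configured.
 */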
10446 #include <mach_kdb.h>
10447 #if MACH_KDB
10448 #include <ddb/db_output.h>
10449 #include <vm/vm_print.h>
10450
10451 #define printf db_printf
10452
10453 /*
10454 * Forward declarations for internal functions.
10455 */
10456 extern void vm_map_links_print(
10457 struct vm_map_links *links);
10458
10459 extern void vm_map_header_print(
10460 struct vm_map_header *header);
10461
10462 extern void vm_map_entry_print(
10463 vm_map_entry_t entry);
10464
10465 extern void vm_follow_entry(
10466 vm_map_entry_t entry);
10467
10468 extern void vm_follow_map(
10469 vm_map_t map);
10470
10471 /*
10472 * vm_map_links_print: [ debug ]
10473 */
10474 void
10475 vm_map_links_print(
10476 struct vm_map_links *links)
10477 {
10478 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10479 links->prev,
10480 links->next,
10481 (unsigned long long)links->start,
10482 (unsigned long long)links->end);
10483 }
10484
10485 /*
10486 * vm_map_header_print: [ debug ]
10487 */
10488 void
10489 vm_map_header_print(
10490 struct vm_map_header *header)
10491 {
10492 vm_map_links_print(&header->links);
10493 iprintf("nentries = %08X, %sentries_pageable\n",
10494 header->nentries,
10495 (header->entries_pageable ? "" : "!"));
10496 }
10497
10498 /*
10499 * vm_follow_entry: [ debug ]
10500 */
10501 void
10502 vm_follow_entry(
10503 vm_map_entry_t entry)
10504 {
10505 int shadows;
10506
10507 iprintf("map entry %08X\n", entry);
10508
10509 db_indent += 2;
10510
10511 shadows = vm_follow_object(entry->object.vm_object);
10512 iprintf("Total objects : %d\n",shadows);
10513
10514 db_indent -= 2;
10515 }
10516
10517 /*
10518 * vm_map_entry_print: [ debug ]
10519 */
10520 void
10521 vm_map_entry_print(
10522 register vm_map_entry_t entry)
10523 {
10524 static const char *inheritance_name[4] =
10525 { "share", "copy", "none", "?"};
10526 static const char *behavior_name[4] =
10527 { "dflt", "rand", "seqtl", "rseqntl" };
10528
10529 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10530
10531 db_indent += 2;
10532
10533 vm_map_links_print(&entry->links);
10534
10535 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10536 (unsigned long long)entry->vme_start,
10537 (unsigned long long)entry->vme_end,
10538 entry->protection,
10539 entry->max_protection,
10540 inheritance_name[(entry->inheritance & 0x3)]);
10541
10542 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10543 behavior_name[(entry->behavior & 0x3)],
10544 entry->wired_count,
10545 entry->user_wired_count);
10546 iprintf("%sin_transition, %sneeds_wakeup\n",
10547 (entry->in_transition ? "" : "!"),
10548 (entry->needs_wakeup ? "" : "!"));
10549
10550 if (entry->is_sub_map) {
10551 iprintf("submap = %08X - offset = %016llX\n",
10552 entry->object.sub_map,
10553 (unsigned long long)entry->offset);
10554 } else {
10555 iprintf("object = %08X offset = %016llX - ",
10556 entry->object.vm_object,
10557 (unsigned long long)entry->offset);
10558 printf("%sis_shared, %sneeds_copy\n",
10559 (entry->is_shared ? "" : "!"),
10560 (entry->needs_copy ? "" : "!"));
10561 }
10562
10563 db_indent -= 2;
10564 }
10565
10566 /*
10567 * vm_follow_map: [ debug ]
10568 */
10569 void
10570 vm_follow_map(
10571 vm_map_t map)
10572 {
10573 register vm_map_entry_t entry;
10574
10575 iprintf("task map %08X\n", map);
10576
10577 db_indent += 2;
10578
10579 for (entry = vm_map_first_entry(map);
10580 entry && entry != vm_map_to_entry(map);
10581 entry = entry->vme_next) {
10582 vm_follow_entry(entry);
10583 }
10584
10585 db_indent -= 2;
10586 }
10587
10588 /*
10589 * vm_map_print: [ debug ]
10590 */
10591 void
10592 vm_map_print(
10593 db_addr_t inmap)
10594 {
10595 register vm_map_entry_t entry;
10596 vm_map_t map;
10597 #if TASK_SWAPPER
10598 char *swstate;
10599 #endif /* TASK_SWAPPER */
10600
10601 map = (vm_map_t)(long)
10602 inmap; /* Make sure we have the right type */
10603
10604 iprintf("task map %08X\n", map);
10605
10606 db_indent += 2;
10607
10608 vm_map_header_print(&map->hdr);
10609
10610 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10611 map->pmap,
10612 map->size,
10613 map->ref_count,
10614 map->hint,
10615 map->first_free);
10616
10617 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10618 (map->wait_for_space ? "" : "!"),
10619 (map->wiring_required ? "" : "!"),
10620 map->timestamp);
10621
10622 #if TASK_SWAPPER
10623 switch (map->sw_state) {
10624 case MAP_SW_IN:
10625 swstate = "SW_IN";
10626 break;
10627 case MAP_SW_OUT:
10628 swstate = "SW_OUT";
10629 break;
10630 default:
10631 swstate = "????";
10632 break;
10633 }
10634 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
10635 #endif /* TASK_SWAPPER */
10636
10637 for (entry = vm_map_first_entry(map);
10638 entry && entry != vm_map_to_entry(map);
10639 entry = entry->vme_next) {
10640 vm_map_entry_print(entry);
10641 }
10642
10643 db_indent -= 2;
10644 }
10645
10646 /*
10647 * Routine: vm_map_copy_print
10648 * Purpose:
10649 * Pretty-print a copy object for ddb.
10650 */
10651
10652 void
10653 vm_map_copy_print(
10654 db_addr_t incopy)
10655 {
10656 vm_map_copy_t copy;
10657 vm_map_entry_t entry;
10658
10659 copy = (vm_map_copy_t)(long)
10660 incopy; /* Make sure we have the right type */
10661
10662 printf("copy object 0x%x\n", copy);
10663
10664 db_indent += 2;
10665
10666 iprintf("type=%d", copy->type);
10667 switch (copy->type) {
10668 case VM_MAP_COPY_ENTRY_LIST:
10669 printf("[entry_list]");
10670 break;
10671
10672 case VM_MAP_COPY_OBJECT:
10673 printf("[object]");
10674 break;
10675
10676 case VM_MAP_COPY_KERNEL_BUFFER:
10677 printf("[kernel_buffer]");
10678 break;
10679
10680 default:
10681 printf("[bad type]");
10682 break;
10683 }
10684 printf(", offset=0x%llx", (unsigned long long)copy->offset);
10685 printf(", size=0x%x\n", copy->size);
10686
10687 switch (copy->type) {
10688 case VM_MAP_COPY_ENTRY_LIST:
10689 vm_map_header_print(&copy->cpy_hdr);
10690 for (entry = vm_map_copy_first_entry(copy);
10691 entry && entry != vm_map_copy_to_entry(copy);
10692 entry = entry->vme_next) {
10693 vm_map_entry_print(entry);
10694 }
10695 break;
10696
10697 case VM_MAP_COPY_OBJECT:
10698 iprintf("object=0x%x\n", copy->cpy_object);
10699 break;
10700
10701 case VM_MAP_COPY_KERNEL_BUFFER:
10702 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
10703 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
10704 break;
10705
10706 }
10707
10708 db_indent -=2;
10709 }
10710
10711 /*
10712 * db_vm_map_total_size(map) [ debug ]
10713 *
10714 * return the total virtual size (in bytes) of the map
10715 */
10716 vm_map_size_t
10717 db_vm_map_total_size(
10718 db_addr_t inmap)
10719 {
10720 vm_map_entry_t entry;
10721 vm_map_size_t total;
10722 vm_map_t map;
10723
10724 map = (vm_map_t)(long)
10725 inmap; /* Make sure we have the right type */
10726
10727 total = 0;
10728 for (entry = vm_map_first_entry(map);
10729 entry != vm_map_to_entry(map);
10730 entry = entry->vme_next) {
10731 total += entry->vme_end - entry->vme_start;
10732 }
10733
10734 return total;
10735 }
10736
10737 #endif /* MACH_KDB */
10738
10739 /*
10740 * Routine: vm_map_entry_insert
10741 *
10742 * Description: This routine inserts a new vm_map_entry in a locked map.
10743 */
10744 vm_map_entry_t
10745 vm_map_entry_insert(
10746 vm_map_t map,
10747 vm_map_entry_t insp_entry,
10748 vm_map_offset_t start,
10749 vm_map_offset_t end,
10750 vm_object_t object,
10751 vm_object_offset_t offset,
10752 boolean_t needs_copy,
10753 boolean_t is_shared,
10754 boolean_t in_transition,
10755 vm_prot_t cur_protection,
10756 vm_prot_t max_protection,
10757 vm_behavior_t behavior,
10758 vm_inherit_t inheritance,
10759 unsigned wired_count,
10760 boolean_t no_cache,
10761 boolean_t permanent,
10762 unsigned int superpage_size)
10763 {
10764 vm_map_entry_t new_entry;
10765
10766 assert(insp_entry != (vm_map_entry_t)0);
10767
10768 new_entry = vm_map_entry_create(map);
10769
10770 new_entry->vme_start = start;
10771 new_entry->vme_end = end;
10772 assert(page_aligned(new_entry->vme_start));
10773 assert(page_aligned(new_entry->vme_end));
10774
10775 new_entry->object.vm_object = object;
10776 new_entry->offset = offset;
10777 new_entry->is_shared = is_shared;
10778 new_entry->is_sub_map = FALSE;
10779 new_entry->needs_copy = needs_copy;
10780 new_entry->in_transition = in_transition;
10781 new_entry->needs_wakeup = FALSE;
10782 new_entry->inheritance = inheritance;
10783 new_entry->protection = cur_protection;
10784 new_entry->max_protection = max_protection;
10785 new_entry->behavior = behavior;
10786 new_entry->wired_count = wired_count;
10787 new_entry->user_wired_count = 0;
10788 new_entry->use_pmap = FALSE;
10789 new_entry->alias = 0;
10790 new_entry->zero_wired_pages = FALSE;
10791 new_entry->no_cache = no_cache;
10792 new_entry->permanent = permanent;
10793 new_entry->superpage_size = superpage_size;
10794
10795 /*
10796 * Insert the new entry into the list.
10797 */
10798
10799 vm_map_entry_link(map, insp_entry, new_entry);
10800 map->size += end - start;
10801
10802 /*
10803 * Update the free space hint and the lookup hint.
10804 */
10805
10806 SAVE_HINT_MAP_WRITE(map, new_entry);
10807 return new_entry;
10808 }
10809
10810 /*
10811 * Routine: vm_map_remap_extract
10812 *
10813 * Description: This routine returns a vm_map_entry list from a map.
10814 */
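/*
 * More precisely: the entries covering [addr, addr + size) in "map" are
 * duplicated into the caller-supplied "map_header" (at addresses starting
 * from 0), taking object references that are either shared or copied
 * according to the "copy" argument, and *cur_protection / *max_protection
 * are narrowed to the intersection of the source entries' protections.
 */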
10815 static kern_return_t
10816 vm_map_remap_extract(
10817 vm_map_t map,
10818 vm_map_offset_t addr,
10819 vm_map_size_t size,
10820 boolean_t copy,
10821 struct vm_map_header *map_header,
10822 vm_prot_t *cur_protection,
10823 vm_prot_t *max_protection,
10824 /* What, no behavior? */
10825 vm_inherit_t inheritance,
10826 boolean_t pageable)
10827 {
10828 kern_return_t result;
10829 vm_map_size_t mapped_size;
10830 vm_map_size_t tmp_size;
10831 vm_map_entry_t src_entry; /* result of last map lookup */
10832 vm_map_entry_t new_entry;
10833 vm_object_offset_t offset;
10834 vm_map_offset_t map_address;
10835 vm_map_offset_t src_start; /* start of entry to map */
10836 vm_map_offset_t src_end; /* end of region to be mapped */
10837 vm_object_t object;
10838 vm_map_version_t version;
10839 boolean_t src_needs_copy;
10840 boolean_t new_entry_needs_copy;
10841
10842 assert(map != VM_MAP_NULL);
10843 assert(size != 0 && size == vm_map_round_page(size));
10844 assert(inheritance == VM_INHERIT_NONE ||
10845 inheritance == VM_INHERIT_COPY ||
10846 inheritance == VM_INHERIT_SHARE);
10847
10848 /*
10849 * Compute start and end of region.
10850 */
10851 src_start = vm_map_trunc_page(addr);
10852 src_end = vm_map_round_page(src_start + size);
10853
10854 /*
10855 * Initialize map_header.
10856 */
10857 map_header->links.next = (struct vm_map_entry *)&map_header->links;
10858 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
10859 map_header->nentries = 0;
10860 map_header->entries_pageable = pageable;
10861
10862 *cur_protection = VM_PROT_ALL;
10863 *max_protection = VM_PROT_ALL;
10864
10865 map_address = 0;
10866 mapped_size = 0;
10867 result = KERN_SUCCESS;
10868
10869 /*
10870 * The specified source virtual space might correspond to
10871 * multiple map entries, so we need to loop over them.
10872 */
10873 vm_map_lock(map);
10874 while (mapped_size != size) {
10875 vm_map_size_t entry_size;
10876
10877 /*
10878 * Find the beginning of the region.
10879 */
10880 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
10881 result = KERN_INVALID_ADDRESS;
10882 break;
10883 }
10884
10885 if (src_start < src_entry->vme_start ||
10886 (mapped_size && src_start != src_entry->vme_start)) {
10887 result = KERN_INVALID_ADDRESS;
10888 break;
10889 }
10890
10891 tmp_size = size - mapped_size;
10892 if (src_end > src_entry->vme_end)
10893 tmp_size -= (src_end - src_entry->vme_end);
10894
10895 entry_size = (vm_map_size_t)(src_entry->vme_end -
10896 src_entry->vme_start);
10897
10898 if(src_entry->is_sub_map) {
10899 vm_map_reference(src_entry->object.sub_map);
10900 object = VM_OBJECT_NULL;
10901 } else {
10902 object = src_entry->object.vm_object;
10903
10904 if (object == VM_OBJECT_NULL) {
10905 object = vm_object_allocate(entry_size);
10906 src_entry->offset = 0;
10907 src_entry->object.vm_object = object;
10908 } else if (object->copy_strategy !=
10909 MEMORY_OBJECT_COPY_SYMMETRIC) {
10910 /*
10911 * We are already using an asymmetric
10912 * copy, and therefore we already have
10913 * the right object.
10914 */
10915 assert(!src_entry->needs_copy);
10916 } else if (src_entry->needs_copy || object->shadowed ||
10917 (object->internal && !object->true_share &&
10918 !src_entry->is_shared &&
10919 object->size > entry_size)) {
10920
10921 vm_object_shadow(&src_entry->object.vm_object,
10922 &src_entry->offset,
10923 entry_size);
10924
10925 if (!src_entry->needs_copy &&
10926 (src_entry->protection & VM_PROT_WRITE)) {
10927 vm_prot_t prot;
10928
10929 prot = src_entry->protection & ~VM_PROT_WRITE;
10930
10931 if (override_nx(map, src_entry->alias) && prot)
10932 prot |= VM_PROT_EXECUTE;
10933
10934 if(map->mapped) {
10935 vm_object_pmap_protect(
10936 src_entry->object.vm_object,
10937 src_entry->offset,
10938 entry_size,
10939 PMAP_NULL,
10940 src_entry->vme_start,
10941 prot);
10942 } else {
10943 pmap_protect(vm_map_pmap(map),
10944 src_entry->vme_start,
10945 src_entry->vme_end,
10946 prot);
10947 }
10948 }
10949
10950 object = src_entry->object.vm_object;
10951 src_entry->needs_copy = FALSE;
10952 }
10953
10954
10955 vm_object_lock(object);
10956 vm_object_reference_locked(object); /* object ref. for new entry */
10957 if (object->copy_strategy ==
10958 MEMORY_OBJECT_COPY_SYMMETRIC) {
10959 object->copy_strategy =
10960 MEMORY_OBJECT_COPY_DELAY;
10961 }
10962 vm_object_unlock(object);
10963 }
10964
10965 offset = src_entry->offset + (src_start - src_entry->vme_start);
10966
10967 new_entry = _vm_map_entry_create(map_header);
10968 vm_map_entry_copy(new_entry, src_entry);
10969 new_entry->use_pmap = FALSE; /* clr address space specifics */
10970
10971 new_entry->vme_start = map_address;
10972 new_entry->vme_end = map_address + tmp_size;
10973 new_entry->inheritance = inheritance;
10974 new_entry->offset = offset;
10975
10976 /*
10977 * The new region has to be copied now if required.
10978 */
10979 RestartCopy:
10980 if (!copy) {
10981 src_entry->is_shared = TRUE;
10982 new_entry->is_shared = TRUE;
10983 if (!(new_entry->is_sub_map))
10984 new_entry->needs_copy = FALSE;
10985
10986 } else if (src_entry->is_sub_map) {
10987 /* make this a COW sub_map if not already */
10988 new_entry->needs_copy = TRUE;
10989 object = VM_OBJECT_NULL;
10990 } else if (src_entry->wired_count == 0 &&
10991 vm_object_copy_quickly(&new_entry->object.vm_object,
10992 new_entry->offset,
10993 (new_entry->vme_end -
10994 new_entry->vme_start),
10995 &src_needs_copy,
10996 &new_entry_needs_copy)) {
10997
10998 new_entry->needs_copy = new_entry_needs_copy;
10999 new_entry->is_shared = FALSE;
11000
11001 /*
11002 * Handle copy_on_write semantics.
11003 */
11004 if (src_needs_copy && !src_entry->needs_copy) {
11005 vm_prot_t prot;
11006
11007 prot = src_entry->protection & ~VM_PROT_WRITE;
11008
11009 if (override_nx(map, src_entry->alias) && prot)
11010 prot |= VM_PROT_EXECUTE;
11011
11012 vm_object_pmap_protect(object,
11013 offset,
11014 entry_size,
11015 ((src_entry->is_shared
11016 || map->mapped) ?
11017 PMAP_NULL : map->pmap),
11018 src_entry->vme_start,
11019 prot);
11020
11021 src_entry->needs_copy = TRUE;
11022 }
11023 /*
11024 * Throw away the old object reference of the new entry.
11025 */
11026 vm_object_deallocate(object);
11027
11028 } else {
11029 new_entry->is_shared = FALSE;
11030
11031 /*
11032 * The map can be safely unlocked since we
11033 * already hold a reference on the object.
11034 *
11035 * Record the timestamp of the map for later
11036 * verification, and unlock the map.
11037 */
11038 version.main_timestamp = map->timestamp;
11039 vm_map_unlock(map); /* Increments timestamp once! */
11040
11041 /*
11042 * Perform the copy.
11043 */
11044 if (src_entry->wired_count > 0) {
11045 vm_object_lock(object);
11046 result = vm_object_copy_slowly(
11047 object,
11048 offset,
11049 entry_size,
11050 THREAD_UNINT,
11051 &new_entry->object.vm_object);
11052
11053 new_entry->offset = 0;
11054 new_entry->needs_copy = FALSE;
11055 } else {
11056 result = vm_object_copy_strategically(
11057 object,
11058 offset,
11059 entry_size,
11060 &new_entry->object.vm_object,
11061 &new_entry->offset,
11062 &new_entry_needs_copy);
11063
11064 new_entry->needs_copy = new_entry_needs_copy;
11065 }
11066
11067 /*
11068 * Throw away the old object reference of the new entry.
11069 */
11070 vm_object_deallocate(object);
11071
11072 if (result != KERN_SUCCESS &&
11073 result != KERN_MEMORY_RESTART_COPY) {
11074 _vm_map_entry_dispose(map_header, new_entry);
11075 break;
11076 }
11077
11078 /*
11079 * Verify that the map has not substantially
11080 * changed while the copy was being made.
11081 */
11082
11083 vm_map_lock(map);
11084 if (version.main_timestamp + 1 != map->timestamp) {
11085 /*
11086 * Simple version comparison failed.
11087 *
11088 * Retry the lookup and verify that the
11089 * same object/offset are still present.
11090 */
11091 vm_object_deallocate(new_entry->
11092 object.vm_object);
11093 _vm_map_entry_dispose(map_header, new_entry);
11094 if (result == KERN_MEMORY_RESTART_COPY)
11095 result = KERN_SUCCESS;
11096 continue;
11097 }
11098
11099 if (result == KERN_MEMORY_RESTART_COPY) {
11100 vm_object_reference(object);
11101 goto RestartCopy;
11102 }
11103 }
11104
11105 _vm_map_entry_link(map_header,
11106 map_header->links.prev, new_entry);
11107
11108 *cur_protection &= src_entry->protection;
11109 *max_protection &= src_entry->max_protection;
11110
11111 map_address += tmp_size;
11112 mapped_size += tmp_size;
11113 src_start += tmp_size;
11114
11115 } /* end while */
11116
11117 vm_map_unlock(map);
11118 if (result != KERN_SUCCESS) {
11119 /*
11120 * Free all allocated elements.
11121 */
11122 for (src_entry = map_header->links.next;
11123 src_entry != (struct vm_map_entry *)&map_header->links;
11124 src_entry = new_entry) {
11125 new_entry = src_entry->vme_next;
11126 _vm_map_entry_unlink(map_header, src_entry);
11127 vm_object_deallocate(src_entry->object.vm_object);
11128 _vm_map_entry_dispose(map_header, src_entry);
11129 }
11130 }
11131 return result;
11132 }
11133
11134 /*
11135 * Routine: vm_remap
11136 *
11137 * Map a portion of a task's address space.
11138 * The mapped region must not overlap more than
11139 * one VM memory object. Protections and
11140 * inheritance attributes remain the same
11141 * as in the original task and are returned as out parameters.
11142 * Source and target task can be identical.
11143 * Other attributes are the same as for vm_map().
11144 */
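/*
 * Usage sketch (hypothetical caller, illustrative values only): share a
 * page-aligned window of src_map into target_map at a kernel-chosen
 * address:
 *
 *	vm_map_address_t addr = 0;
 *	vm_prot_t cur_prot, max_prot;
 *	kern_return_t kr;
 *
 *	kr = vm_map_remap(target_map, &addr, size, 0, TRUE,
 *			  src_map, src_addr, FALSE,
 *			  &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */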
11145 kern_return_t
11146 vm_map_remap(
11147 vm_map_t target_map,
11148 vm_map_address_t *address,
11149 vm_map_size_t size,
11150 vm_map_offset_t mask,
11151 boolean_t anywhere,
11152 vm_map_t src_map,
11153 vm_map_offset_t memory_address,
11154 boolean_t copy,
11155 vm_prot_t *cur_protection,
11156 vm_prot_t *max_protection,
11157 vm_inherit_t inheritance)
11158 {
11159 kern_return_t result;
11160 vm_map_entry_t entry;
11161 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11162 vm_map_entry_t new_entry;
11163 struct vm_map_header map_header;
11164
11165 if (target_map == VM_MAP_NULL)
11166 return KERN_INVALID_ARGUMENT;
11167
11168 switch (inheritance) {
11169 case VM_INHERIT_NONE:
11170 case VM_INHERIT_COPY:
11171 case VM_INHERIT_SHARE:
11172 if (size != 0 && src_map != VM_MAP_NULL)
11173 break;
11174 /*FALL THRU*/
11175 default:
11176 return KERN_INVALID_ARGUMENT;
11177 }
11178
11179 size = vm_map_round_page(size);
11180
11181 result = vm_map_remap_extract(src_map, memory_address,
11182 size, copy, &map_header,
11183 cur_protection,
11184 max_protection,
11185 inheritance,
11186 target_map->hdr.
11187 entries_pageable);
11188
11189 if (result != KERN_SUCCESS) {
11190 return result;
11191 }
11192
11193 /*
11194 * Allocate/check a range of free virtual address
11195 * space for the target
11196 */
11197 *address = vm_map_trunc_page(*address);
11198 vm_map_lock(target_map);
11199 result = vm_map_remap_range_allocate(target_map, address, size,
11200 mask, anywhere, &insp_entry);
11201
11202 for (entry = map_header.links.next;
11203 entry != (struct vm_map_entry *)&map_header.links;
11204 entry = new_entry) {
11205 new_entry = entry->vme_next;
11206 _vm_map_entry_unlink(&map_header, entry);
11207 if (result == KERN_SUCCESS) {
11208 entry->vme_start += *address;
11209 entry->vme_end += *address;
11210 vm_map_entry_link(target_map, insp_entry, entry);
11211 insp_entry = entry;
11212 } else {
11213 if (!entry->is_sub_map) {
11214 vm_object_deallocate(entry->object.vm_object);
11215 } else {
11216 vm_map_deallocate(entry->object.sub_map);
11217 }
11218 _vm_map_entry_dispose(&map_header, entry);
11219 }
11220 }
11221
11222 if (result == KERN_SUCCESS) {
11223 target_map->size += size;
11224 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11225 }
11226 vm_map_unlock(target_map);
11227
11228 if (result == KERN_SUCCESS && target_map->wiring_required)
11229 result = vm_map_wire(target_map, *address,
11230 *address + size, *cur_protection, TRUE);
11231 return result;
11232 }
11233
11234 /*
11235 * Routine: vm_map_remap_range_allocate
11236 *
11237 * Description:
11238 * Allocate a range in the specified virtual address map.
11239 * Returns the address and the map entry just before the allocated
11240 * range.
11241 *
11242 * Map must be locked.
11243 */
11244
11245 static kern_return_t
11246 vm_map_remap_range_allocate(
11247 vm_map_t map,
11248 vm_map_address_t *address, /* IN/OUT */
11249 vm_map_size_t size,
11250 vm_map_offset_t mask,
11251 boolean_t anywhere,
11252 vm_map_entry_t *map_entry) /* OUT */
11253 {
11254 register vm_map_entry_t entry;
11255 register vm_map_offset_t start;
11256 register vm_map_offset_t end;
11257
11258 StartAgain: ;
11259
11260 start = *address;
11261
11262 if (anywhere)
11263 {
11264 /*
11265 * Calculate the first possible address.
11266 */
11267
11268 if (start < map->min_offset)
11269 start = map->min_offset;
11270 if (start > map->max_offset)
11271 return(KERN_NO_SPACE);
11272
11273 /*
11274 * Look for the first possible address;
11275 * if there's already something at this
11276 * address, we have to start after it.
11277 */
11278
11279 assert(first_free_is_valid(map));
11280 if (start == map->min_offset) {
11281 if ((entry = map->first_free) != vm_map_to_entry(map))
11282 start = entry->vme_end;
11283 } else {
11284 vm_map_entry_t tmp_entry;
11285 if (vm_map_lookup_entry(map, start, &tmp_entry))
11286 start = tmp_entry->vme_end;
11287 entry = tmp_entry;
11288 }
11289
11290 /*
11291 * In any case, the "entry" always precedes
11292 * the proposed new region throughout the
11293 * loop:
11294 */
11295
11296 while (TRUE) {
11297 register vm_map_entry_t next;
11298
11299 /*
11300 * Find the end of the proposed new region.
11301 * Be sure we didn't go beyond the end, or
11302 * wrap around the address.
11303 */
11304
11305 end = ((start + mask) & ~mask);
11306 if (end < start)
11307 return(KERN_NO_SPACE);
11308 start = end;
11309 end += size;
11310
11311 if ((end > map->max_offset) || (end < start)) {
11312 if (map->wait_for_space) {
11313 if (size <= (map->max_offset -
11314 map->min_offset)) {
11315 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11316 vm_map_unlock(map);
11317 thread_block(THREAD_CONTINUE_NULL);
11318 vm_map_lock(map);
11319 goto StartAgain;
11320 }
11321 }
11322
11323 return(KERN_NO_SPACE);
11324 }
11325
11326 /*
11327 * If there are no more entries, we must win.
11328 */
11329
11330 next = entry->vme_next;
11331 if (next == vm_map_to_entry(map))
11332 break;
11333
11334 /*
11335 * If there is another entry, it must be
11336 * after the end of the potential new region.
11337 */
11338
11339 if (next->vme_start >= end)
11340 break;
11341
11342 /*
11343 * Didn't fit -- move to the next entry.
11344 */
11345
11346 entry = next;
11347 start = entry->vme_end;
11348 }
11349 *address = start;
11350 } else {
11351 vm_map_entry_t temp_entry;
11352
11353 /*
11354 * Verify that:
11355 * the address doesn't itself violate
11356 * the mask requirement.
11357 */
11358
11359 if ((start & mask) != 0)
11360 return(KERN_NO_SPACE);
11361
11362
11363 /*
11364 * ... the address is within bounds
11365 */
11366
11367 end = start + size;
11368
11369 if ((start < map->min_offset) ||
11370 (end > map->max_offset) ||
11371 (start >= end)) {
11372 return(KERN_INVALID_ADDRESS);
11373 }
11374
11375 /*
11376 * ... the starting address isn't allocated
11377 */
11378
11379 if (vm_map_lookup_entry(map, start, &temp_entry))
11380 return(KERN_NO_SPACE);
11381
11382 entry = temp_entry;
11383
11384 /*
11385 * ... the next region doesn't overlap the
11386 * end point.
11387 */
11388
11389 if ((entry->vme_next != vm_map_to_entry(map)) &&
11390 (entry->vme_next->vme_start < end))
11391 return(KERN_NO_SPACE);
11392 }
11393 *map_entry = entry;
11394 return(KERN_SUCCESS);
11395 }
11396
11397 /*
11398 * vm_map_switch:
11399 *
11400 * Set the address map for the current thread to the specified map
11401 */
11402
11403 vm_map_t
11404 vm_map_switch(
11405 vm_map_t map)
11406 {
11407 int mycpu;
11408 thread_t thread = current_thread();
11409 vm_map_t oldmap = thread->map;
11410
11411 mp_disable_preemption();
11412 mycpu = cpu_number();
11413
11414 /*
11415 * Deactivate the current map and activate the requested map
11416 */
11417 PMAP_SWITCH_USER(thread, map, mycpu);
11418
11419 mp_enable_preemption();
11420 return(oldmap);
11421 }
11422
11423
11424 /*
11425 * Routine: vm_map_write_user
11426 *
11427 * Description:
11428 * Copy out data from kernel space into space in the
11429 * destination map. The space must already exist in the
11430 * destination map.
11431 * NOTE: This routine should only be called by threads
11432 * which can block on a page fault, i.e. kernel-mode user
11433 * threads.
11434 *
11435 */
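/*
 * Usage sketch (hypothetical names): push a kernel structure into another
 * task's address space, assuming user_dst_addr is already mapped there:
 *
 *	struct foo f;
 *	kern_return_t kr;
 *
 *	kr = vm_map_write_user(task_map, &f, user_dst_addr, sizeof (f));
 */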
11436 kern_return_t
11437 vm_map_write_user(
11438 vm_map_t map,
11439 void *src_p,
11440 vm_map_address_t dst_addr,
11441 vm_size_t size)
11442 {
11443 kern_return_t kr = KERN_SUCCESS;
11444
11445 if(current_map() == map) {
11446 if (copyout(src_p, dst_addr, size)) {
11447 kr = KERN_INVALID_ADDRESS;
11448 }
11449 } else {
11450 vm_map_t oldmap;
11451
11452 /* take on the identity of the target map while doing */
11453 /* the transfer */
11454
11455 vm_map_reference(map);
11456 oldmap = vm_map_switch(map);
11457 if (copyout(src_p, dst_addr, size)) {
11458 kr = KERN_INVALID_ADDRESS;
11459 }
11460 vm_map_switch(oldmap);
11461 vm_map_deallocate(map);
11462 }
11463 return kr;
11464 }
11465
11466 /*
11467 * Routine: vm_map_read_user
11468 *
11469 * Description:
11470 * Copy in data from a user-space source map into the
11471 * kernel map. The space must already exist in the
11472 * kernel map.
11473 * NOTE: This routine should only be called by threads
11474 * which can block on a page fault, i.e. kernel-mode user
11475 * threads.
11476 *
11477 */
11478 kern_return_t
11479 vm_map_read_user(
11480 vm_map_t map,
11481 vm_map_address_t src_addr,
11482 void *dst_p,
11483 vm_size_t size)
11484 {
11485 kern_return_t kr = KERN_SUCCESS;
11486
11487 if(current_map() == map) {
11488 if (copyin(src_addr, dst_p, size)) {
11489 kr = KERN_INVALID_ADDRESS;
11490 }
11491 } else {
11492 vm_map_t oldmap;
11493
11494 /* take on the identity of the target map while doing */
11495 /* the transfer */
11496
11497 vm_map_reference(map);
11498 oldmap = vm_map_switch(map);
11499 if (copyin(src_addr, dst_p, size)) {
11500 kr = KERN_INVALID_ADDRESS;
11501 }
11502 vm_map_switch(oldmap);
11503 vm_map_deallocate(map);
11504 }
11505 return kr;
11506 }
11507
11508
11509 /*
11510 * vm_map_check_protection:
11511 *
11512 * Assert that the target map allows the specified
11513 * privilege on the entire address region given.
11514 * The entire region must be allocated.
11515 */
11516 boolean_t
11517 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11518 vm_map_offset_t end, vm_prot_t protection)
11519 {
11520 vm_map_entry_t entry;
11521 vm_map_entry_t tmp_entry;
11522
11523 vm_map_lock(map);
11524
11525 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11526 {
11527 vm_map_unlock(map);
11528 return (FALSE);
11529 }
11530
11531 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11532 vm_map_unlock(map);
11533 return(FALSE);
11534 }
11535
11536 entry = tmp_entry;
11537
11538 while (start < end) {
11539 if (entry == vm_map_to_entry(map)) {
11540 vm_map_unlock(map);
11541 return(FALSE);
11542 }
11543
11544 /*
11545 * No holes allowed!
11546 */
11547
11548 if (start < entry->vme_start) {
11549 vm_map_unlock(map);
11550 return(FALSE);
11551 }
11552
11553 /*
11554 * Check protection associated with entry.
11555 */
11556
11557 if ((entry->protection & protection) != protection) {
11558 vm_map_unlock(map);
11559 return(FALSE);
11560 }
11561
11562 /* go to next entry */
11563
11564 start = entry->vme_end;
11565 entry = entry->vme_next;
11566 }
11567 vm_map_unlock(map);
11568 return(TRUE);
11569 }
11570
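/*
 * vm_map_purgable_control:
 *
 * Apply a purgeability request to the object mapped at "address".
 * VM_PURGABLE_PURGE_ALL purges all volatile objects system-wide; for
 * VM_PURGABLE_SET_STATE / GET_STATE the entry must be writable and must
 * map the entire object starting at offset 0, and the state change is
 * delegated to vm_object_purgable_control().
 */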
11571 kern_return_t
11572 vm_map_purgable_control(
11573 vm_map_t map,
11574 vm_map_offset_t address,
11575 vm_purgable_t control,
11576 int *state)
11577 {
11578 vm_map_entry_t entry;
11579 vm_object_t object;
11580 kern_return_t kr;
11581
11582 /*
11583 * Vet all the input parameters and current type and state of the
11584 * underlying object. Return with an error if anything is amiss.
11585 */
11586 if (map == VM_MAP_NULL)
11587 return(KERN_INVALID_ARGUMENT);
11588
11589 if (control != VM_PURGABLE_SET_STATE &&
11590 control != VM_PURGABLE_GET_STATE &&
11591 control != VM_PURGABLE_PURGE_ALL)
11592 return(KERN_INVALID_ARGUMENT);
11593
11594 if (control == VM_PURGABLE_PURGE_ALL) {
11595 vm_purgeable_object_purge_all();
11596 return KERN_SUCCESS;
11597 }
11598
11599 if (control == VM_PURGABLE_SET_STATE &&
11600 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11601 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11602 return(KERN_INVALID_ARGUMENT);
11603
11604 vm_map_lock_read(map);
11605
11606 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11607
11608 /*
11609 * Must pass a valid non-submap address.
11610 */
11611 vm_map_unlock_read(map);
11612 return(KERN_INVALID_ADDRESS);
11613 }
11614
11615 if ((entry->protection & VM_PROT_WRITE) == 0) {
11616 /*
11617 * Can't apply purgable controls to something you can't write.
11618 */
11619 vm_map_unlock_read(map);
11620 return(KERN_PROTECTION_FAILURE);
11621 }
11622
11623 object = entry->object.vm_object;
11624 if (object == VM_OBJECT_NULL) {
11625 /*
11626 * Object must already be present or it can't be purgable.
11627 */
11628 vm_map_unlock_read(map);
11629 return KERN_INVALID_ARGUMENT;
11630 }
11631
11632 vm_object_lock(object);
11633
11634 if (entry->offset != 0 ||
11635 entry->vme_end - entry->vme_start != object->size) {
11636 /*
11637 * Can only apply purgable controls to the whole (existing)
11638 * object at once.
11639 */
11640 vm_map_unlock_read(map);
11641 vm_object_unlock(object);
11642 return KERN_INVALID_ARGUMENT;
11643 }
11644
11645 vm_map_unlock_read(map);
11646
11647 kr = vm_object_purgable_control(object, control, state);
11648
11649 vm_object_unlock(object);
11650
11651 return kr;
11652 }
11653
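/*
 * vm_map_page_query_internal:
 *
 * Convenience wrapper around vm_map_page_info(): fetches the
 * VM_PAGE_INFO_BASIC flavor and returns just the disposition and
 * ref_count (both zeroed if the query fails).
 */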
11654 kern_return_t
11655 vm_map_page_query_internal(
11656 vm_map_t target_map,
11657 vm_map_offset_t offset,
11658 int *disposition,
11659 int *ref_count)
11660 {
11661 kern_return_t kr;
11662 vm_page_info_basic_data_t info;
11663 mach_msg_type_number_t count;
11664
11665 count = VM_PAGE_INFO_BASIC_COUNT;
11666 kr = vm_map_page_info(target_map,
11667 offset,
11668 VM_PAGE_INFO_BASIC,
11669 (vm_page_info_t) &info,
11670 &count);
11671 if (kr == KERN_SUCCESS) {
11672 *disposition = info.disposition;
11673 *ref_count = info.ref_count;
11674 } else {
11675 *disposition = 0;
11676 *ref_count = 0;
11677 }
11678
11679 return kr;
11680 }
11681
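/*
 * vm_map_page_info:
 *
 * Report the state of the page at "offset" in "map". Descends through
 * submaps to the covering entry, then follows the object shadow chain
 * looking for the page; a page that isn't resident may still be reported
 * as paged out, based on the existence map or by poking the default
 * pager. Fills in a vm_page_info_basic_t (disposition, ref_count,
 * object_id, offset, depth).
 */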
11682 kern_return_t
11683 vm_map_page_info(
11684 vm_map_t map,
11685 vm_map_offset_t offset,
11686 vm_page_info_flavor_t flavor,
11687 vm_page_info_t info,
11688 mach_msg_type_number_t *count)
11689 {
11690 vm_map_entry_t map_entry;
11691 vm_object_t object;
11692 vm_page_t m;
11693 kern_return_t kr;
11694 kern_return_t retval = KERN_SUCCESS;
11695 boolean_t top_object;
11696 int disposition;
11697 int ref_count;
11698 vm_object_id_t object_id;
11699 vm_page_info_basic_t basic_info;
11700 int depth;
11701
11702 switch (flavor) {
11703 case VM_PAGE_INFO_BASIC:
11704 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
11705 return KERN_INVALID_ARGUMENT;
11706 }
11707 break;
11708 default:
11709 return KERN_INVALID_ARGUMENT;
11710 }
11711
11712 disposition = 0;
11713 ref_count = 0;
11714 object_id = 0;
11715 top_object = TRUE;
11716 depth = 0;
11717
11718 retval = KERN_SUCCESS;
11719 offset = vm_map_trunc_page(offset);
11720
11721 vm_map_lock_read(map);
11722
11723 /*
11724 * First, find the map entry covering "offset", going down
11725 * submaps if necessary.
11726 */
11727 for (;;) {
11728 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
11729 vm_map_unlock_read(map);
11730 return KERN_INVALID_ADDRESS;
11731 }
11732 /* compute offset from this map entry's start */
11733 offset -= map_entry->vme_start;
11734 /* compute offset into this map entry's object (or submap) */
11735 offset += map_entry->offset;
11736
11737 if (map_entry->is_sub_map) {
11738 vm_map_t sub_map;
11739
11740 sub_map = map_entry->object.sub_map;
11741 vm_map_lock_read(sub_map);
11742 vm_map_unlock_read(map);
11743
11744 map = sub_map;
11745
11746 ref_count = MAX(ref_count, map->ref_count);
11747 continue;
11748 }
11749 break;
11750 }
11751
11752 object = map_entry->object.vm_object;
11753 if (object == VM_OBJECT_NULL) {
11754 /* no object -> no page */
11755 vm_map_unlock_read(map);
11756 goto done;
11757 }
11758
11759 vm_object_lock(object);
11760 vm_map_unlock_read(map);
11761
11762 /*
11763 * Go down the VM object shadow chain until we find the page
11764 * we're looking for.
11765 */
11766 for (;;) {
11767 ref_count = MAX(ref_count, object->ref_count);
11768
11769 m = vm_page_lookup(object, offset);
11770
11771 if (m != VM_PAGE_NULL) {
11772 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
11773 break;
11774 } else {
11775 #if MACH_PAGEMAP
11776 if (object->existence_map) {
11777 if (vm_external_state_get(object->existence_map,
11778 offset) ==
11779 VM_EXTERNAL_STATE_EXISTS) {
11780 /*
11781 * this page has been paged out
11782 */
11783 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
11784 break;
11785 }
11786 } else
11787 #endif
11788 {
11789 if (object->internal &&
11790 object->alive &&
11791 !object->terminating &&
11792 object->pager_ready) {
11793
11794 memory_object_t pager;
11795
11796 vm_object_paging_begin(object);
11797 pager = object->pager;
11798 vm_object_unlock(object);
11799
11800 /*
11801 * Ask the default pager if
11802 * it has this page.
11803 */
11804 kr = memory_object_data_request(
11805 pager,
11806 offset + object->paging_offset,
11807 0, /* just poke the pager */
11808 VM_PROT_READ,
11809 NULL);
11810
11811 vm_object_lock(object);
11812 vm_object_paging_end(object);
11813
11814 if (kr == KERN_SUCCESS) {
11815 /* the default pager has it */
11816 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
11817 break;
11818 }
11819 }
11820 }
11821
11822 if (object->shadow != VM_OBJECT_NULL) {
11823 vm_object_t shadow;
11824
11825 offset += object->shadow_offset;
11826 shadow = object->shadow;
11827
11828 vm_object_lock(shadow);
11829 vm_object_unlock(object);
11830
11831 object = shadow;
11832 top_object = FALSE;
11833 depth++;
11834 } else {
11835 // if (!object->internal)
11836 // break;
11837 // retval = KERN_FAILURE;
11838 // goto done_with_object;
11839 break;
11840 }
11841 }
11842 }
11843 /* The ref_count is not strictly accurate: it measures the number */
11844 /* of entities holding a ref on the object, and they may not be mapping */
11845 /* the object or the section holding the target page. But it's still */
11846 /* a ballpark number, and though an over-count, it picks up the */
11847 /* copy-on-write cases. */
11848
11849 /* We could also get a picture of page sharing from pmap_attributes, */
11850 /* but this would undercount, as only faulted-in mappings would */
11851 /* show up. */
11852
11853 if (top_object == TRUE && object->shadow)
11854 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
11855
11856 if (! object->internal)
11857 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
11858
11859 if (m == VM_PAGE_NULL)
11860 goto done_with_object;
11861
11862 if (m->fictitious) {
11863 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
11864 goto done_with_object;
11865 }
11866 if (m->dirty || pmap_is_modified(m->phys_page))
11867 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
11868
11869 if (m->reference || pmap_is_referenced(m->phys_page))
11870 disposition |= VM_PAGE_QUERY_PAGE_REF;
11871
11872 if (m->speculative)
11873 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
11874
11875 if (m->cs_validated)
11876 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
11877 if (m->cs_tainted)
11878 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
11879
11880 done_with_object:
11881 vm_object_unlock(object);
11882 done:
11883
11884 switch (flavor) {
11885 case VM_PAGE_INFO_BASIC:
11886 basic_info = (vm_page_info_basic_t) info;
11887 basic_info->disposition = disposition;
11888 basic_info->ref_count = ref_count;
11889 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
11890 basic_info->offset = (memory_object_offset_t) offset;
11891 basic_info->depth = depth;
11892 break;
11893 }
11894
11895 return retval;
11896 }
11897
11898 /*
11899 * vm_map_msync
11900 *
11901 * Synchronizes the specified memory range with its backing store
11902 * image, by either flushing or cleaning the contents to the appropriate
11903 * memory manager, engaging in a memory object synchronize dialog with
11904 * that manager. The client doesn't return until the manager issues an
11905 * m_o_s_completed message. MIG magically converts the user task parameter
11906 * to the task's address map.
11907 *
11908 * interpretation of sync_flags
11909 * VM_SYNC_INVALIDATE - discard pages, only return precious
11910 * pages to manager.
11911 *
11912 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
11913 * - discard pages, write dirty or precious
11914 * pages back to memory manager.
11915 *
11916 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
11917 * - write dirty or precious pages back to
11918 * the memory manager.
11919 *
11920 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
11921 * is a hole in the region, and we would
11922 * have returned KERN_SUCCESS, return
11923 * KERN_INVALID_ADDRESS instead.
11924 *
11925 * NOTE
11926 * The memory object attributes have not yet been implemented; this
11927 * function will have to deal with the invalidate attribute.
11928 *
11929 * RETURNS
11930 * KERN_INVALID_TASK Bad task parameter
11931 * KERN_INVALID_ARGUMENT both sync and async were specified.
11932 * KERN_SUCCESS The usual.
11933 * KERN_INVALID_ADDRESS There was a hole in the region.
11934 */
11935
11936 kern_return_t
11937 vm_map_msync(
11938 vm_map_t map,
11939 vm_map_address_t address,
11940 vm_map_size_t size,
11941 vm_sync_t sync_flags)
11942 {
11943 msync_req_t msr;
11944 msync_req_t new_msr;
11945 queue_chain_t req_q; /* queue of requests for this msync */
11946 vm_map_entry_t entry;
11947 vm_map_size_t amount_left;
11948 vm_object_offset_t offset;
11949 boolean_t do_sync_req;
11950 boolean_t had_hole = FALSE;
11951 memory_object_t pager;
11952
11953 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
11954 (sync_flags & VM_SYNC_SYNCHRONOUS))
11955 return(KERN_INVALID_ARGUMENT);
11956
11957 /*
11958 * align address and size on page boundaries
11959 */
11960 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
11961 address = vm_map_trunc_page(address);
11962
11963 if (map == VM_MAP_NULL)
11964 return(KERN_INVALID_TASK);
11965
11966 if (size == 0)
11967 return(KERN_SUCCESS);
11968
11969 queue_init(&req_q);
11970 amount_left = size;
11971
11972 while (amount_left > 0) {
11973 vm_object_size_t flush_size;
11974 vm_object_t object;
11975
11976 vm_map_lock(map);
11977 if (!vm_map_lookup_entry(map,
11978 vm_map_trunc_page(address), &entry)) {
11979
11980 vm_map_size_t skip;
11981
11982 /*
11983 * hole in the address map.
11984 */
11985 had_hole = TRUE;
11986
11987 /*
11988 * Check for empty map.
11989 */
11990 if (entry == vm_map_to_entry(map) &&
11991 entry->vme_next == entry) {
11992 vm_map_unlock(map);
11993 break;
11994 }
11995 /*
11996 * Check that we don't wrap and that
11997 * we have at least one real map entry.
11998 */
11999 if ((map->hdr.nentries == 0) ||
12000 (entry->vme_next->vme_start < address)) {
12001 vm_map_unlock(map);
12002 break;
12003 }
12004 /*
12005 * Move up to the next entry if needed
12006 */
12007 skip = (entry->vme_next->vme_start - address);
12008 if (skip >= amount_left)
12009 amount_left = 0;
12010 else
12011 amount_left -= skip;
12012 address = entry->vme_next->vme_start;
12013 vm_map_unlock(map);
12014 continue;
12015 }
12016
12017 offset = address - entry->vme_start;
12018
12019 /*
12020 * do we have more to flush than is contained in this
12021 * entry?
12022 */
12023 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12024 flush_size = entry->vme_end -
12025 (entry->vme_start + offset);
12026 } else {
12027 flush_size = amount_left;
12028 }
12029 amount_left -= flush_size;
12030 address += flush_size;
12031
12032 if (entry->is_sub_map == TRUE) {
12033 vm_map_t local_map;
12034 vm_map_offset_t local_offset;
12035
12036 local_map = entry->object.sub_map;
12037 local_offset = entry->offset;
12038 vm_map_unlock(map);
12039 if (vm_map_msync(
12040 local_map,
12041 local_offset,
12042 flush_size,
12043 sync_flags) == KERN_INVALID_ADDRESS) {
12044 had_hole = TRUE;
12045 }
12046 continue;
12047 }
12048 object = entry->object.vm_object;
12049
12050 /*
12051 * We can't sync this object if the object has not been
12052 * created yet
12053 */
12054 if (object == VM_OBJECT_NULL) {
12055 vm_map_unlock(map);
12056 continue;
12057 }
12058 offset += entry->offset;
12059
12060 vm_object_lock(object);
12061
12062 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12063 int kill_pages = 0;
12064 boolean_t reusable_pages = FALSE;
12065
12066 if (sync_flags & VM_SYNC_KILLPAGES) {
12067 if (object->ref_count == 1 && !object->shadow)
12068 kill_pages = 1;
12069 else
12070 kill_pages = -1;
12071 }
12072 if (kill_pages != -1)
12073 vm_object_deactivate_pages(object, offset,
12074 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12075 vm_object_unlock(object);
12076 vm_map_unlock(map);
12077 continue;
12078 }
12079 /*
12080 * We can't sync this object if there isn't a pager.
12081 * Don't bother to sync internal objects, since there can't
12082 * be any "permanent" storage for these objects anyway.
12083 */
12084 if ((object->pager == MEMORY_OBJECT_NULL) ||
12085 (object->internal) || (object->private)) {
12086 vm_object_unlock(object);
12087 vm_map_unlock(map);
12088 continue;
12089 }
12090 /*
12091 * keep reference on the object until syncing is done
12092 */
12093 vm_object_reference_locked(object);
12094 vm_object_unlock(object);
12095
12096 vm_map_unlock(map);
12097
12098 do_sync_req = vm_object_sync(object,
12099 offset,
12100 flush_size,
12101 sync_flags & VM_SYNC_INVALIDATE,
12102 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12103 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12104 sync_flags & VM_SYNC_SYNCHRONOUS);
12105 /*
12106 * only send an m_o_s if we returned pages or if the entry
12107 * is writable (i.e. dirty pages may have already been sent back)
12108 */
12109 if (!do_sync_req) {
12110 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12111 /*
12112 * clear out the clustering and read-ahead hints
12113 */
12114 vm_object_lock(object);
12115
12116 object->pages_created = 0;
12117 object->pages_used = 0;
12118 object->sequential = 0;
12119 object->last_alloc = 0;
12120
12121 vm_object_unlock(object);
12122 }
12123 vm_object_deallocate(object);
12124 continue;
12125 }
12126 msync_req_alloc(new_msr);
12127
12128 vm_object_lock(object);
12129 offset += object->paging_offset;
12130
12131 new_msr->offset = offset;
12132 new_msr->length = flush_size;
12133 new_msr->object = object;
12134 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12135 re_iterate:
12136
12137 /*
12138 * We can't sync this object if there isn't a pager. The
12139 * pager can disappear anytime we're not holding the object
12140 * lock. So this has to be checked anytime we goto re_iterate.
12141 */
12142
12143 pager = object->pager;
12144
12145 if (pager == MEMORY_OBJECT_NULL) {
12146 vm_object_unlock(object);
12147 vm_object_deallocate(object);
12148 continue;
12149 }
12150
12151 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12152 /*
12153 * need to check for an overlapping entry; if found, wait
12154 * for the overlapping msr to be done, then reiterate
12155 */
12156 msr_lock(msr);
12157 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12158 ((offset >= msr->offset &&
12159 offset < (msr->offset + msr->length)) ||
12160 (msr->offset >= offset &&
12161 msr->offset < (offset + flush_size))))
12162 {
12163 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12164 msr_unlock(msr);
12165 vm_object_unlock(object);
12166 thread_block(THREAD_CONTINUE_NULL);
12167 vm_object_lock(object);
12168 goto re_iterate;
12169 }
12170 msr_unlock(msr);
12171 }/* queue_iterate */
12172
12173 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12174
12175 vm_object_paging_begin(object);
12176 vm_object_unlock(object);
12177
12178 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12179
12180 (void) memory_object_synchronize(
12181 pager,
12182 offset,
12183 flush_size,
12184 sync_flags & ~VM_SYNC_CONTIGUOUS);
12185
12186 vm_object_lock(object);
12187 vm_object_paging_end(object);
12188 vm_object_unlock(object);
12189 }/* while */
12190
12191 /*
12192 * wait for memory_object_synchronize_completed messages from pager(s)
12193 */
12194
12195 while (!queue_empty(&req_q)) {
12196 msr = (msync_req_t)queue_first(&req_q);
12197 msr_lock(msr);
12198 while(msr->flag != VM_MSYNC_DONE) {
12199 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12200 msr_unlock(msr);
12201 thread_block(THREAD_CONTINUE_NULL);
12202 msr_lock(msr);
12203 }/* while */
12204 queue_remove(&req_q, msr, msync_req_t, req_q);
12205 msr_unlock(msr);
12206 vm_object_deallocate(msr->object);
12207 msync_req_free(msr);
12208 }/* while */
12209
12210 /* for proper msync() behaviour */
12211 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12212 return(KERN_INVALID_ADDRESS);
12213
12214 return(KERN_SUCCESS);
12215 }/* vm_msync */
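/*
 * Illustrative sketch only (not part of the original file): roughly how a
 * caller such as the BSD msync() path might translate its flags into the
 * VM_SYNC_* bits consumed by the routine above.  It assumes the enclosing
 * routine is the vm_map_msync() entry point declared in vm_map.h; the
 * helper name and the exact flag mapping are assumptions for illustration.
 */
#if 0	/* example only */
static kern_return_t
example_msync_region(
	vm_map_t		map,
	mach_vm_offset_t	addr,
	mach_vm_size_t		size,
	boolean_t		synchronous,
	boolean_t		invalidate)
{
	vm_sync_t	sync_flags = 0;

	/* pick synchronous vs. asynchronous completion semantics */
	sync_flags |= synchronous ? VM_SYNC_SYNCHRONOUS : VM_SYNC_ASYNCHRONOUS;
	if (invalidate)
		sync_flags |= VM_SYNC_INVALIDATE;
	/* require the whole range to be mapped, as msync(2) does */
	sync_flags |= VM_SYNC_CONTIGUOUS;

	return vm_map_msync(map, (vm_map_address_t)addr,
			    (vm_map_size_t)size, sync_flags);
}
#endif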
12216
12217 /*
12218 * Routine: convert_port_entry_to_map
12219 * Purpose:
12220 * Convert from a port specifying an entry or a task
12221 * to a map. Doesn't consume the port ref; produces a map ref,
12222 * which may be null. Unlike convert_port_to_map, the
12223 * port may be backed by either a task or a named entry.
12224 * Conditions:
12225 * Nothing locked.
12226 */
12227
12228
12229 vm_map_t
12230 convert_port_entry_to_map(
12231 ipc_port_t port)
12232 {
12233 vm_map_t map;
12234 vm_named_entry_t named_entry;
12235 uint32_t try_failed_count = 0;
12236
12237 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12238 while(TRUE) {
12239 ip_lock(port);
12240 if(ip_active(port) && (ip_kotype(port)
12241 == IKOT_NAMED_ENTRY)) {
12242 named_entry =
12243 (vm_named_entry_t)port->ip_kobject;
12244 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12245 ip_unlock(port);
12246
12247 try_failed_count++;
12248 mutex_pause(try_failed_count);
12249 continue;
12250 }
12251 named_entry->ref_count++;
12252 lck_mtx_unlock(&(named_entry)->Lock);
12253 ip_unlock(port);
12254 if ((named_entry->is_sub_map) &&
12255 (named_entry->protection
12256 & VM_PROT_WRITE)) {
12257 map = named_entry->backing.map;
12258 } else {
12259 mach_destroy_memory_entry(port);
12260 return VM_MAP_NULL;
12261 }
12262 vm_map_reference_swap(map);
12263 mach_destroy_memory_entry(port);
12264 break;
12265 }
12266 else
12267 return VM_MAP_NULL;
12268 }
12269 }
12270 else
12271 map = convert_port_to_map(port);
12272
12273 return map;
12274 }
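/*
 * Illustrative sketch only (not in the original source): a caller that
 * accepts either a task port or a (writable, sub-map backed) named entry
 * port, resolves it to a map with the routine above, and drops the map
 * reference when done.  The function name is hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_with_port_map(ipc_port_t port)
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);	/* takes a map ref, port ref untouched */
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on "map" here ... */

	vm_map_deallocate(map);			/* drop the ref taken above */
	return KERN_SUCCESS;
}
#endif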
12275
12276 /*
12277 * Routine: convert_port_entry_to_object
12278 * Purpose:
12279 * Convert from a port specifying a named entry to an
12280 * object. Doesn't consume the port ref; produces an object ref,
12281 * which may be null.
12282 * Conditions:
12283 * Nothing locked.
12284 */
12285
12286
12287 vm_object_t
12288 convert_port_entry_to_object(
12289 ipc_port_t port)
12290 {
12291 vm_object_t object;
12292 vm_named_entry_t named_entry;
12293 uint32_t try_failed_count = 0;
12294
12295 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12296 while(TRUE) {
12297 ip_lock(port);
12298 if(ip_active(port) && (ip_kotype(port)
12299 == IKOT_NAMED_ENTRY)) {
12300 named_entry =
12301 (vm_named_entry_t)port->ip_kobject;
12302 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12303 ip_unlock(port);
12304
12305 try_failed_count++;
12306 mutex_pause(try_failed_count);
12307 continue;
12308 }
12309 named_entry->ref_count++;
12310 lck_mtx_unlock(&(named_entry)->Lock);
12311 ip_unlock(port);
12312 if ((!named_entry->is_sub_map) &&
12313 (!named_entry->is_pager) &&
12314 (named_entry->protection
12315 & VM_PROT_WRITE)) {
12316 object = named_entry->backing.object;
12317 } else {
12318 mach_destroy_memory_entry(port);
12319 return (vm_object_t)NULL;
12320 }
12321 vm_object_reference(named_entry->backing.object);
12322 mach_destroy_memory_entry(port);
12323 break;
12324 }
12325 else
12326 return (vm_object_t)NULL;
12327 }
12328 } else {
12329 return (vm_object_t)NULL;
12330 }
12331
12332 return object;
12333 }
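/*
 * Illustrative sketch only (not in the original source): resolving a named
 * entry port to its backing VM object with the routine above and releasing
 * the object reference afterwards.  The function name is hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_with_port_object(ipc_port_t port)
{
	vm_object_t	object;

	object = convert_port_entry_to_object(port);	/* takes an object ref */
	if (object == VM_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on "object" here ... */

	vm_object_deallocate(object);			/* drop the ref taken above */
	return KERN_SUCCESS;
}
#endif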
12334
12335 /*
12336 * Export routines to other components for the things we access locally through
12337 * macros.
12338 */
12339 #undef current_map
12340 vm_map_t
12341 current_map(void)
12342 {
12343 return (current_map_fast());
12344 }
12345
12346 /*
12347 * vm_map_reference:
12348 *
12349 * Most code internal to osfmk will go through a
12350 * macro defining this. This is always here for the
12351 * use of other kernel components.
12352 */
12353 #undef vm_map_reference
12354 void
12355 vm_map_reference(
12356 register vm_map_t map)
12357 {
12358 if (map == VM_MAP_NULL)
12359 return;
12360
12361 lck_mtx_lock(&map->s_lock);
12362 #if TASK_SWAPPER
12363 assert(map->res_count > 0);
12364 assert(map->ref_count >= map->res_count);
12365 map->res_count++;
12366 #endif
12367 map->ref_count++;
12368 lck_mtx_unlock(&map->s_lock);
12369 }
12370
12371 /*
12372 * vm_map_deallocate:
12373 *
12374 * Removes a reference from the specified map,
12375 * destroying it if no references remain.
12376 * The map should not be locked.
12377 */
12378 void
12379 vm_map_deallocate(
12380 register vm_map_t map)
12381 {
12382 unsigned int ref;
12383
12384 if (map == VM_MAP_NULL)
12385 return;
12386
12387 lck_mtx_lock(&map->s_lock);
12388 ref = --map->ref_count;
12389 if (ref > 0) {
12390 vm_map_res_deallocate(map);
12391 lck_mtx_unlock(&map->s_lock);
12392 return;
12393 }
12394 assert(map->ref_count == 0);
12395 lck_mtx_unlock(&map->s_lock);
12396
12397 #if TASK_SWAPPER
12398 /*
12399 * The map residence count isn't decremented here because
12400 * the vm_map_delete below will traverse the entire map,
12401 * deleting entries, and the residence counts on objects
12402 * and sharing maps will go away then.
12403 */
12404 #endif
12405
12406 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12407 }
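/*
 * Illustrative sketch only (not in the original source): the usual pairing
 * of vm_map_reference() and vm_map_deallocate() by a component outside
 * osfmk that needs to keep a map alive across an operation.  The function
 * name is hypothetical.
 */
#if 0	/* example only */
static void
example_borrow_map(vm_map_t map)
{
	if (map == VM_MAP_NULL)
		return;

	vm_map_reference(map);		/* extra ref: the map can't be destroyed under us */

	/* ... look up entries, fault pages, etc. ... */

	vm_map_deallocate(map);		/* drop the ref; destroys the map if it was the last one */
}
#endif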
12408
12409
12410 void
12411 vm_map_disable_NX(vm_map_t map)
12412 {
12413 if (map == NULL)
12414 return;
12415 if (map->pmap == NULL)
12416 return;
12417
12418 pmap_disable_NX(map->pmap);
12419 }
12420
12421 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12422 * more descriptive.
12423 */
12424 void
12425 vm_map_set_32bit(vm_map_t map)
12426 {
12427 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12428 }
12429
12430
12431 void
12432 vm_map_set_64bit(vm_map_t map)
12433 {
12434 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12435 }
12436
12437 vm_map_offset_t
12438 vm_compute_max_offset(unsigned is64)
12439 {
12440 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12441 }
12442
12443 boolean_t
12444 vm_map_is_64bit(
12445 vm_map_t map)
12446 {
12447 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12448 }
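/*
 * Illustrative sketch only (not in the original source): how the helpers
 * above relate.  A map is considered 64-bit purely by how far its
 * max_offset reaches, so setting it via vm_map_set_64bit()/_set_32bit()
 * (or computing it with vm_compute_max_offset()) is what determines the
 * result of vm_map_is_64bit().  The function name is hypothetical.
 */
#if 0	/* example only */
static void
example_size_map(vm_map_t map, boolean_t is64)
{
	if (is64)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	assert(vm_map_is_64bit(map) == is64);
	assert(map->max_offset == vm_compute_max_offset(is64));
}
#endif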
12449
12450 boolean_t
12451 vm_map_has_4GB_pagezero(
12452 vm_map_t map)
12453 {
12454 /*
12455 * XXX FBDP
12456 * We should take the VM map lock (for read) here but we can get away
12457 * without it for now because there can't really be any race condition:
12458 * the VM map's min_offset is changed only when the VM map is created
12459 * and when the zero page is established (when the binary gets loaded),
12460 * and this routine gets called only when the task terminates and the
12461 * VM map is being torn down, and when a new map is created via
12462 * load_machfile()/execve().
12463 */
12464 return (map->min_offset >= 0x100000000ULL);
12465 }
12466
12467 void
12468 vm_map_set_4GB_pagezero(vm_map_t map)
12469 {
12470 #ifdef __i386__
12471 pmap_set_4GB_pagezero(map->pmap);
12472 #else
12473 #pragma unused(map)
12474 #endif
12475
12476 }
12477
12478 void
12479 vm_map_clear_4GB_pagezero(vm_map_t map)
12480 {
12481 #ifdef __i386__
12482 pmap_clear_4GB_pagezero(map->pmap);
12483 #else
12484 #pragma unused(map)
12485 #endif
12486 }
12487
12488 /*
12489 * Raise a VM map's minimum offset.
12490 * To strictly enforce "page zero" reservation.
12491 */
12492 kern_return_t
12493 vm_map_raise_min_offset(
12494 vm_map_t map,
12495 vm_map_offset_t new_min_offset)
12496 {
12497 vm_map_entry_t first_entry;
12498
12499 new_min_offset = vm_map_round_page(new_min_offset);
12500
12501 vm_map_lock(map);
12502
12503 if (new_min_offset < map->min_offset) {
12504 /*
12505 * Can't move min_offset backwards, as that would expose
12506 * a part of the address space that was previously, and for
12507 * possibly good reasons, inaccessible.
12508 */
12509 vm_map_unlock(map);
12510 return KERN_INVALID_ADDRESS;
12511 }
12512
12513 first_entry = vm_map_first_entry(map);
12514 if (first_entry != vm_map_to_entry(map) &&
12515 first_entry->vme_start < new_min_offset) {
12516 /*
12517 * Some memory was already allocated below the new
12518 * minimum offset. It's too late to change it now...
12519 */
12520 vm_map_unlock(map);
12521 return KERN_NO_SPACE;
12522 }
12523
12524 map->min_offset = new_min_offset;
12525
12526 vm_map_unlock(map);
12527
12528 return KERN_SUCCESS;
12529 }
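/*
 * Illustrative sketch only (not in the original source): using
 * vm_map_raise_min_offset() to keep the lowest page of a freshly created
 * map unmappable, in the spirit of the __PAGEZERO reservation.  The size
 * chosen and the function name are hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_reserve_pagezero(vm_map_t map)
{
	/* nothing below the first page will ever be mappable in this map */
	return vm_map_raise_min_offset(map, (vm_map_offset_t)PAGE_SIZE);
}
#endif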
12530
12531 /*
12532 * Set the limit on the maximum amount of user wired memory allowed for this map.
12533 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12534 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here so we
12535 * don't have to reach over to the BSD data structures.
12536 */
12537
12538 void
12539 vm_map_set_user_wire_limit(vm_map_t map,
12540 vm_size_t limit)
12541 {
12542 map->user_wire_limit = limit;
12543 }
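/*
 * Illustrative sketch only (not in the original source): how a BSD
 * setrlimit(RLIMIT_MEMLOCK) path might push the new limit down into the
 * Mach map so wiring checks can be done here without consulting BSD data
 * structures.  The function name and the rlim_cur source are assumptions.
 */
#if 0	/* example only */
static void
example_update_memlock_limit(vm_map_t map, uint64_t rlim_cur)
{
	/* the Mach side keeps its own copy of the MEMLOCK limit
	 * (the cast may truncate on a 32-bit kernel) */
	vm_map_set_user_wire_limit(map, (vm_size_t)rlim_cur);
}
#endif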
12544
12545
12546 void vm_map_switch_protect(vm_map_t map,
12547 boolean_t val)
12548 {
12549 vm_map_lock(map);
12550 map->switch_protect=val;
12551 vm_map_unlock(map);
12552 }
12553
12554 /* Add (generate) code signature for memory range */
12555 #if CONFIG_DYNAMIC_CODE_SIGNING
12556 kern_return_t vm_map_sign(vm_map_t map,
12557 vm_map_offset_t start,
12558 vm_map_offset_t end)
12559 {
12560 vm_map_entry_t entry;
12561 vm_page_t m;
12562 vm_object_t object;
12563
12564 /*
12565 * Vet all the input parameters and current type and state of the
12566 * underlying object. Return with an error if anything is amiss.
12567 */
12568 if (map == VM_MAP_NULL)
12569 return(KERN_INVALID_ARGUMENT);
12570
12571 vm_map_lock_read(map);
12572
12573 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
12574 /*
12575 * Must pass a valid non-submap address.
12576 */
12577 vm_map_unlock_read(map);
12578 return(KERN_INVALID_ADDRESS);
12579 }
12580
12581 if((entry->vme_start > start) || (entry->vme_end < end)) {
12582 /*
12583 * Map entry doesn't cover the requested range. Not handling
12584 * this situation currently.
12585 */
12586 vm_map_unlock_read(map);
12587 return(KERN_INVALID_ARGUMENT);
12588 }
12589
12590 object = entry->object.vm_object;
12591 if (object == VM_OBJECT_NULL) {
12592 /*
12593 * Object must already be present or we can't sign.
12594 */
12595 vm_map_unlock_read(map);
12596 return KERN_INVALID_ARGUMENT;
12597 }
12598
12599 vm_object_lock(object);
12600 vm_map_unlock_read(map);
12601
12602 while(start < end) {
12603 uint32_t refmod;
12604
12605 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
12606 if (m==VM_PAGE_NULL) {
12607 /* shoud we try to fault a page here? we can probably
12608 * demand it exists and is locked for this request */
12609 vm_object_unlock(object);
12610 return KERN_FAILURE;
12611 }
12612 /* deal with special page status */
12613 if (m->busy ||
12614 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
12615 vm_object_unlock(object);
12616 return KERN_FAILURE;
12617 }
12618
12619 /* Page is OK... now "validate" it */
12620 /* This is the place where we'll call out to create a code
12621 * directory, later */
12622 m->cs_validated = TRUE;
12623
12624 /* The page is now "clean" for codesigning purposes. That means
12625 * we don't consider it as modified (wpmapped) anymore. But
12626 * we'll disconnect the page so we note any future modification
12627 * attempts. */
12628 m->wpmapped = FALSE;
12629 refmod = pmap_disconnect(m->phys_page);
12630
12631 /* Pull the dirty status from the pmap, since we cleared the
12632 * wpmapped bit */
12633 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
12634 m->dirty = TRUE;
12635 }
12636
12637 /* On to the next page */
12638 start += PAGE_SIZE;
12639 }
12640 vm_object_unlock(object);
12641
12642 return KERN_SUCCESS;
12643 }
12644 #endif
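/*
 * Illustrative sketch only (not in the original source): a dynamic code
 * generator asking the VM layer to "sign" a freshly emitted region so its
 * pages are treated as validated for code-signing purposes.  The caller
 * and the rounding policy are assumptions; vm_map_sign() is only built
 * when CONFIG_DYNAMIC_CODE_SIGNING is set.
 */
#if 0	/* example only */
static kern_return_t
example_sign_jit_region(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	/* the routine expects the range to fall within a single map entry */
	vm_map_offset_t	s = vm_map_trunc_page(start);
	vm_map_offset_t	e = vm_map_round_page(start + size);

	return vm_map_sign(map, s, e);
}
#endif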