1c79356b 1/*
0c530ab8 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
6601e61a 4 * @APPLE_LICENSE_HEADER_START@
1c79356b 5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
8f6c56a5 11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
8f6c56a5 19 *
6601e61a 20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50/*
51 */
52/*
53 * File: vm/vm_map.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 * Date: 1985
56 *
57 * Virtual memory mapping module.
58 */
59
60#include <task_swapper.h>
61#include <mach_assert.h>
91447636 62#include <libkern/OSAtomic.h>
63
64#include <mach/kern_return.h>
65#include <mach/port.h>
66#include <mach/vm_attributes.h>
67#include <mach/vm_param.h>
68#include <mach/vm_behavior.h>
55e303ae 69#include <mach/vm_statistics.h>
91447636 70#include <mach/memory_object.h>
0c530ab8 71#include <mach/mach_vm.h>
72#include <machine/cpu_capabilities.h>
73
74#include <kern/assert.h>
75#include <kern/counters.h>
91447636 76#include <kern/kalloc.h>
1c79356b 77#include <kern/zalloc.h>
78
79#include <vm/cpm.h>
80#include <vm/vm_init.h>
81#include <vm/vm_fault.h>
82#include <vm/vm_map.h>
83#include <vm/vm_object.h>
84#include <vm/vm_page.h>
85#include <vm/vm_kern.h>
86#include <ipc/ipc_port.h>
87#include <kern/sched_prim.h>
88#include <kern/misc_protos.h>
1c79356b 89#include <ddb/tr.h>
55e303ae 90#include <machine/db_machdep.h>
91#include <kern/xpr.h>
92
93#include <mach/vm_map_server.h>
94#include <mach/mach_host_server.h>
95#include <vm/vm_shared_memory_server.h>
 96#include <vm/vm_protos.h>	// for vm_map_commpage64 and vm_map_remove_commpage64
97
98#ifdef ppc
99#include <ppc/mappings.h>
100#endif /* ppc */
101
102#include <vm/vm_protos.h>
103
104/* Internal prototypes
105 */
106
107static void vm_map_simplify_range(
108 vm_map_t map,
109 vm_map_offset_t start,
110 vm_map_offset_t end); /* forward */
111
112static boolean_t vm_map_range_check(
1c79356b 113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end,
116 vm_map_entry_t *entry);
117
91447636 118static vm_map_entry_t _vm_map_entry_create(
119 struct vm_map_header *map_header);
120
91447636 121static void _vm_map_entry_dispose(
122 struct vm_map_header *map_header,
123 vm_map_entry_t entry);
124
91447636 125static void vm_map_pmap_enter(
1c79356b 126 vm_map_t map,
127 vm_map_offset_t addr,
128 vm_map_offset_t end_addr,
129 vm_object_t object,
130 vm_object_offset_t offset,
131 vm_prot_t protection);
132
133static void _vm_map_clip_end(
134 struct vm_map_header *map_header,
135 vm_map_entry_t entry,
136 vm_map_offset_t end);
137
138static void _vm_map_clip_start(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
91447636 141 vm_map_offset_t start);
1c79356b 142
91447636 143static void vm_map_entry_delete(
144 vm_map_t map,
145 vm_map_entry_t entry);
146
91447636 147static kern_return_t vm_map_delete(
1c79356b 148 vm_map_t map,
149 vm_map_offset_t start,
150 vm_map_offset_t end,
151 int flags,
152 vm_map_t zap_map);
1c79356b 153
91447636 154static kern_return_t vm_map_copy_overwrite_unaligned(
155 vm_map_t dst_map,
156 vm_map_entry_t entry,
157 vm_map_copy_t copy,
91447636 158 vm_map_address_t start);
1c79356b 159
91447636 160static kern_return_t vm_map_copy_overwrite_aligned(
161 vm_map_t dst_map,
162 vm_map_entry_t tmp_entry,
163 vm_map_copy_t copy,
91447636 164 vm_map_offset_t start,
165 pmap_t pmap);
166
91447636 167static kern_return_t vm_map_copyin_kernel_buffer(
1c79356b 168 vm_map_t src_map,
169 vm_map_address_t src_addr,
170 vm_map_size_t len,
171 boolean_t src_destroy,
172 vm_map_copy_t *copy_result); /* OUT */
173
91447636 174static kern_return_t vm_map_copyout_kernel_buffer(
1c79356b 175 vm_map_t map,
91447636 176 vm_map_address_t *addr, /* IN/OUT */
177 vm_map_copy_t copy,
178 boolean_t overwrite);
179
91447636 180static void vm_map_fork_share(
181 vm_map_t old_map,
182 vm_map_entry_t old_entry,
183 vm_map_t new_map);
184
91447636 185static boolean_t vm_map_fork_copy(
186 vm_map_t old_map,
187 vm_map_entry_t *old_entry_p,
188 vm_map_t new_map);
189
0c530ab8 190void vm_map_region_top_walk(
191 vm_map_entry_t entry,
192 vm_region_top_info_t top);
193
0c530ab8 194void vm_map_region_walk(
195 vm_map_t map,
196 vm_map_offset_t va,
1c79356b 197 vm_map_entry_t entry,
1c79356b 198 vm_object_offset_t offset,
199 vm_object_size_t range,
200 vm_region_extended_info_t extended);
201
202static kern_return_t vm_map_wire_nested(
1c79356b 203 vm_map_t map,
204 vm_map_offset_t start,
205 vm_map_offset_t end,
206 vm_prot_t access_type,
207 boolean_t user_wire,
208 pmap_t map_pmap,
209 vm_map_offset_t pmap_addr);
210
211static kern_return_t vm_map_unwire_nested(
212 vm_map_t map,
213 vm_map_offset_t start,
214 vm_map_offset_t end,
215 boolean_t user_wire,
216 pmap_t map_pmap,
217 vm_map_offset_t pmap_addr);
218
219static kern_return_t vm_map_overwrite_submap_recurse(
220 vm_map_t dst_map,
221 vm_map_offset_t dst_addr,
222 vm_map_size_t dst_size);
223
224static kern_return_t vm_map_copy_overwrite_nested(
225 vm_map_t dst_map,
226 vm_map_offset_t dst_addr,
227 vm_map_copy_t copy,
228 boolean_t interruptible,
229 pmap_t pmap);
230
231static kern_return_t vm_map_remap_extract(
232 vm_map_t map,
233 vm_map_offset_t addr,
234 vm_map_size_t size,
235 boolean_t copy,
236 struct vm_map_header *map_header,
237 vm_prot_t *cur_protection,
238 vm_prot_t *max_protection,
239 vm_inherit_t inheritance,
240 boolean_t pageable);
241
242static kern_return_t vm_map_remap_range_allocate(
243 vm_map_t map,
244 vm_map_address_t *address,
245 vm_map_size_t size,
246 vm_map_offset_t mask,
247 boolean_t anywhere,
248 vm_map_entry_t *map_entry);
249
250static void vm_map_region_look_for_page(
251 vm_map_t map,
252 vm_map_offset_t va,
253 vm_object_t object,
254 vm_object_offset_t offset,
255 int max_refcnt,
256 int depth,
257 vm_region_extended_info_t extended);
258
259static int vm_map_region_count_obj_refs(
260 vm_map_entry_t entry,
261 vm_object_t object);
262
263/*
264 * Macros to copy a vm_map_entry. We must be careful to correctly
265 * manage the wired page count. vm_map_entry_copy() creates a new
266 * map entry to the same memory - the wired count in the new entry
267 * must be set to zero. vm_map_entry_copy_full() creates a new
268 * entry that is identical to the old entry. This preserves the
269 * wire count; it's used for map splitting and zone changing in
270 * vm_map_copyout.
271 */
272#define vm_map_entry_copy(NEW,OLD) \
273MACRO_BEGIN \
274 *(NEW) = *(OLD); \
275 (NEW)->is_shared = FALSE; \
276 (NEW)->needs_wakeup = FALSE; \
277 (NEW)->in_transition = FALSE; \
278 (NEW)->wired_count = 0; \
279 (NEW)->user_wired_count = 0; \
280MACRO_END
281
282#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
283
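/*
 * Editor's note -- illustrative sketch, not part of the original source.
 * It shows why vm_map_entry_copy() clears the wire counts: the duplicate
 * maps the same memory but has not wired any pages itself.  The helper
 * name "example_duplicate_entry" and its variables are hypothetical.
 */
#if 0	/* example only */
static vm_map_entry_t
example_duplicate_entry(
	struct vm_map_header	*hdr,
	vm_map_entry_t		src_entry)
{
	vm_map_entry_t	dup;

	dup = _vm_map_entry_create(hdr);
	vm_map_entry_copy(dup, src_entry);	/* wired counts reset to 0 */
	assert(dup->wired_count == 0 && dup->user_wired_count == 0);
	/* vm_map_entry_copy_full() would have preserved the wire counts */
	return dup;
}
#endif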
284/*
285 * Virtual memory maps provide for the mapping, protection,
286 * and sharing of virtual memory objects. In addition,
287 * this module provides for an efficient virtual copy of
288 * memory from one map to another.
289 *
290 * Synchronization is required prior to most operations.
291 *
292 * Maps consist of an ordered doubly-linked list of simple
293 * entries; a single hint is used to speed up lookups.
294 *
295 * Sharing maps have been deleted from this version of Mach.
296 * All shared objects are now mapped directly into the respective
297 * maps. This requires a change in the copy on write strategy;
298 * the asymmetric (delayed) strategy is used for shared temporary
299 * objects instead of the symmetric (shadow) strategy. All maps
300 * are now "top level" maps (either task map, kernel map or submap
301 * of the kernel map).
302 *
 303 * Since portions of maps are specified by start/end addresses,
304 * which may not align with existing map entries, all
305 * routines merely "clip" entries to these start/end values.
306 * [That is, an entry is split into two, bordering at a
307 * start or end value.] Note that these clippings may not
308 * always be necessary (as the two resulting entries are then
309 * not changed); however, the clipping is done for convenience.
310 * No attempt is currently made to "glue back together" two
311 * abutting entries.
312 *
313 * The symmetric (shadow) copy strategy implements virtual copy
314 * by copying VM object references from one map to
315 * another, and then marking both regions as copy-on-write.
316 * It is important to note that only one writeable reference
317 * to a VM object region exists in any map when this strategy
318 * is used -- this means that shadow object creation can be
 319 * delayed until a write operation occurs. The asymmetric (delayed)
320 * strategy allows multiple maps to have writeable references to
321 * the same region of a vm object, and hence cannot delay creating
322 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
323 * Copying of permanent objects is completely different; see
324 * vm_object_copy_strategically() in vm_object.c.
325 */
326
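/*
 * Editor's note -- illustrative sketch, not part of the original source.
 * It demonstrates the "clipping" described above using the internal
 * helpers prototyped earlier; the real code goes through the
 * vm_map_clip_start()/vm_map_clip_end() wrappers, which also maintain
 * the map's "first_free" hint.  The function name is hypothetical.
 */
#if 0	/* example only */
static void
example_clip_to_range(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (entry->vme_start < start)
		_vm_map_clip_start(&map->hdr, entry, start);	/* split below */
	if (entry->vme_end > end)
		_vm_map_clip_end(&map->hdr, entry, end);	/* split above */
	/*
	 * "entry" now covers exactly [start, end); the resulting pieces
	 * are not glued back together automatically.
	 */
}
#endif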
327static zone_t vm_map_zone; /* zone for vm_map structures */
328static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
329static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
330static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
331
332
333/*
334 * Placeholder object for submap operations. This object is dropped
335 * into the range by a call to vm_map_find, and removed when
336 * vm_map_submap creates the submap.
337 */
338
339vm_object_t vm_submap_object;
340
341/*
342 * vm_map_init:
343 *
344 * Initialize the vm_map module. Must be called before
345 * any other vm_map routines.
346 *
347 * Map and entry structures are allocated from zones -- we must
348 * initialize those zones.
349 *
350 * There are three zones of interest:
351 *
352 * vm_map_zone: used to allocate maps.
353 * vm_map_entry_zone: used to allocate map entries.
354 * vm_map_kentry_zone: used to allocate map entries for the kernel.
355 *
356 * The kernel allocates map entries from a special zone that is initially
357 * "crammed" with memory. It would be difficult (perhaps impossible) for
 358 * the kernel to allocate more memory to an entry zone when it became
359 * empty since the very act of allocating memory implies the creation
360 * of a new entry.
361 */
362
363static void *map_data;
364static vm_map_size_t map_data_size;
365static void *kentry_data;
366static vm_map_size_t kentry_data_size;
367static int kentry_count = 2048; /* to init kentry_data_size */
1c79356b 368
369#define NO_COALESCE_LIMIT (1024 * 128)
370
371/*
372 * Threshold for aggressive (eager) page map entering for vm copyout
373 * operations. Any copyout larger will NOT be aggressively entered.
374 */
91447636 375static vm_map_size_t vm_map_aggressive_enter_max; /* set by bootstrap */
1c79356b 376
377/* Skip acquiring locks if we're in the midst of a kernel core dump */
378extern unsigned int not_in_kdp;
379
380#ifdef __i386__
381kern_return_t
382vm_map_apple_protected(
383 vm_map_t map,
384 vm_map_offset_t start,
385 vm_map_offset_t end)
386{
387 boolean_t map_locked;
388 kern_return_t kr;
389 vm_map_entry_t map_entry;
390 memory_object_t protected_mem_obj;
391 vm_object_t protected_object;
392 vm_map_offset_t map_addr;
393
394 vm_map_lock_read(map);
395 map_locked = TRUE;
396
397 /* lookup the protected VM object */
398 if (!vm_map_lookup_entry(map,
399 start,
400 &map_entry) ||
401 map_entry->vme_end != end ||
402 map_entry->is_sub_map) {
403 /* that memory is not properly mapped */
404 kr = KERN_INVALID_ARGUMENT;
405 goto done;
406 }
407 protected_object = map_entry->object.vm_object;
408 if (protected_object == VM_OBJECT_NULL) {
409 /* there should be a VM object here at this point */
410 kr = KERN_INVALID_ARGUMENT;
411 goto done;
412 }
413
414 /*
415 * Lookup (and create if necessary) the protected memory object
416 * matching that VM object.
417 * If successful, this also grabs a reference on the memory object,
418 * to guarantee that it doesn't go away before we get a chance to map
419 * it.
420 */
421
422 protected_mem_obj = apple_protect_pager_setup(protected_object);
423 if (protected_mem_obj == NULL) {
424 kr = KERN_FAILURE;
425 goto done;
426 }
427
428 vm_map_unlock_read(map);
429 map_locked = FALSE;
430
431 /* map this memory object in place of the current one */
432 map_addr = start;
433 kr = mach_vm_map(map,
434 &map_addr,
435 end - start,
436 (mach_vm_offset_t) 0,
437 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
438 (ipc_port_t) protected_mem_obj,
439 map_entry->offset + (start - map_entry->vme_start),
440 TRUE,
441 map_entry->protection,
442 map_entry->max_protection,
443 map_entry->inheritance);
444 assert(map_addr == start);
445 if (kr == KERN_SUCCESS) {
446 /* let the pager know that this mem_obj is mapped */
447 apple_protect_pager_map(protected_mem_obj);
448 }
449 /*
450 * Release the reference obtained by apple_protect_pager_setup().
451 * The mapping (if it succeeded) is now holding a reference on the
452 * memory object.
453 */
454 memory_object_deallocate(protected_mem_obj);
455
456done:
457 if (map_locked) {
458 vm_map_unlock_read(map);
459 }
460 return kr;
461}
462#endif /* __i386__ */
463
464
465void
466vm_map_init(
467 void)
468{
91447636 469 vm_map_zone = zinit((vm_map_size_t) sizeof(struct vm_map), 40*1024,
470 PAGE_SIZE, "maps");
471
91447636 472 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
473 1024*1024, PAGE_SIZE*5,
474 "non-kernel map entries");
475
91447636 476 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
477 kentry_data_size, kentry_data_size,
478 "kernel map entries");
479
91447636 480 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
481 16*1024, PAGE_SIZE, "map copies");
482
483 /*
484 * Cram the map and kentry zones with initial data.
485 * Set kentry_zone non-collectible to aid zone_gc().
486 */
487 zone_change(vm_map_zone, Z_COLLECT, FALSE);
488 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
489 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
490 zcram(vm_map_zone, map_data, map_data_size);
491 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
492}
493
494void
495vm_map_steal_memory(
496 void)
497{
91447636 498 map_data_size = vm_map_round_page(10 * sizeof(struct vm_map));
499 map_data = pmap_steal_memory(map_data_size);
500
501#if 0
502 /*
503 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
504 * physical page (i.e. that beyond the kernel image and page tables)
505 * individually; we guess at most one entry per eight pages in the
506 * real world. This works out to roughly .1 of 1% of physical memory,
507 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
508 */
509#endif
510 kentry_count = pmap_free_pages() / 8;
511
512
513 kentry_data_size =
91447636 514 vm_map_round_page(kentry_count * sizeof(struct vm_map_entry));
515 kentry_data = pmap_steal_memory(kentry_data_size);
516}
517
518/*
519 * vm_map_create:
520 *
521 * Creates and returns a new empty VM map with
522 * the given physical map structure, and having
523 * the given lower and upper address bounds.
524 */
525vm_map_t
526vm_map_create(
527 pmap_t pmap,
528 vm_map_offset_t min,
529 vm_map_offset_t max,
530 boolean_t pageable)
531{
532 register vm_map_t result;
533
534 result = (vm_map_t) zalloc(vm_map_zone);
535 if (result == VM_MAP_NULL)
536 panic("vm_map_create");
537
538 vm_map_first_entry(result) = vm_map_to_entry(result);
539 vm_map_last_entry(result) = vm_map_to_entry(result);
540 result->hdr.nentries = 0;
541 result->hdr.entries_pageable = pageable;
542
543 result->size = 0;
544 result->ref_count = 1;
545#if TASK_SWAPPER
546 result->res_count = 1;
547 result->sw_state = MAP_SW_IN;
548#endif /* TASK_SWAPPER */
549 result->pmap = pmap;
550 result->min_offset = min;
551 result->max_offset = max;
552 result->wiring_required = FALSE;
553 result->no_zero_fill = FALSE;
9bccf70c 554 result->mapped = FALSE;
555 result->wait_for_space = FALSE;
556 result->first_free = vm_map_to_entry(result);
557 result->hint = vm_map_to_entry(result);
558 vm_map_lock_init(result);
91447636 559 mutex_init(&result->s_lock, 0);
560
561 return(result);
562}
563
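/*
 * Editor's note -- illustrative only, not part of the original source.
 * This mirrors how vm_map_enter() below uses vm_map_create() for its
 * temporary "zap" maps: a pageable map with no pmap behind it, spanning
 * just the address range of interest.  The helper name is hypothetical.
 */
#if 0	/* example only */
static vm_map_t
example_make_zap_map(
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_create(PMAP_NULL,	/* entries only, no physical map */
			     start,
			     end,
			     TRUE);	/* entries come from the pageable zone */
}
#endif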
564/*
565 * vm_map_entry_create: [ internal use only ]
566 *
567 * Allocates a VM map entry for insertion in the
568 * given map (or map copy). No fields are filled.
569 */
570#define vm_map_entry_create(map) \
571 _vm_map_entry_create(&(map)->hdr)
572
573#define vm_map_copy_entry_create(copy) \
574 _vm_map_entry_create(&(copy)->cpy_hdr)
575
91447636 576static vm_map_entry_t
577_vm_map_entry_create(
578 register struct vm_map_header *map_header)
579{
580 register zone_t zone;
581 register vm_map_entry_t entry;
582
583 if (map_header->entries_pageable)
584 zone = vm_map_entry_zone;
585 else
586 zone = vm_map_kentry_zone;
587
588 entry = (vm_map_entry_t) zalloc(zone);
589 if (entry == VM_MAP_ENTRY_NULL)
590 panic("vm_map_entry_create");
591
592 return(entry);
593}
594
595/*
596 * vm_map_entry_dispose: [ internal use only ]
597 *
598 * Inverse of vm_map_entry_create.
599 */
600#define vm_map_entry_dispose(map, entry) \
601MACRO_BEGIN \
602 if((entry) == (map)->first_free) \
603 (map)->first_free = vm_map_to_entry(map); \
604 if((entry) == (map)->hint) \
605 (map)->hint = vm_map_to_entry(map); \
606 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
607MACRO_END
608
609#define vm_map_copy_entry_dispose(map, entry) \
610 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
611
91447636 612static void
613_vm_map_entry_dispose(
614 register struct vm_map_header *map_header,
615 register vm_map_entry_t entry)
616{
617 register zone_t zone;
618
619 if (map_header->entries_pageable)
620 zone = vm_map_entry_zone;
621 else
622 zone = vm_map_kentry_zone;
623
91447636 624 zfree(zone, entry);
625}
626
627#if MACH_ASSERT
628static boolean_t first_free_is_valid(vm_map_t map); /* forward */
629static boolean_t first_free_check = FALSE;
630static boolean_t
631first_free_is_valid(
632 vm_map_t map)
633{
634 vm_map_entry_t entry, next;
635
636 if (!first_free_check)
637 return TRUE;
638
639 entry = vm_map_to_entry(map);
640 next = entry->vme_next;
641 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
642 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
643 next != vm_map_to_entry(map))) {
644 entry = next;
645 next = entry->vme_next;
646 if (entry == vm_map_to_entry(map))
647 break;
648 }
649 if (map->first_free != entry) {
650 printf("Bad first_free for map 0x%x: 0x%x should be 0x%x\n",
651 map, map->first_free, entry);
652 return FALSE;
653 }
654 return TRUE;
655}
91447636 656#endif /* MACH_ASSERT */
657
658/*
659 * UPDATE_FIRST_FREE:
660 *
661 * Updates the map->first_free pointer to the
662 * entry immediately before the first hole in the map.
663 * The map should be locked.
664 */
665#define UPDATE_FIRST_FREE(map, new_first_free) \
666MACRO_BEGIN \
667 vm_map_t UFF_map; \
668 vm_map_entry_t UFF_first_free; \
669 vm_map_entry_t UFF_next_entry; \
670 UFF_map = (map); \
671 UFF_first_free = (new_first_free); \
672 UFF_next_entry = UFF_first_free->vme_next; \
673 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
674 vm_map_trunc_page(UFF_first_free->vme_end) || \
675 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
676 vm_map_trunc_page(UFF_first_free->vme_start) && \
677 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
678 UFF_first_free = UFF_next_entry; \
679 UFF_next_entry = UFF_first_free->vme_next; \
680 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
681 break; \
682 } \
683 UFF_map->first_free = UFF_first_free; \
684 assert(first_free_is_valid(UFF_map)); \
685MACRO_END
686
687/*
688 * vm_map_entry_{un,}link:
689 *
690 * Insert/remove entries from maps (or map copies).
691 */
692#define vm_map_entry_link(map, after_where, entry) \
693MACRO_BEGIN \
694 vm_map_t VMEL_map; \
695 vm_map_entry_t VMEL_entry; \
696 VMEL_map = (map); \
697 VMEL_entry = (entry); \
698 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
699 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
700MACRO_END
701
702
703#define vm_map_copy_entry_link(copy, after_where, entry) \
704 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
705
706#define _vm_map_entry_link(hdr, after_where, entry) \
707 MACRO_BEGIN \
708 (hdr)->nentries++; \
709 (entry)->vme_prev = (after_where); \
710 (entry)->vme_next = (after_where)->vme_next; \
711 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
712 MACRO_END
713
714#define vm_map_entry_unlink(map, entry) \
715MACRO_BEGIN \
716 vm_map_t VMEU_map; \
717 vm_map_entry_t VMEU_entry; \
718 vm_map_entry_t VMEU_first_free; \
719 VMEU_map = (map); \
720 VMEU_entry = (entry); \
721 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
722 VMEU_first_free = VMEU_entry->vme_prev; \
723 else \
724 VMEU_first_free = VMEU_map->first_free; \
725 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
726 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
727MACRO_END
728
729#define vm_map_copy_entry_unlink(copy, entry) \
730 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
731
732#define _vm_map_entry_unlink(hdr, entry) \
733 MACRO_BEGIN \
734 (hdr)->nentries--; \
735 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
736 (entry)->vme_prev->vme_next = (entry)->vme_next; \
737 MACRO_END
738
1c79356b 739#if MACH_ASSERT && TASK_SWAPPER
740/*
741 * vm_map_res_reference:
742 *
743 * Adds another valid residence count to the given map.
744 *
745 * Map is locked so this function can be called from
746 * vm_map_swapin.
747 *
748 */
749void vm_map_res_reference(register vm_map_t map)
750{
751 /* assert map is locked */
752 assert(map->res_count >= 0);
753 assert(map->ref_count >= map->res_count);
754 if (map->res_count == 0) {
755 mutex_unlock(&map->s_lock);
756 vm_map_lock(map);
757 vm_map_swapin(map);
758 mutex_lock(&map->s_lock);
759 ++map->res_count;
760 vm_map_unlock(map);
761 } else
762 ++map->res_count;
763}
764
765/*
766 * vm_map_reference_swap:
767 *
768 * Adds valid reference and residence counts to the given map.
769 *
770 * The map may not be in memory (i.e. zero residence count).
771 *
772 */
773void vm_map_reference_swap(register vm_map_t map)
774{
775 assert(map != VM_MAP_NULL);
776 mutex_lock(&map->s_lock);
777 assert(map->res_count >= 0);
778 assert(map->ref_count >= map->res_count);
779 map->ref_count++;
780 vm_map_res_reference(map);
781 mutex_unlock(&map->s_lock);
782}
783
784/*
785 * vm_map_res_deallocate:
786 *
787 * Decrement residence count on a map; possibly causing swapout.
788 *
789 * The map must be in memory (i.e. non-zero residence count).
790 *
791 * The map is locked, so this function is callable from vm_map_deallocate.
792 *
793 */
794void vm_map_res_deallocate(register vm_map_t map)
795{
796 assert(map->res_count > 0);
797 if (--map->res_count == 0) {
798 mutex_unlock(&map->s_lock);
799 vm_map_lock(map);
800 vm_map_swapout(map);
801 vm_map_unlock(map);
802 mutex_lock(&map->s_lock);
803 }
804 assert(map->ref_count >= map->res_count);
805}
806#endif /* MACH_ASSERT && TASK_SWAPPER */
807
808/*
809 * vm_map_destroy:
810 *
811 * Actually destroy a map.
812 */
813void
814vm_map_destroy(
815 register vm_map_t map)
91447636 816{
817 vm_map_lock(map);
818 (void) vm_map_delete(map, map->min_offset,
819 map->max_offset, VM_MAP_NO_FLAGS,
820 VM_MAP_NULL);
1c79356b 821 vm_map_unlock(map);
91447636 822
91447636 823 if (map->hdr.nentries!=0)
0c530ab8 824 vm_map_remove_commpage(map);
91447636 825
826// assert(map->hdr.nentries==0);
827// if(map->hdr.nentries) { /* (BRINGUP) */
828// panic("vm_map_destroy: hdr.nentries is not 0 (%d) in map %08X\n", map->hdr.nentries, map);
829// }
1c79356b 830
831 if(map->pmap)
832 pmap_destroy(map->pmap);
1c79356b 833
91447636 834 zfree(vm_map_zone, map);
835}
836
837#if TASK_SWAPPER
838/*
839 * vm_map_swapin/vm_map_swapout
840 *
841 * Swap a map in and out, either referencing or releasing its resources.
842 * These functions are internal use only; however, they must be exported
843 * because they may be called from macros, which are exported.
844 *
845 * In the case of swapout, there could be races on the residence count,
846 * so if the residence count is up, we return, assuming that a
847 * vm_map_deallocate() call in the near future will bring us back.
848 *
849 * Locking:
850 * -- We use the map write lock for synchronization among races.
851 * -- The map write lock, and not the simple s_lock, protects the
852 * swap state of the map.
853 * -- If a map entry is a share map, then we hold both locks, in
854 * hierarchical order.
855 *
856 * Synchronization Notes:
857 * 1) If a vm_map_swapin() call happens while swapout in progress, it
858 * will block on the map lock and proceed when swapout is through.
859 * 2) A vm_map_reference() call at this time is illegal, and will
860 * cause a panic. vm_map_reference() is only allowed on resident
861 * maps, since it refuses to block.
862 * 3) A vm_map_swapin() call during a swapin will block, and
 863 * proceed when the first swapin is done, turning into a nop.
864 * This is the reason the res_count is not incremented until
865 * after the swapin is complete.
866 * 4) There is a timing hole after the checks of the res_count, before
867 * the map lock is taken, during which a swapin may get the lock
868 * before a swapout about to happen. If this happens, the swapin
869 * will detect the state and increment the reference count, causing
870 * the swapout to be a nop, thereby delaying it until a later
871 * vm_map_deallocate. If the swapout gets the lock first, then
872 * the swapin will simply block until the swapout is done, and
873 * then proceed.
874 *
875 * Because vm_map_swapin() is potentially an expensive operation, it
876 * should be used with caution.
877 *
878 * Invariants:
879 * 1) A map with a residence count of zero is either swapped, or
880 * being swapped.
881 * 2) A map with a non-zero residence count is either resident,
882 * or being swapped in.
883 */
884
885int vm_map_swap_enable = 1;
886
887void vm_map_swapin (vm_map_t map)
888{
889 register vm_map_entry_t entry;
890
891 if (!vm_map_swap_enable) /* debug */
892 return;
893
894 /*
895 * Map is locked
896 * First deal with various races.
897 */
898 if (map->sw_state == MAP_SW_IN)
899 /*
900 * we raced with swapout and won. Returning will incr.
901 * the res_count, turning the swapout into a nop.
902 */
903 return;
904
905 /*
906 * The residence count must be zero. If we raced with another
907 * swapin, the state would have been IN; if we raced with a
908 * swapout (after another competing swapin), we must have lost
909 * the race to get here (see above comment), in which case
910 * res_count is still 0.
911 */
912 assert(map->res_count == 0);
913
914 /*
915 * There are no intermediate states of a map going out or
916 * coming in, since the map is locked during the transition.
917 */
918 assert(map->sw_state == MAP_SW_OUT);
919
920 /*
921 * We now operate upon each map entry. If the entry is a sub-
922 * or share-map, we call vm_map_res_reference upon it.
923 * If the entry is an object, we call vm_object_res_reference
924 * (this may iterate through the shadow chain).
925 * Note that we hold the map locked the entire time,
926 * even if we get back here via a recursive call in
927 * vm_map_res_reference.
928 */
929 entry = vm_map_first_entry(map);
930
931 while (entry != vm_map_to_entry(map)) {
932 if (entry->object.vm_object != VM_OBJECT_NULL) {
933 if (entry->is_sub_map) {
934 vm_map_t lmap = entry->object.sub_map;
935 mutex_lock(&lmap->s_lock);
936 vm_map_res_reference(lmap);
937 mutex_unlock(&lmap->s_lock);
938 } else {
939 vm_object_t object = entry->object.vm_object;
940 vm_object_lock(object);
941 /*
942 * This call may iterate through the
943 * shadow chain.
944 */
945 vm_object_res_reference(object);
946 vm_object_unlock(object);
947 }
948 }
949 entry = entry->vme_next;
950 }
951 assert(map->sw_state == MAP_SW_OUT);
952 map->sw_state = MAP_SW_IN;
953}
954
955void vm_map_swapout(vm_map_t map)
956{
957 register vm_map_entry_t entry;
958
959 /*
960 * Map is locked
961 * First deal with various races.
962 * If we raced with a swapin and lost, the residence count
963 * will have been incremented to 1, and we simply return.
964 */
965 mutex_lock(&map->s_lock);
966 if (map->res_count != 0) {
967 mutex_unlock(&map->s_lock);
968 return;
969 }
970 mutex_unlock(&map->s_lock);
971
972 /*
973 * There are no intermediate states of a map going out or
974 * coming in, since the map is locked during the transition.
975 */
976 assert(map->sw_state == MAP_SW_IN);
977
978 if (!vm_map_swap_enable)
979 return;
980
981 /*
982 * We now operate upon each map entry. If the entry is a sub-
983 * or share-map, we call vm_map_res_deallocate upon it.
984 * If the entry is an object, we call vm_object_res_deallocate
985 * (this may iterate through the shadow chain).
986 * Note that we hold the map locked the entire time,
987 * even if we get back here via a recursive call in
988 * vm_map_res_deallocate.
989 */
990 entry = vm_map_first_entry(map);
991
992 while (entry != vm_map_to_entry(map)) {
993 if (entry->object.vm_object != VM_OBJECT_NULL) {
994 if (entry->is_sub_map) {
995 vm_map_t lmap = entry->object.sub_map;
996 mutex_lock(&lmap->s_lock);
997 vm_map_res_deallocate(lmap);
998 mutex_unlock(&lmap->s_lock);
999 } else {
1000 vm_object_t object = entry->object.vm_object;
1001 vm_object_lock(object);
1002 /*
1003 * This call may take a long time,
1004 * since it could actively push
1005 * out pages (if we implement it
1006 * that way).
1007 */
1008 vm_object_res_deallocate(object);
1009 vm_object_unlock(object);
1010 }
1011 }
1012 entry = entry->vme_next;
1013 }
1014 assert(map->sw_state == MAP_SW_IN);
1015 map->sw_state = MAP_SW_OUT;
1016}
1017
1018#endif /* TASK_SWAPPER */
1019
1020
1021/*
0c530ab8 1022 * SAVE_HINT_MAP_READ:
1023 *
1024 * Saves the specified entry as the hint for
 1025 * future lookups. Only a read lock is held on the map,
 1026 * so the store must be atomic... OSCompareAndSwap
 1027 * guarantees this. Also, we don't care if we collide
 1028 * and someone else wins and stores their 'hint'.
1c79356b 1029 */
1030#define SAVE_HINT_MAP_READ(map,value) \
1031MACRO_BEGIN \
1032 OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \
1033MACRO_END
1034
1035
1036/*
1037 * SAVE_HINT_MAP_WRITE:
1038 *
1039 * Saves the specified entry as the hint for
 1040 * future lookups. The write lock is held on the map,
 1041 * so no one else can be writing or looking
 1042 * until the lock is dropped; it's safe
 1043 * to just do an assignment.
1044 */
1045#define SAVE_HINT_MAP_WRITE(map,value) \
55e303ae 1046MACRO_BEGIN \
1c79356b 1047 (map)->hint = (value); \
55e303ae 1048MACRO_END
1049
1050/*
1051 * vm_map_lookup_entry: [ internal use only ]
1052 *
1053 * Finds the map entry containing (or
1054 * immediately preceding) the specified address
1055 * in the given map; the entry is returned
1056 * in the "entry" parameter. The boolean
1057 * result indicates whether the address is
1058 * actually contained in the map.
1059 */
1060boolean_t
1061vm_map_lookup_entry(
1062 register vm_map_t map,
1063 register vm_map_offset_t address,
1064 vm_map_entry_t *entry) /* OUT */
1065{
1066 register vm_map_entry_t cur;
1067 register vm_map_entry_t last;
1068
1069 /*
1070 * Start looking either from the head of the
1071 * list, or from the hint.
1072 */
1c79356b 1073 cur = map->hint;
1074
1075 if (cur == vm_map_to_entry(map))
1076 cur = cur->vme_next;
1077
1078 if (address >= cur->vme_start) {
1079 /*
1080 * Go from hint to end of list.
1081 *
1082 * But first, make a quick check to see if
1083 * we are already looking at the entry we
1084 * want (which is usually the case).
1085 * Note also that we don't need to save the hint
1086 * here... it is the same hint (unless we are
1087 * at the header, in which case the hint didn't
1088 * buy us anything anyway).
1089 */
1090 last = vm_map_to_entry(map);
1091 if ((cur != last) && (cur->vme_end > address)) {
1092 *entry = cur;
1093 return(TRUE);
1094 }
1095 }
1096 else {
1097 /*
1098 * Go from start to hint, *inclusively*
1099 */
1100 last = cur->vme_next;
1101 cur = vm_map_first_entry(map);
1102 }
1103
1104 /*
1105 * Search linearly
1106 */
1107
1108 while (cur != last) {
1109 if (cur->vme_end > address) {
1110 if (address >= cur->vme_start) {
1111 /*
1112 * Save this lookup for future
1113 * hints, and return
1114 */
1115
1116 *entry = cur;
1117 SAVE_HINT_MAP_READ(map, cur);
1118
1119 return(TRUE);
1120 }
1121 break;
1122 }
1123 cur = cur->vme_next;
1124 }
1125 *entry = cur->vme_prev;
1126 SAVE_HINT_MAP_READ(map, *entry);
1127
1128 return(FALSE);
1129}
1130
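/*
 * Editor's note -- a minimal caller sketch, not part of the original
 * source.  It shows the usual vm_map_lookup_entry() idiom: the boolean
 * result says whether "addr" is mapped; on FALSE the OUT entry is the
 * one immediately preceding the hole.  The helper name is hypothetical.
 */
#if 0	/* example only */
static boolean_t
example_is_mapped(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_map_entry_t	entry;
	boolean_t	mapped;

	vm_map_lock_read(map);
	mapped = vm_map_lookup_entry(map, addr, &entry);
	/* if TRUE:  entry->vme_start <= addr < entry->vme_end */
	vm_map_unlock_read(map);
	return mapped;
}
#endif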
1131/*
1132 * Routine: vm_map_find_space
1133 * Purpose:
1134 * Allocate a range in the specified virtual address map,
1135 * returning the entry allocated for that range.
1136 * Used by kmem_alloc, etc.
1137 *
 1138 * The map must NOT be locked. It will be returned locked
1139 * on KERN_SUCCESS, unlocked on failure.
1140 *
1141 * If an entry is allocated, the object/offset fields
1142 * are initialized to zero.
1143 */
1144kern_return_t
1145vm_map_find_space(
1146 register vm_map_t map,
1147 vm_map_offset_t *address, /* OUT */
1148 vm_map_size_t size,
1149 vm_map_offset_t mask,
0c530ab8 1150 int flags,
1151 vm_map_entry_t *o_entry) /* OUT */
1152{
1153 register vm_map_entry_t entry, new_entry;
1154 register vm_map_offset_t start;
1155 register vm_map_offset_t end;
1156
1157 if (size == 0) {
1158 *address = 0;
1159 return KERN_INVALID_ARGUMENT;
1160 }
1161
1162 new_entry = vm_map_entry_create(map);
1163
1164 /*
1165 * Look for the first possible address; if there's already
1166 * something at this address, we have to start after it.
1167 */
1168
1169 vm_map_lock(map);
1170
1171 assert(first_free_is_valid(map));
1172 if ((entry = map->first_free) == vm_map_to_entry(map))
1173 start = map->min_offset;
1174 else
1175 start = entry->vme_end;
1176
1177 /*
1178 * In any case, the "entry" always precedes
1179 * the proposed new region throughout the loop:
1180 */
1181
1182 while (TRUE) {
1183 register vm_map_entry_t next;
1184
1185 /*
1186 * Find the end of the proposed new region.
1187 * Be sure we didn't go beyond the end, or
1188 * wrap around the address.
1189 */
1190
1191 end = ((start + mask) & ~mask);
1192 if (end < start) {
1193 vm_map_entry_dispose(map, new_entry);
1194 vm_map_unlock(map);
1195 return(KERN_NO_SPACE);
1196 }
1197 start = end;
1198 end += size;
1199
1200 if ((end > map->max_offset) || (end < start)) {
1201 vm_map_entry_dispose(map, new_entry);
1202 vm_map_unlock(map);
1203 return(KERN_NO_SPACE);
1204 }
1205
1206 /*
1207 * If there are no more entries, we must win.
1208 */
1209
1210 next = entry->vme_next;
1211 if (next == vm_map_to_entry(map))
1212 break;
1213
1214 /*
1215 * If there is another entry, it must be
1216 * after the end of the potential new region.
1217 */
1218
1219 if (next->vme_start >= end)
1220 break;
1221
1222 /*
1223 * Didn't fit -- move to the next entry.
1224 */
1225
1226 entry = next;
1227 start = entry->vme_end;
1228 }
1229
1230 /*
1231 * At this point,
1232 * "start" and "end" should define the endpoints of the
1233 * available new range, and
1234 * "entry" should refer to the region before the new
1235 * range, and
1236 *
1237 * the map should be locked.
1238 */
1239
1240 *address = start;
1241
1242 new_entry->vme_start = start;
1243 new_entry->vme_end = end;
1244 assert(page_aligned(new_entry->vme_start));
1245 assert(page_aligned(new_entry->vme_end));
1246
1247 new_entry->is_shared = FALSE;
1248 new_entry->is_sub_map = FALSE;
1249 new_entry->use_pmap = FALSE;
1250 new_entry->object.vm_object = VM_OBJECT_NULL;
1251 new_entry->offset = (vm_object_offset_t) 0;
1252
1253 new_entry->needs_copy = FALSE;
1254
1255 new_entry->inheritance = VM_INHERIT_DEFAULT;
1256 new_entry->protection = VM_PROT_DEFAULT;
1257 new_entry->max_protection = VM_PROT_ALL;
1258 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1259 new_entry->wired_count = 0;
1260 new_entry->user_wired_count = 0;
1261
1262 new_entry->in_transition = FALSE;
1263 new_entry->needs_wakeup = FALSE;
1264
1265 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1266
1267 /*
1268 * Insert the new entry into the list
1269 */
1270
1271 vm_map_entry_link(map, entry, new_entry);
1272
1273 map->size += size;
1274
1275 /*
1276 * Update the lookup hint
1277 */
0c530ab8 1278 SAVE_HINT_MAP_WRITE(map, new_entry);
1279
1280 *o_entry = new_entry;
1281 return(KERN_SUCCESS);
1282}
1283
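/*
 * Editor's note -- simplified sketch, not part of the original source,
 * of the kmem_alloc()-style calling convention documented above: on
 * KERN_SUCCESS the map comes back locked with the new entry's object and
 * offset zeroed, so the caller fills them in and unlocks.  The helper
 * name is hypothetical; the caller is assumed to hold an object reference.
 */
#if 0	/* example only */
static kern_return_t
example_alloc_space(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_object_t	object,
	vm_map_offset_t	*addrp)
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, addrp, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;			/* map is unlocked on failure */

	entry->object.vm_object = object;
	entry->offset = (vm_object_offset_t) 0;
	vm_map_unlock(map);			/* success: map was returned locked */
	return KERN_SUCCESS;
}
#endif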
1284int vm_map_pmap_enter_print = FALSE;
1285int vm_map_pmap_enter_enable = FALSE;
1286
1287/*
91447636 1288 * Routine: vm_map_pmap_enter [internal only]
1289 *
1290 * Description:
1291 * Force pages from the specified object to be entered into
1292 * the pmap at the specified address if they are present.
 1293 * As soon as a page is not found in the object, the scan ends.
1294 *
1295 * Returns:
1296 * Nothing.
1297 *
1298 * In/out conditions:
1299 * The source map should not be locked on entry.
1300 */
91447636 1301static void
1302vm_map_pmap_enter(
1303 vm_map_t map,
1304 register vm_map_offset_t addr,
1305 register vm_map_offset_t end_addr,
1306 register vm_object_t object,
1307 vm_object_offset_t offset,
1308 vm_prot_t protection)
1309{
9bccf70c 1310 unsigned int cache_attr;
0b4e3aa0 1311
1312 if(map->pmap == 0)
1313 return;
1314
1315 while (addr < end_addr) {
1316 register vm_page_t m;
1317
1318 vm_object_lock(object);
1319 vm_object_paging_begin(object);
1320
1321 m = vm_page_lookup(object, offset);
1322 /*
1323 * ENCRYPTED SWAP:
1324 * The user should never see encrypted data, so do not
1325 * enter an encrypted page in the page table.
1326 */
1327 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1328 (m->unusual && ( m->error || m->restart || m->absent ||
1329 protection & m->page_lock))) {
1330
1331 vm_object_paging_end(object);
1332 vm_object_unlock(object);
1333 return;
1334 }
1335
1336 assert(!m->fictitious); /* XXX is this possible ??? */
1337
1338 if (vm_map_pmap_enter_print) {
1339 printf("vm_map_pmap_enter:");
1340 printf("map: %x, addr: %llx, object: %x, offset: %llx\n",
1341 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1342 }
1c79356b 1343 m->busy = TRUE;
1344
1345 if (m->no_isync == TRUE) {
91447636 1346 pmap_sync_page_data_phys(m->phys_page);
1347 m->no_isync = FALSE;
1348 }
1349
1350 cache_attr = ((unsigned int)object->wimg_bits) & VM_WIMG_MASK;
1351 vm_object_unlock(object);
1352
1353 PMAP_ENTER(map->pmap, addr, m,
1354 protection, cache_attr, FALSE);
1355
1356 vm_object_lock(object);
0b4e3aa0 1357
1358 PAGE_WAKEUP_DONE(m);
1359 vm_page_lock_queues();
1360 if (!m->active && !m->inactive)
1361 vm_page_activate(m);
1362 vm_page_unlock_queues();
1363 vm_object_paging_end(object);
1364 vm_object_unlock(object);
1365
1366 offset += PAGE_SIZE_64;
1367 addr += PAGE_SIZE;
1368 }
1369}
1370
1371boolean_t vm_map_pmap_is_empty(
1372 vm_map_t map,
1373 vm_map_offset_t start,
1374 vm_map_offset_t end);
1375boolean_t vm_map_pmap_is_empty(
1376 vm_map_t map,
1377 vm_map_offset_t start,
1378 vm_map_offset_t end)
1379{
1380 vm_map_offset_t offset;
1381 ppnum_t phys_page;
1382
1383 if (map->pmap == NULL) {
1384 return TRUE;
1385 }
1386 for (offset = start;
1387 offset < end;
1388 offset += PAGE_SIZE) {
1389 phys_page = pmap_find_phys(map->pmap, offset);
1390 if (phys_page) {
1391 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1392 "page %d at 0x%llx\n",
1393 map, start, end, phys_page, offset);
1394 return FALSE;
1395 }
1396 }
1397 return TRUE;
1398}
1399
1400/*
1401 * Routine: vm_map_enter
1402 *
1403 * Description:
1404 * Allocate a range in the specified virtual address map.
1405 * The resulting range will refer to memory defined by
1406 * the given memory object and offset into that object.
1407 *
1408 * Arguments are as defined in the vm_map call.
1409 */
1410int _map_enter_debug = 0;
1411static unsigned int vm_map_enter_restore_successes = 0;
1412static unsigned int vm_map_enter_restore_failures = 0;
1413kern_return_t
1414vm_map_enter(
1415 vm_map_t map,
1416 vm_map_offset_t *address, /* IN/OUT */
1417 vm_map_size_t size,
1418 vm_map_offset_t mask,
1419 int flags,
1420 vm_object_t object,
1421 vm_object_offset_t offset,
1422 boolean_t needs_copy,
1423 vm_prot_t cur_protection,
1424 vm_prot_t max_protection,
1425 vm_inherit_t inheritance)
1426{
1427 vm_map_entry_t entry, new_entry;
1428 vm_map_offset_t start, tmp_start;
1429 vm_map_offset_t end, tmp_end;
1c79356b 1430 kern_return_t result = KERN_SUCCESS;
1431 vm_map_t zap_old_map = VM_MAP_NULL;
1432 vm_map_t zap_new_map = VM_MAP_NULL;
1433 boolean_t map_locked = FALSE;
1434 boolean_t pmap_empty = TRUE;
1435 boolean_t new_mapping_established = FALSE;
1436 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1437 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1438 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1439 char alias;
1440
1441 if (size == 0) {
1442 *address = 0;
1443 return KERN_INVALID_ARGUMENT;
1444 }
1445
1c79356b 1446 VM_GET_FLAGS_ALIAS(flags, alias);
0c530ab8 1447
1448#define RETURN(value) { result = value; goto BailOut; }
1449
1450 assert(page_aligned(*address));
1451 assert(page_aligned(size));
1452
1453 /*
1454 * Only zero-fill objects are allowed to be purgable.
1455 * LP64todo - limit purgable objects to 32-bits for now
1456 */
1457 if (purgable &&
1458 (offset != 0 ||
1459 (object != VM_OBJECT_NULL &&
1460 (object->size != size ||
1461 object->purgable == VM_OBJECT_NONPURGABLE))
1462 || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */
1463 return KERN_INVALID_ARGUMENT;
1464
1465 if (!anywhere && overwrite) {
1466 /*
1467 * Create a temporary VM map to hold the old mappings in the
1468 * affected area while we create the new one.
1469 * This avoids releasing the VM map lock in
1470 * vm_map_entry_delete() and allows atomicity
1471 * when we want to replace some mappings with a new one.
1472 * It also allows us to restore the old VM mappings if the
1473 * new mapping fails.
1474 */
1475 zap_old_map = vm_map_create(PMAP_NULL,
1476 *address,
1477 *address + size,
1478 TRUE);
1479 }
1480
1481 StartAgain: ;
1482
1483 start = *address;
1484
1485 if (anywhere) {
1486 vm_map_lock(map);
91447636 1487 map_locked = TRUE;
1488
1489 /*
1490 * Calculate the first possible address.
1491 */
1492
1493 if (start < map->min_offset)
1494 start = map->min_offset;
1495 if (start > map->max_offset)
1496 RETURN(KERN_NO_SPACE);
1497
1498 /*
1499 * Look for the first possible address;
1500 * if there's already something at this
1501 * address, we have to start after it.
1502 */
1503
1504 assert(first_free_is_valid(map));
1505 if (start == map->min_offset) {
1506 if ((entry = map->first_free) != vm_map_to_entry(map))
1507 start = entry->vme_end;
1508 } else {
1509 vm_map_entry_t tmp_entry;
1510 if (vm_map_lookup_entry(map, start, &tmp_entry))
1511 start = tmp_entry->vme_end;
1512 entry = tmp_entry;
1513 }
1514
1515 /*
1516 * In any case, the "entry" always precedes
1517 * the proposed new region throughout the
1518 * loop:
1519 */
1520
1521 while (TRUE) {
1522 register vm_map_entry_t next;
1523
1524 /*
1525 * Find the end of the proposed new region.
1526 * Be sure we didn't go beyond the end, or
1527 * wrap around the address.
1528 */
1529
1530 end = ((start + mask) & ~mask);
1531 if (end < start)
1532 RETURN(KERN_NO_SPACE);
1533 start = end;
1534 end += size;
1535
1536 if ((end > map->max_offset) || (end < start)) {
1537 if (map->wait_for_space) {
1538 if (size <= (map->max_offset -
1539 map->min_offset)) {
1540 assert_wait((event_t)map,
1541 THREAD_ABORTSAFE);
1542 vm_map_unlock(map);
1543 map_locked = FALSE;
1544 thread_block(THREAD_CONTINUE_NULL);
1545 goto StartAgain;
1546 }
1547 }
1548 RETURN(KERN_NO_SPACE);
1549 }
1550
1551 /*
1552 * If there are no more entries, we must win.
1553 */
1554
1555 next = entry->vme_next;
1556 if (next == vm_map_to_entry(map))
1557 break;
1558
1559 /*
1560 * If there is another entry, it must be
1561 * after the end of the potential new region.
1562 */
1563
1564 if (next->vme_start >= end)
1565 break;
1566
1567 /*
1568 * Didn't fit -- move to the next entry.
1569 */
1570
1571 entry = next;
1572 start = entry->vme_end;
1573 }
1574 *address = start;
1575 } else {
1576 vm_map_entry_t temp_entry;
1577
1578 /*
1579 * Verify that:
1580 * the address doesn't itself violate
1581 * the mask requirement.
1582 */
1583
1584 vm_map_lock(map);
91447636 1585 map_locked = TRUE;
1586 if ((start & mask) != 0)
1587 RETURN(KERN_NO_SPACE);
1588
1589 /*
1590 * ... the address is within bounds
1591 */
1592
1593 end = start + size;
1594
1595 if ((start < map->min_offset) ||
1596 (end > map->max_offset) ||
1597 (start >= end)) {
1598 RETURN(KERN_INVALID_ADDRESS);
1599 }
1600
1601 if (overwrite && zap_old_map != VM_MAP_NULL) {
1602 /*
1603 * Fixed mapping and "overwrite" flag: attempt to
1604 * remove all existing mappings in the specified
1605 * address range, saving them in our "zap_old_map".
1606 */
1607 (void) vm_map_delete(map, start, end,
1608 VM_MAP_REMOVE_SAVE_ENTRIES,
1609 zap_old_map);
1610 }
1611
1612 /*
1613 * ... the starting address isn't allocated
1614 */
1615
1616 if (vm_map_lookup_entry(map, start, &temp_entry))
1617 RETURN(KERN_NO_SPACE);
1618
1619 entry = temp_entry;
1620
1621 /*
1622 * ... the next region doesn't overlap the
1623 * end point.
1624 */
1625
1626 if ((entry->vme_next != vm_map_to_entry(map)) &&
1627 (entry->vme_next->vme_start < end))
1628 RETURN(KERN_NO_SPACE);
1629 }
1630
1631 /*
1632 * At this point,
1633 * "start" and "end" should define the endpoints of the
1634 * available new range, and
1635 * "entry" should refer to the region before the new
1636 * range, and
1637 *
1638 * the map should be locked.
1639 */
1640
1641 /*
1642 * See whether we can avoid creating a new entry (and object) by
1643 * extending one of our neighbors. [So far, we only attempt to
1644 * extend from below.] Note that we can never extend/join
1645 * purgable objects because they need to remain distinct
1646 * entities in order to implement their "volatile object"
1647 * semantics.
1648 */
1649
1650 if (purgable) {
1651 if (object == VM_OBJECT_NULL) {
1652 object = vm_object_allocate(size);
1653 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1654 object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;
1655 offset = (vm_object_offset_t)0;
1656 }
1657 } else if ((object == VM_OBJECT_NULL) &&
1658 (entry != vm_map_to_entry(map)) &&
1659 (entry->vme_end == start) &&
1660 (!entry->is_shared) &&
1661 (!entry->is_sub_map) &&
1662 (entry->alias == alias) &&
1663 (entry->inheritance == inheritance) &&
1664 (entry->protection == cur_protection) &&
1665 (entry->max_protection == max_protection) &&
1666 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1667 (entry->in_transition == 0) &&
55e303ae 1668 ((alias == VM_MEMORY_REALLOC) || ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) &&
1669 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1670 if (vm_object_coalesce(entry->object.vm_object,
1671 VM_OBJECT_NULL,
1672 entry->offset,
1673 (vm_object_offset_t) 0,
1674 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1675 (vm_map_size_t)(end - entry->vme_end))) {
1676
1677 /*
1678 * Coalesced the two objects - can extend
1679 * the previous map entry to include the
1680 * new range.
1681 */
1682 map->size += (end - entry->vme_end);
1683 entry->vme_end = end;
1684 UPDATE_FIRST_FREE(map, map->first_free);
1685 RETURN(KERN_SUCCESS);
1686 }
1687 }
1688
1689 /*
1690 * Create a new entry
1691 * LP64todo - for now, we can only allocate 4GB internal objects
1692 * because the default pager can't page bigger ones. Remove this
1693 * when it can.
1694 *
1695 * XXX FBDP
1696 * The reserved "page zero" in each process's address space can
1697 * be arbitrarily large. Splitting it into separate 4GB objects and
1698 * therefore different VM map entries serves no purpose and just
1699 * slows down operations on the VM map, so let's not split the
1700 * allocation into 4GB chunks if the max protection is NONE. That
1701 * memory should never be accessible, so it will never get to the
1702 * default pager.
1c79356b 1703 */
91447636 1704 tmp_start = start;
1705 if (object == VM_OBJECT_NULL &&
1706 size > (vm_map_size_t)VM_MAX_ADDRESS &&
1707 max_protection != VM_PROT_NONE)
1708 tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
1709 else
1710 tmp_end = end;
1711 do {
1712 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1713 object, offset, needs_copy, FALSE, FALSE,
1714 cur_protection, max_protection,
1715 VM_BEHAVIOR_DEFAULT, inheritance, 0);
1716 new_entry->alias = alias;
1717 entry = new_entry;
0c530ab8 1718 } while (tmp_end != end &&
1719 (tmp_start = tmp_end) &&
1720 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ?
1721 tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end));
1722
1c79356b 1723 vm_map_unlock(map);
1724 map_locked = FALSE;
1725
1726 new_mapping_established = TRUE;
1727
1728 /* Wire down the new entry if the user
1729 * requested all new map entries be wired.
1730 */
1731 if (map->wiring_required) {
91447636 1732 pmap_empty = FALSE; /* pmap won't be empty */
1c79356b 1733 result = vm_map_wire(map, start, end,
1734 new_entry->protection, TRUE);
1735 RETURN(result);
1736 }
1737
1738 if ((object != VM_OBJECT_NULL) &&
1739 (vm_map_pmap_enter_enable) &&
1740 (!anywhere) &&
1741 (!needs_copy) &&
1742 (size < (128*1024))) {
91447636 1743 pmap_empty = FALSE; /* pmap won't be empty */
1744
1745#ifdef STACK_ONLY_NX
1746 if (alias != VM_MEMORY_STACK && cur_protection)
1747 cur_protection |= VM_PROT_EXECUTE;
1748#endif
1749 vm_map_pmap_enter(map, start, end,
1750 object, offset, cur_protection);
1751 }
1752
1c79356b 1753 BailOut: ;
1754 if (result == KERN_SUCCESS &&
1755 pmap_empty &&
1756 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
1757 assert(vm_map_pmap_is_empty(map, *address, *address+size));
1758 }
1759
1760 if (result != KERN_SUCCESS) {
1761 if (new_mapping_established) {
1762 /*
1763 * We have to get rid of the new mappings since we
1764 * won't make them available to the user.
 1765 * Try to do that atomically, to minimize the risk
 1766 * that someone else creates new mappings in that range.
1767 */
1768 zap_new_map = vm_map_create(PMAP_NULL,
1769 *address,
1770 *address + size,
1771 TRUE);
1772 if (!map_locked) {
1773 vm_map_lock(map);
1774 map_locked = TRUE;
1775 }
1776 (void) vm_map_delete(map, *address, *address+size,
1777 VM_MAP_REMOVE_SAVE_ENTRIES,
1778 zap_new_map);
1779 }
1780 if (zap_old_map != VM_MAP_NULL &&
1781 zap_old_map->hdr.nentries != 0) {
1782 vm_map_entry_t entry1, entry2;
1783
1784 /*
1785 * The new mapping failed. Attempt to restore
1786 * the old mappings, saved in the "zap_old_map".
1787 */
1788 if (!map_locked) {
1789 vm_map_lock(map);
1790 map_locked = TRUE;
1791 }
1792
1793 /* first check if the coast is still clear */
1794 start = vm_map_first_entry(zap_old_map)->vme_start;
1795 end = vm_map_last_entry(zap_old_map)->vme_end;
1796 if (vm_map_lookup_entry(map, start, &entry1) ||
1797 vm_map_lookup_entry(map, end, &entry2) ||
1798 entry1 != entry2) {
1799 /*
1800 * Part of that range has already been
1801 * re-mapped: we can't restore the old
1802 * mappings...
1803 */
1804 vm_map_enter_restore_failures++;
1805 } else {
1806 /*
1807 * Transfer the saved map entries from
1808 * "zap_old_map" to the original "map",
1809 * inserting them all after "entry1".
1810 */
1811 for (entry2 = vm_map_first_entry(zap_old_map);
1812 entry2 != vm_map_to_entry(zap_old_map);
1813 entry2 = vm_map_first_entry(zap_old_map)) {
1814 vm_map_entry_unlink(zap_old_map,
1815 entry2);
1816 vm_map_entry_link(map, entry1, entry2);
1817 entry1 = entry2;
1818 }
1819 if (map->wiring_required) {
1820 /*
1821 * XXX TODO: we should rewire the
1822 * old pages here...
1823 */
1824 }
1825 vm_map_enter_restore_successes++;
1826 }
1827 }
1828 }
1829
1830 if (map_locked) {
1831 vm_map_unlock(map);
1832 }
1833
1834 /*
1835 * Get rid of the "zap_maps" and all the map entries that
1836 * they may still contain.
1837 */
1838 if (zap_old_map != VM_MAP_NULL) {
1839 vm_map_destroy(zap_old_map);
1840 zap_old_map = VM_MAP_NULL;
1841 }
1842 if (zap_new_map != VM_MAP_NULL) {
1843 vm_map_destroy(zap_new_map);
1844 zap_new_map = VM_MAP_NULL;
1845 }
1846
1847 return result;
1848
1849#undef RETURN
1850}
1851
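/*
 * Editor's note -- a minimal caller sketch, not part of the original
 * source, showing vm_map_enter() for an anonymous allocation in the
 * style of vm_allocate(): with VM_OBJECT_NULL and VM_FLAGS_ANYWHERE the
 * routine chooses the address and may coalesce with the previous entry.
 * The helper name is hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_allocate_anywhere(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addrp)
{
	*addrp = 0;
	return vm_map_enter(map,
			    addrp,
			    vm_map_round_page(size),
			    (vm_map_offset_t) 0,	/* mask */
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL,		/* zero-fill object */
			    (vm_object_offset_t) 0,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT,
			    VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}
#endif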
1852
1853#if VM_CPM
1854
1855#ifdef MACH_ASSERT
0c530ab8 1856extern pmap_paddr_t avail_start, avail_end;
1857#endif
1858
1859/*
1860 * Allocate memory in the specified map, with the caveat that
1861 * the memory is physically contiguous. This call may fail
1862 * if the system can't find sufficient contiguous memory.
1863 * This call may cause or lead to heart-stopping amounts of
1864 * paging activity.
1865 *
1866 * Memory obtained from this call should be freed in the
1867 * normal way, viz., via vm_deallocate.
1868 */
1869kern_return_t
1870vm_map_enter_cpm(
1871 vm_map_t map,
1872 vm_map_offset_t *addr,
1873 vm_map_size_t size,
1874 int flags)
1875{
1876 vm_object_t cpm_obj;
1877 pmap_t pmap;
1878 vm_page_t m, pages;
1879 kern_return_t kr;
1880 vm_map_offset_t va, start, end, offset;
1881#if MACH_ASSERT
1882 vm_map_offset_t prev_addr;
1883#endif /* MACH_ASSERT */
1884
1885 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
1886
1887 if (!vm_allocate_cpm_enabled)
1888 return KERN_FAILURE;
1889
1890 if (size == 0) {
1891 *addr = 0;
1892 return KERN_SUCCESS;
1893 }
1894
1895 if (anywhere)
1896 *addr = vm_map_min(map);
1897 else
1898 *addr = vm_map_trunc_page(*addr);
1899 size = vm_map_round_page(size);
1900
1901 /*
1902 * LP64todo - cpm_allocate should probably allow
1903 * allocations of >4GB, but not with the current
1904 * algorithm, so just cast down the size for now.
1905 */
1906 if (size > VM_MAX_ADDRESS)
1907 return KERN_RESOURCE_SHORTAGE;
1908 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
1909 &pages, TRUE)) != KERN_SUCCESS)
1910 return kr;
1911
1912 cpm_obj = vm_object_allocate((vm_object_size_t)size);
1913 assert(cpm_obj != VM_OBJECT_NULL);
1914 assert(cpm_obj->internal);
1915 assert(cpm_obj->size == (vm_object_size_t)size);
1916 assert(cpm_obj->can_persist == FALSE);
1917 assert(cpm_obj->pager_created == FALSE);
1918 assert(cpm_obj->pageout == FALSE);
1919 assert(cpm_obj->shadow == VM_OBJECT_NULL);
1920
1921 /*
1922 * Insert pages into object.
1923 */
1924
1925 vm_object_lock(cpm_obj);
1926 for (offset = 0; offset < size; offset += PAGE_SIZE) {
1927 m = pages;
1928 pages = NEXT_PAGE(m);
0c530ab8 1929 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
1930
1931 assert(!m->gobbled);
1932 assert(!m->wanted);
1933 assert(!m->pageout);
1934 assert(!m->tabled);
1935 /*
1936 * ENCRYPTED SWAP:
1937 * "m" is not supposed to be pageable, so it
1938 * should not be encrypted. It wouldn't be safe
1939 * to enter it in a new VM object while encrypted.
1940 */
1941 ASSERT_PAGE_DECRYPTED(m);
1942 assert(m->busy);
0c530ab8 1943 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
91447636
A
1944
1945 m->busy = FALSE;
1946 vm_page_insert(m, cpm_obj, offset);
1947 }
1948 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
1949 vm_object_unlock(cpm_obj);
1950
1951 /*
1952 * Hang onto a reference on the object in case a
1953 * multi-threaded application for some reason decides
1954 * to deallocate the portion of the address space into
1955 * which we will insert this object.
1956 *
1957 * Unfortunately, we must insert the object now before
1958 * we can talk to the pmap module about which addresses
1959 * must be wired down. Hence, the race with a multi-
1960 * threaded app.
1961 */
1962 vm_object_reference(cpm_obj);
1963
1964 /*
1965 * Insert object into map.
1966 */
1967
1968 kr = vm_map_enter(
1969 map,
1970 addr,
1971 size,
1972 (vm_map_offset_t)0,
1973 flags,
1974 cpm_obj,
1975 (vm_object_offset_t)0,
1976 FALSE,
1977 VM_PROT_ALL,
1978 VM_PROT_ALL,
1979 VM_INHERIT_DEFAULT);
1980
1981 if (kr != KERN_SUCCESS) {
1982 /*
1983 * A CPM object doesn't have can_persist set,
1984 * so all we have to do is deallocate it to
1985 * free up these pages.
1986 */
1987 assert(cpm_obj->pager_created == FALSE);
1988 assert(cpm_obj->can_persist == FALSE);
1989 assert(cpm_obj->pageout == FALSE);
1990 assert(cpm_obj->shadow == VM_OBJECT_NULL);
1991 vm_object_deallocate(cpm_obj); /* kill acquired ref */
1992 vm_object_deallocate(cpm_obj); /* kill creation ref */
1993 }
1994
1995 /*
1996 * Inform the physical mapping system that the
1997 * range of addresses may not fault, so that
1998 * page tables and such can be locked down as well.
1999 */
2000 start = *addr;
2001 end = start + size;
2002 pmap = vm_map_pmap(map);
2003 pmap_pageable(pmap, start, end, FALSE);
2004
2005 /*
2006 * Enter each page into the pmap, to avoid faults.
2007 * Note that this loop could be coded more efficiently,
2008 * if the need arose, rather than looking up each page
2009 * again.
2010 */
2011 for (offset = 0, va = start; offset < size;
2012 va += PAGE_SIZE, offset += PAGE_SIZE) {
2013 vm_object_lock(cpm_obj);
2014 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2015 vm_object_unlock(cpm_obj);
2016 assert(m != VM_PAGE_NULL);
2017 PMAP_ENTER(pmap, va, m, VM_PROT_ALL,
2018 ((unsigned int)(m->object->wimg_bits)) & VM_WIMG_MASK,
2019 TRUE);
2020 }
2021
2022#if MACH_ASSERT
2023 /*
2024 * Verify ordering in address space.
2025 */
2026 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2027 vm_object_lock(cpm_obj);
2028 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2029 vm_object_unlock(cpm_obj);
2030 if (m == VM_PAGE_NULL)
2031 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2032 cpm_obj, offset);
2033 assert(m->tabled);
2034 assert(!m->busy);
2035 assert(!m->wanted);
2036 assert(!m->fictitious);
2037 assert(!m->private);
2038 assert(!m->absent);
2039 assert(!m->error);
2040 assert(!m->cleaning);
2041 assert(!m->precious);
2042 assert(!m->clustered);
2043 if (offset != 0) {
2044 if (m->phys_page != prev_addr + 1) {
2045 printf("start 0x%x end 0x%x va 0x%x\n",
2046 start, end, va);
2047 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2048 printf("m 0x%x prev_address 0x%x\n", m,
2049 prev_addr);
2050 panic("vm_allocate_cpm: pages not contig!");
2051 }
2052 }
2053 prev_addr = m->phys_page;
2054 }
2055#endif /* MACH_ASSERT */
2056
2057 vm_object_deallocate(cpm_obj); /* kill extra ref */
2058
2059 return kr;
2060}
2061
2062
2063#else /* VM_CPM */
2064
2065/*
2066 * Interface is defined in all cases, but unless the kernel
2067 * is built explicitly for this option, the interface does
2068 * nothing.
2069 */
2070
2071kern_return_t
2072vm_map_enter_cpm(
2073 __unused vm_map_t map,
2074 __unused vm_map_offset_t *addr,
2075 __unused vm_map_size_t size,
2076 __unused int flags)
2077{
2078 return KERN_FAILURE;
2079}
2080#endif /* VM_CPM */
2081
1c79356b
A
2082/*
2083 * vm_map_clip_start: [ internal use only ]
2084 *
2085 * Asserts that the given entry begins at or after
2086 * the specified address; if necessary,
2087 * it splits the entry into two.
2088 */
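/*
 * Editor's note: hypothetical illustration, not part of the original
 * source.  Clipping an entry that spans [0x1000, 0x5000) at 0x3000
 * splits off the front portion and advances the surviving entry's
 * object offset by the amount that was split off.
 */
#if 0	/* illustration only */
	/* before: entry covers [0x1000, 0x5000) with offset 0x0 */
	vm_map_clip_start(map, entry, (vm_map_offset_t) 0x3000);
	/*
	 * after:  a new entry [0x1000, 0x3000), offset 0x0, precedes "entry";
	 *         "entry" now covers [0x3000, 0x5000) with offset 0x2000.
	 */
#endif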
0c530ab8 2089#ifndef NO_NESTED_PMAP
1c79356b
A
2090#define vm_map_clip_start(map, entry, startaddr) \
2091MACRO_BEGIN \
2092 vm_map_t VMCS_map; \
2093 vm_map_entry_t VMCS_entry; \
91447636 2094 vm_map_offset_t VMCS_startaddr; \
2095 VMCS_map = (map); \
2096 VMCS_entry = (entry); \
2097 VMCS_startaddr = (startaddr); \
2098 if (VMCS_startaddr > VMCS_entry->vme_start) { \
2099 if(entry->use_pmap) { \
91447636 2100 vm_map_offset_t pmap_base_addr; \
2101 \
2102 pmap_base_addr = 0xF0000000 & entry->vme_start; \
55e303ae 2103 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \
1c79356b 2104 entry->use_pmap = FALSE; \
2105 } else if(entry->object.vm_object \
2106 && !entry->is_sub_map \
2107 && entry->object.vm_object->phys_contiguous) { \
2108 pmap_remove(map->pmap, \
2109 (addr64_t)(entry->vme_start), \
2110 (addr64_t)(entry->vme_end)); \
2111 } \
2112 _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\
2113 } \
2114 UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \
2115MACRO_END
0c530ab8 2116#else /* NO_NESTED_PMAP */
1c79356b
A
2117#define vm_map_clip_start(map, entry, startaddr) \
2118MACRO_BEGIN \
2119 vm_map_t VMCS_map; \
2120 vm_map_entry_t VMCS_entry; \
91447636 2121 vm_map_offset_t VMCS_startaddr; \
1c79356b
A
2122 VMCS_map = (map); \
2123 VMCS_entry = (entry); \
2124 VMCS_startaddr = (startaddr); \
2125 if (VMCS_startaddr > VMCS_entry->vme_start) { \
2126 _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\
2127 } \
2128 UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \
2129MACRO_END
0c530ab8 2130#endif /* NO_NESTED_PMAP */
1c79356b
A
2131
2132#define vm_map_copy_clip_start(copy, entry, startaddr) \
2133 MACRO_BEGIN \
2134 if ((startaddr) > (entry)->vme_start) \
2135 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
2136 MACRO_END
2137
2138/*
2139 * This routine is called only when it is known that
2140 * the entry must be split.
2141 */
91447636 2142static void
1c79356b
A
2143_vm_map_clip_start(
2144 register struct vm_map_header *map_header,
2145 register vm_map_entry_t entry,
91447636 2146 register vm_map_offset_t start)
1c79356b
A
2147{
2148 register vm_map_entry_t new_entry;
2149
2150 /*
2151 * Split off the front portion --
2152 * note that we must insert the new
2153 * entry BEFORE this one, so that
2154 * this entry has the specified starting
2155 * address.
2156 */
2157
2158 new_entry = _vm_map_entry_create(map_header);
2159 vm_map_entry_copy_full(new_entry, entry);
2160
2161 new_entry->vme_end = start;
2162 entry->offset += (start - entry->vme_start);
2163 entry->vme_start = start;
2164
2165 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
2166
2167 if (entry->is_sub_map)
2168 vm_map_reference(new_entry->object.sub_map);
2169 else
2170 vm_object_reference(new_entry->object.vm_object);
2171}
2172
2173
2174/*
2175 * vm_map_clip_end: [ internal use only ]
2176 *
2177 * Asserts that the given entry ends at or before
2178 * the specified address; if necessary,
2179 * it splits the entry into two.
2180 */
0c530ab8 2181#ifndef NO_NESTED_PMAP
1c79356b
A
2182#define vm_map_clip_end(map, entry, endaddr) \
2183MACRO_BEGIN \
2184 vm_map_t VMCE_map; \
2185 vm_map_entry_t VMCE_entry; \
91447636 2186 vm_map_offset_t VMCE_endaddr; \
2187 VMCE_map = (map); \
2188 VMCE_entry = (entry); \
2189 VMCE_endaddr = (endaddr); \
2190 if (VMCE_endaddr < VMCE_entry->vme_end) { \
2191 if(entry->use_pmap) { \
91447636 2192 vm_map_offset_t pmap_base_addr; \
2193 \
2194 pmap_base_addr = 0xF0000000 & entry->vme_start; \
55e303ae 2195 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \
1c79356b 2196 entry->use_pmap = FALSE; \
2197 } else if(entry->object.vm_object \
2198 && !entry->is_sub_map \
2199 && entry->object.vm_object->phys_contiguous) { \
2200 pmap_remove(map->pmap, \
2201 (addr64_t)(entry->vme_start), \
2202 (addr64_t)(entry->vme_end)); \
2203 } \
2204 _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \
2205 } \
2206 UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \
2207MACRO_END
0c530ab8 2208#else /* NO_NESTED_PMAP */
1c79356b
A
2209#define vm_map_clip_end(map, entry, endaddr) \
2210MACRO_BEGIN \
2211 vm_map_t VMCE_map; \
2212 vm_map_entry_t VMCE_entry; \
91447636 2213 vm_map_offset_t VMCE_endaddr; \
1c79356b
A
2214 VMCE_map = (map); \
2215 VMCE_entry = (entry); \
2216 VMCE_endaddr = (endaddr); \
2217 if (VMCE_endaddr < VMCE_entry->vme_end) { \
2218 _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \
2219 } \
2220 UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \
2221MACRO_END
0c530ab8
A
2222#endif /* NO_NESTED_PMAP */
2223
1c79356b
A
2224
2225#define vm_map_copy_clip_end(copy, entry, endaddr) \
2226 MACRO_BEGIN \
2227 if ((endaddr) < (entry)->vme_end) \
2228 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
2229 MACRO_END
2230
2231/*
2232 * This routine is called only when it is known that
2233 * the entry must be split.
2234 */
91447636 2235static void
1c79356b
A
2236_vm_map_clip_end(
2237 register struct vm_map_header *map_header,
2238 register vm_map_entry_t entry,
91447636 2239 register vm_map_offset_t end)
1c79356b
A
2240{
2241 register vm_map_entry_t new_entry;
2242
2243 /*
2244 * Create a new entry and insert it
2245 * AFTER the specified entry
2246 */
2247
2248 new_entry = _vm_map_entry_create(map_header);
2249 vm_map_entry_copy_full(new_entry, entry);
2250
2251 new_entry->vme_start = entry->vme_end = end;
2252 new_entry->offset += (end - entry->vme_start);
2253
2254 _vm_map_entry_link(map_header, entry, new_entry);
2255
2256 if (entry->is_sub_map)
2257 vm_map_reference(new_entry->object.sub_map);
2258 else
2259 vm_object_reference(new_entry->object.vm_object);
2260}
2261
2262
2263/*
2264 * VM_MAP_RANGE_CHECK: [ internal use only ]
2265 *
2266 * Asserts that the starting and ending region
2267 * addresses fall within the valid range of the map.
2268 */
2269#define VM_MAP_RANGE_CHECK(map, start, end) \
2270 { \
2271 if (start < vm_map_min(map)) \
2272 start = vm_map_min(map); \
2273 if (end > vm_map_max(map)) \
2274 end = vm_map_max(map); \
2275 if (start > end) \
2276 start = end; \
2277 }
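/*
 * Editor's note: hypothetical illustration, not part of the original
 * source.  VM_MAP_RANGE_CHECK only clamps the request to the map's
 * valid range; it does not report an error, and an inverted range
 * collapses to an empty one (start == end).
 */
#if 0	/* illustration only */
	vm_map_offset_t start = 0, end = (vm_map_offset_t) -1;

	VM_MAP_RANGE_CHECK(map, start, end);
	/* start is now vm_map_min(map) and end is vm_map_max(map) */
#endif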
2278
2279/*
2280 * vm_map_range_check: [ internal use only ]
2281 *
2282 * Check that the region defined by the specified start and
 2283 *	end addresses is wholly contained within a single map
 2284 *	entry or set of adjacent map entries of the specified map,
2285 * i.e. the specified region contains no unmapped space.
2286 * If any or all of the region is unmapped, FALSE is returned.
2287 * Otherwise, TRUE is returned and if the output argument 'entry'
2288 * is not NULL it points to the map entry containing the start
2289 * of the region.
2290 *
2291 * The map is locked for reading on entry and is left locked.
2292 */
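/*
 * Editor's note: hypothetical illustration, not part of the original
 * source.  A caller holding the map's read lock can use this check to
 * reject requests that would touch unmapped space.
 */
#if 0	/* illustration only */
	vm_map_entry_t first;

	if (!vm_map_range_check(map, start, end, &first))
		return KERN_INVALID_ADDRESS;
	/* "first" now refers to the entry containing "start" */
#endif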
91447636 2293static boolean_t
1c79356b
A
2294vm_map_range_check(
2295 register vm_map_t map,
2296 register vm_map_offset_t start,
2297 register vm_map_offset_t end,
1c79356b
A
2298 vm_map_entry_t *entry)
2299{
2300 vm_map_entry_t cur;
91447636 2301 register vm_map_offset_t prev;
1c79356b
A
2302
2303 /*
2304 * Basic sanity checks first
2305 */
2306 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
2307 return (FALSE);
2308
2309 /*
2310 * Check first if the region starts within a valid
2311 * mapping for the map.
2312 */
2313 if (!vm_map_lookup_entry(map, start, &cur))
2314 return (FALSE);
2315
2316 /*
2317 * Optimize for the case that the region is contained
2318 * in a single map entry.
2319 */
2320 if (entry != (vm_map_entry_t *) NULL)
2321 *entry = cur;
2322 if (end <= cur->vme_end)
2323 return (TRUE);
2324
2325 /*
2326 * If the region is not wholly contained within a
2327 * single entry, walk the entries looking for holes.
2328 */
2329 prev = cur->vme_end;
2330 cur = cur->vme_next;
2331 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
2332 if (end <= cur->vme_end)
2333 return (TRUE);
2334 prev = cur->vme_end;
2335 cur = cur->vme_next;
2336 }
2337 return (FALSE);
2338}
2339
2340/*
2341 * vm_map_submap: [ kernel use only ]
2342 *
2343 * Mark the given range as handled by a subordinate map.
2344 *
2345 * This range must have been created with vm_map_find using
2346 * the vm_submap_object, and no other operations may have been
2347 * performed on this range prior to calling vm_map_submap.
2348 *
2349 * Only a limited number of operations can be performed
 2350 *	within this range after calling vm_map_submap:
2351 * vm_fault
2352 * [Don't try vm_map_copyin!]
2353 *
2354 * To remove a submapping, one must first remove the
2355 * range from the superior map, and then destroy the
2356 * submap (if desired). [Better yet, don't try it.]
2357 */
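/*
 * Editor's note: hypothetical sketch, not part of the original source.
 * It shows the calling pattern described above -- reserve the range
 * against vm_submap_object (sketched here with vm_map_enter(), defined
 * earlier in this file), then install the submap over it.  "parent_map",
 * "child_map" and "size" are assumptions for the example.
 */
#if 0	/* illustration only */
	vm_map_offset_t	addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter(parent_map, &addr, size, (vm_map_offset_t) 0,
			  VM_FLAGS_ANYWHERE, vm_submap_object,
			  (vm_object_offset_t) 0, FALSE,
			  VM_PROT_ALL, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr == KERN_SUCCESS)
		kr = vm_map_submap(parent_map, addr, addr + size,
				   child_map, (vm_map_offset_t) 0, FALSE);
#endif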
2358kern_return_t
2359vm_map_submap(
2360 vm_map_t map,
2361 vm_map_offset_t start,
2362 vm_map_offset_t end,
1c79356b 2363 vm_map_t submap,
91447636 2364 vm_map_offset_t offset,
0c530ab8 2365#ifdef NO_NESTED_PMAP
91447636 2366 __unused
0c530ab8 2367#endif /* NO_NESTED_PMAP */
1c79356b
A
2368 boolean_t use_pmap)
2369{
2370 vm_map_entry_t entry;
2371 register kern_return_t result = KERN_INVALID_ARGUMENT;
2372 register vm_object_t object;
2373
2374 vm_map_lock(map);
2375
9bccf70c
A
2376 submap->mapped = TRUE;
2377
1c79356b
A
2378 VM_MAP_RANGE_CHECK(map, start, end);
2379
2380 if (vm_map_lookup_entry(map, start, &entry)) {
2381 vm_map_clip_start(map, entry, start);
2382 }
2383 else
2384 entry = entry->vme_next;
2385
2386 if(entry == vm_map_to_entry(map)) {
2387 vm_map_unlock(map);
2388 return KERN_INVALID_ARGUMENT;
2389 }
2390
2391 vm_map_clip_end(map, entry, end);
2392
2393 if ((entry->vme_start == start) && (entry->vme_end == end) &&
2394 (!entry->is_sub_map) &&
2395 ((object = entry->object.vm_object) == vm_submap_object) &&
2396 (object->resident_page_count == 0) &&
2397 (object->copy == VM_OBJECT_NULL) &&
2398 (object->shadow == VM_OBJECT_NULL) &&
2399 (!object->pager_created)) {
55e303ae
A
2400 entry->offset = (vm_object_offset_t)offset;
2401 entry->object.vm_object = VM_OBJECT_NULL;
2402 vm_object_deallocate(object);
2403 entry->is_sub_map = TRUE;
2404 entry->object.sub_map = submap;
2405 vm_map_reference(submap);
0c530ab8 2406#ifndef NO_NESTED_PMAP
55e303ae
A
2407 if ((use_pmap) && (offset == 0)) {
2408 /* nest if platform code will allow */
2409 if(submap->pmap == NULL) {
0c530ab8 2410 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
55e303ae 2411 if(submap->pmap == PMAP_NULL) {
91447636 2412 vm_map_unlock(map);
55e303ae
A
2413 return(KERN_NO_SPACE);
2414 }
2415 }
2416 result = pmap_nest(map->pmap, (entry->object.sub_map)->pmap,
91447636
A
2417 (addr64_t)start,
2418 (addr64_t)start,
2419 (uint64_t)(end - start));
55e303ae
A
2420 if(result)
2421 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
2422 entry->use_pmap = TRUE;
2423 }
0c530ab8 2424#else /* NO_NESTED_PMAP */
55e303ae 2425 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 2426#endif /* NO_NESTED_PMAP */
55e303ae 2427 result = KERN_SUCCESS;
1c79356b
A
2428 }
2429 vm_map_unlock(map);
2430
2431 return(result);
2432}
2433
2434/*
2435 * vm_map_protect:
2436 *
2437 * Sets the protection of the specified address
2438 * region in the target map. If "set_max" is
2439 * specified, the maximum protection is to be set;
2440 * otherwise, only the current protection is affected.
2441 */
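/*
 * Editor's note: hypothetical illustration, not part of the original
 * source.  The same range can be restricted twice: once for the current
 * protection and once (with set_max) for the maximum protection.
 */
#if 0	/* illustration only */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);	/* current */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, TRUE);	/* maximum */
#endif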
2442kern_return_t
2443vm_map_protect(
2444 register vm_map_t map,
2445 register vm_map_offset_t start,
2446 register vm_map_offset_t end,
1c79356b
A
2447 register vm_prot_t new_prot,
2448 register boolean_t set_max)
2449{
2450 register vm_map_entry_t current;
91447636 2451 register vm_map_offset_t prev;
1c79356b
A
2452 vm_map_entry_t entry;
2453 vm_prot_t new_max;
2454 boolean_t clip;
2455
2456 XPR(XPR_VM_MAP,
2457 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
2458 (integer_t)map, start, end, new_prot, set_max);
2459
2460 vm_map_lock(map);
2461
91447636
A
2462 /* LP64todo - remove this check when vm_map_commpage64()
2463 * no longer has to stuff in a map_entry for the commpage
2464 * above the map's max_offset.
2465 */
2466 if (start >= map->max_offset) {
2467 vm_map_unlock(map);
2468 return(KERN_INVALID_ADDRESS);
2469 }
2470
1c79356b
A
2471 /*
2472 * Lookup the entry. If it doesn't start in a valid
2473 * entry, return an error. Remember if we need to
2474 * clip the entry. We don't do it here because we don't
2475 * want to make any changes until we've scanned the
2476 * entire range below for address and protection
2477 * violations.
2478 */
2479 if (!(clip = vm_map_lookup_entry(map, start, &entry))) {
2480 vm_map_unlock(map);
2481 return(KERN_INVALID_ADDRESS);
2482 }
2483
2484 /*
2485 * Make a first pass to check for protection and address
2486 * violations.
2487 */
2488
2489 current = entry;
2490 prev = current->vme_start;
2491 while ((current != vm_map_to_entry(map)) &&
2492 (current->vme_start < end)) {
2493
2494 /*
2495 * If there is a hole, return an error.
2496 */
2497 if (current->vme_start != prev) {
2498 vm_map_unlock(map);
2499 return(KERN_INVALID_ADDRESS);
2500 }
2501
2502 new_max = current->max_protection;
2503 if(new_prot & VM_PROT_COPY) {
2504 new_max |= VM_PROT_WRITE;
2505 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
2506 vm_map_unlock(map);
2507 return(KERN_PROTECTION_FAILURE);
2508 }
2509 } else {
2510 if ((new_prot & new_max) != new_prot) {
2511 vm_map_unlock(map);
2512 return(KERN_PROTECTION_FAILURE);
2513 }
2514 }
2515
2516 prev = current->vme_end;
2517 current = current->vme_next;
2518 }
2519 if (end > prev) {
2520 vm_map_unlock(map);
2521 return(KERN_INVALID_ADDRESS);
2522 }
2523
2524 /*
2525 * Go back and fix up protections.
2526 * Clip to start here if the range starts within
2527 * the entry.
2528 */
2529
2530 current = entry;
2531 if (clip) {
2532 vm_map_clip_start(map, entry, start);
2533 }
2534 while ((current != vm_map_to_entry(map)) &&
2535 (current->vme_start < end)) {
2536
2537 vm_prot_t old_prot;
2538
2539 vm_map_clip_end(map, current, end);
2540
2541 old_prot = current->protection;
2542
2543 if(new_prot & VM_PROT_COPY) {
2544 /* caller is asking specifically to copy the */
 2545			/* mapped data; this implies that max protection */
2546 /* will include write. Caller must be prepared */
2547 /* for loss of shared memory communication in the */
2548 /* target area after taking this step */
2549 current->needs_copy = TRUE;
2550 current->max_protection |= VM_PROT_WRITE;
2551 }
2552
2553 if (set_max)
2554 current->protection =
2555 (current->max_protection =
2556 new_prot & ~VM_PROT_COPY) &
2557 old_prot;
2558 else
2559 current->protection = new_prot & ~VM_PROT_COPY;
2560
2561 /*
2562 * Update physical map if necessary.
2563 * If the request is to turn off write protection,
2564 * we won't do it for real (in pmap). This is because
2565 * it would cause copy-on-write to fail. We've already
 2566		 *	set the new protection in the map, so if a
 2567		 *	write-protect fault occurs, it will be fixed up
2568 * properly, COW or not.
2569 */
2570 /* the 256M hack for existing hardware limitations */
2571 if (current->protection != old_prot) {
2572 if(current->is_sub_map && current->use_pmap) {
91447636
A
2573 vm_map_offset_t pmap_base_addr;
2574 vm_map_offset_t pmap_end_addr;
0c530ab8 2575#ifdef NO_NESTED_PMAP
91447636 2576 __unused
0c530ab8 2577#endif /* NO_NESTED_PMAP */
1c79356b
A
2578 vm_map_entry_t local_entry;
2579
0c530ab8 2580
1c79356b
A
2581 pmap_base_addr = 0xF0000000 & current->vme_start;
2582 pmap_end_addr = (pmap_base_addr + 0x10000000) - 1;
0c530ab8 2583#ifndef NO_NESTED_PMAP
1c79356b
A
2584 if(!vm_map_lookup_entry(map,
2585 pmap_base_addr, &local_entry))
2586 panic("vm_map_protect: nested pmap area is missing");
2587 while ((local_entry != vm_map_to_entry(map)) &&
2588 (local_entry->vme_start < pmap_end_addr)) {
2589 local_entry->use_pmap = FALSE;
2590 local_entry = local_entry->vme_next;
2591 }
55e303ae 2592 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr);
0c530ab8 2593#endif /* NO_NESTED_PMAP */
1c79356b
A
2594 }
2595 if (!(current->protection & VM_PROT_WRITE)) {
 2596			/* Look one level in: we support nested pmaps */
2597 /* from mapped submaps which are direct entries */
2598 /* in our map */
0c530ab8
A
2599
2600 vm_prot_t prot;
2601
2602 prot = current->protection;
2603#ifdef STACK_ONLY_NX
2604 if (current->alias != VM_MEMORY_STACK && prot)
2605 prot |= VM_PROT_EXECUTE;
2606#endif
2607 if (current->is_sub_map && current->use_pmap) {
1c79356b
A
2608 pmap_protect(current->object.sub_map->pmap,
2609 current->vme_start,
2610 current->vme_end,
0c530ab8 2611 prot);
1c79356b
A
2612 } else {
2613 pmap_protect(map->pmap, current->vme_start,
2614 current->vme_end,
0c530ab8 2615 prot);
1c79356b
A
2616 }
2617 }
2618 }
2619 current = current->vme_next;
2620 }
2621
5353443c 2622 current = entry;
91447636
A
2623 while ((current != vm_map_to_entry(map)) &&
2624 (current->vme_start <= end)) {
5353443c
A
2625 vm_map_simplify_entry(map, current);
2626 current = current->vme_next;
2627 }
2628
1c79356b
A
2629 vm_map_unlock(map);
2630 return(KERN_SUCCESS);
2631}
2632
2633/*
2634 * vm_map_inherit:
2635 *
2636 * Sets the inheritance of the specified address
2637 * range in the target map. Inheritance
2638 * affects how the map will be shared with
2639 * child maps at the time of vm_map_fork.
2640 */
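/*
 * Editor's note: hypothetical illustration, not part of the original
 * source.  Marking a range VM_INHERIT_SHARE makes a child created by
 * vm_map_fork() share it with the parent instead of receiving a copy.
 */
#if 0	/* illustration only */
	kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
#endif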
2641kern_return_t
2642vm_map_inherit(
2643 register vm_map_t map,
2644 register vm_map_offset_t start,
2645 register vm_map_offset_t end,
1c79356b
A
2646 register vm_inherit_t new_inheritance)
2647{
2648 register vm_map_entry_t entry;
2649 vm_map_entry_t temp_entry;
2650
2651 vm_map_lock(map);
2652
2653 VM_MAP_RANGE_CHECK(map, start, end);
2654
2655 if (vm_map_lookup_entry(map, start, &temp_entry)) {
2656 entry = temp_entry;
2657 vm_map_clip_start(map, entry, start);
2658 }
2659 else {
2660 temp_entry = temp_entry->vme_next;
2661 entry = temp_entry;
2662 }
2663
2664 /* first check entire range for submaps which can't support the */
2665 /* given inheritance. */
2666 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2667 if(entry->is_sub_map) {
91447636
A
2668 if(new_inheritance == VM_INHERIT_COPY) {
2669 vm_map_unlock(map);
1c79356b 2670 return(KERN_INVALID_ARGUMENT);
91447636 2671 }
1c79356b
A
2672 }
2673
2674 entry = entry->vme_next;
2675 }
2676
2677 entry = temp_entry;
2678
2679 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2680 vm_map_clip_end(map, entry, end);
2681
2682 entry->inheritance = new_inheritance;
2683
2684 entry = entry->vme_next;
2685 }
2686
2687 vm_map_unlock(map);
2688 return(KERN_SUCCESS);
2689}
2690
2691/*
2692 * vm_map_wire:
2693 *
2694 * Sets the pageability of the specified address range in the
2695 * target map as wired. Regions specified as not pageable require
2696 * locked-down physical memory and physical page maps. The
2697 * access_type variable indicates types of accesses that must not
2698 * generate page faults. This is checked against protection of
2699 * memory being locked-down.
2700 *
2701 * The map must not be locked, but a reference must remain to the
2702 * map throughout the call.
2703 */
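/*
 * Editor's note: hypothetical illustration, not part of the original
 * source.  Callers use the public vm_map_wire()/vm_map_unwire()
 * wrappers defined later in this file; user_wire selects the
 * user-level (interruptible) accounting rather than a kernel wiring.
 */
#if 0	/* illustration only */
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE, FALSE);	/* kernel wiring */
	/* ... perform I/O against the wired range ... */
	kr = vm_map_unwire(map, start, end, FALSE);
#endif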
91447636 2704static kern_return_t
1c79356b
A
2705vm_map_wire_nested(
2706 register vm_map_t map,
2707 register vm_map_offset_t start,
2708 register vm_map_offset_t end,
1c79356b
A
2709 register vm_prot_t access_type,
2710 boolean_t user_wire,
9bccf70c 2711 pmap_t map_pmap,
91447636 2712 vm_map_offset_t pmap_addr)
1c79356b
A
2713{
2714 register vm_map_entry_t entry;
2715 struct vm_map_entry *first_entry, tmp_entry;
91447636
A
2716 vm_map_t real_map;
2717 register vm_map_offset_t s,e;
1c79356b
A
2718 kern_return_t rc;
2719 boolean_t need_wakeup;
2720 boolean_t main_map = FALSE;
9bccf70c 2721 wait_interrupt_t interruptible_state;
0b4e3aa0 2722 thread_t cur_thread;
1c79356b 2723 unsigned int last_timestamp;
91447636 2724 vm_map_size_t size;
1c79356b
A
2725
2726 vm_map_lock(map);
2727 if(map_pmap == NULL)
2728 main_map = TRUE;
2729 last_timestamp = map->timestamp;
2730
2731 VM_MAP_RANGE_CHECK(map, start, end);
2732 assert(page_aligned(start));
2733 assert(page_aligned(end));
0b4e3aa0
A
2734 if (start == end) {
2735 /* We wired what the caller asked for, zero pages */
2736 vm_map_unlock(map);
2737 return KERN_SUCCESS;
2738 }
1c79356b
A
2739
2740 if (vm_map_lookup_entry(map, start, &first_entry)) {
2741 entry = first_entry;
2742 /* vm_map_clip_start will be done later. */
2743 } else {
2744 /* Start address is not in map */
2745 vm_map_unlock(map);
2746 return(KERN_INVALID_ADDRESS);
2747 }
2748
2749 s=start;
2750 need_wakeup = FALSE;
0b4e3aa0 2751 cur_thread = current_thread();
1c79356b
A
2752 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2753 /*
2754 * If another thread is wiring/unwiring this entry then
2755 * block after informing other thread to wake us up.
2756 */
2757 if (entry->in_transition) {
9bccf70c
A
2758 wait_result_t wait_result;
2759
1c79356b
A
2760 /*
2761 * We have not clipped the entry. Make sure that
2762 * the start address is in range so that the lookup
2763 * below will succeed.
2764 */
2765 s = entry->vme_start < start? start: entry->vme_start;
2766
2767 entry->needs_wakeup = TRUE;
2768
2769 /*
2770 * wake up anybody waiting on entries that we have
2771 * already wired.
2772 */
2773 if (need_wakeup) {
2774 vm_map_entry_wakeup(map);
2775 need_wakeup = FALSE;
2776 }
2777 /*
2778 * User wiring is interruptible
2779 */
9bccf70c 2780 wait_result = vm_map_entry_wait(map,
1c79356b
A
2781 (user_wire) ? THREAD_ABORTSAFE :
2782 THREAD_UNINT);
9bccf70c 2783 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
2784 /*
2785 * undo the wirings we have done so far
2786 * We do not clear the needs_wakeup flag,
2787 * because we cannot tell if we were the
2788 * only one waiting.
2789 */
9bccf70c 2790 vm_map_unlock(map);
1c79356b
A
2791 vm_map_unwire(map, start, s, user_wire);
2792 return(KERN_FAILURE);
2793 }
2794
1c79356b
A
2795 /*
 2796			 * Cannot avoid a lookup here.  Reset the timestamp.
2797 */
2798 last_timestamp = map->timestamp;
2799
2800 /*
2801 * The entry could have been clipped, look it up again.
 2802			 * Worst that can happen is that it may not exist anymore.
2803 */
2804 if (!vm_map_lookup_entry(map, s, &first_entry)) {
2805 if (!user_wire)
2806 panic("vm_map_wire: re-lookup failed");
2807
2808 /*
 2809				 * User: undo everything up to the previous
 2810				 * entry.  Let vm_map_unwire worry about
2811 * checking the validity of the range.
2812 */
2813 vm_map_unlock(map);
2814 vm_map_unwire(map, start, s, user_wire);
2815 return(KERN_FAILURE);
2816 }
2817 entry = first_entry;
2818 continue;
2819 }
2820
2821 if(entry->is_sub_map) {
91447636
A
2822 vm_map_offset_t sub_start;
2823 vm_map_offset_t sub_end;
2824 vm_map_offset_t local_start;
2825 vm_map_offset_t local_end;
1c79356b
A
2826 pmap_t pmap;
2827
2828 vm_map_clip_start(map, entry, start);
2829 vm_map_clip_end(map, entry, end);
2830
9bccf70c 2831 sub_start = entry->offset;
1c79356b
A
2832 sub_end = entry->vme_end - entry->vme_start;
2833 sub_end += entry->offset;
2834
2835 local_end = entry->vme_end;
2836 if(map_pmap == NULL) {
2837 if(entry->use_pmap) {
2838 pmap = entry->object.sub_map->pmap;
9bccf70c
A
2839 /* ppc implementation requires that */
 2840				/* submap pmap address ranges line */
2841 /* up with parent map */
2842#ifdef notdef
2843 pmap_addr = sub_start;
2844#endif
2845 pmap_addr = start;
1c79356b
A
2846 } else {
2847 pmap = map->pmap;
9bccf70c 2848 pmap_addr = start;
1c79356b
A
2849 }
2850 if (entry->wired_count) {
2851 if (entry->wired_count
2852 >= MAX_WIRE_COUNT)
2853 panic("vm_map_wire: too many wirings");
2854
2855 if (user_wire &&
2856 entry->user_wired_count
2857 >= MAX_WIRE_COUNT) {
2858 vm_map_unlock(map);
2859 vm_map_unwire(map, start,
2860 entry->vme_start, user_wire);
2861 return(KERN_FAILURE);
2862 }
9bccf70c
A
2863 if(user_wire)
2864 entry->user_wired_count++;
2865 if((!user_wire) ||
2866 (entry->user_wired_count == 0))
1c79356b
A
2867 entry->wired_count++;
2868 entry = entry->vme_next;
2869 continue;
2870
2871 } else {
2872 vm_object_t object;
91447636
A
2873 vm_map_offset_t offset_hi;
2874 vm_map_offset_t offset_lo;
1c79356b
A
2875 vm_object_offset_t offset;
2876 vm_prot_t prot;
2877 boolean_t wired;
2878 vm_behavior_t behavior;
1c79356b
A
2879 vm_map_entry_t local_entry;
2880 vm_map_version_t version;
2881 vm_map_t lookup_map;
2882
2883 /* call vm_map_lookup_locked to */
 2884				/* cause any needs_copy to be */
2885 /* evaluated */
2886 local_start = entry->vme_start;
2887 lookup_map = map;
2888 vm_map_lock_write_to_read(map);
2889 if(vm_map_lookup_locked(
2890 &lookup_map, local_start,
9bccf70c 2891 access_type,
1c79356b
A
2892 &version, &object,
2893 &offset, &prot, &wired,
2894 &behavior, &offset_lo,
91447636 2895 &offset_hi, &real_map)) {
1c79356b 2896
91447636 2897 vm_map_unlock_read(lookup_map);
1c79356b
A
2898 vm_map_unwire(map, start,
2899 entry->vme_start, user_wire);
2900 return(KERN_FAILURE);
2901 }
91447636
A
2902 if(real_map != lookup_map)
2903 vm_map_unlock(real_map);
9bccf70c
A
2904 vm_map_unlock_read(lookup_map);
2905 vm_map_lock(map);
1c79356b 2906 vm_object_unlock(object);
9bccf70c
A
2907
2908 if (!vm_map_lookup_entry(map,
1c79356b
A
2909 local_start, &local_entry)) {
2910 vm_map_unlock(map);
2911 vm_map_unwire(map, start,
2912 entry->vme_start, user_wire);
2913 return(KERN_FAILURE);
2914 }
2915 /* did we have a change of type? */
9bccf70c
A
2916 if (!local_entry->is_sub_map) {
2917 last_timestamp = map->timestamp;
1c79356b 2918 continue;
9bccf70c 2919 }
1c79356b
A
2920 entry = local_entry;
2921 if (user_wire)
2922 entry->user_wired_count++;
9bccf70c
A
2923 if((!user_wire) ||
2924 (entry->user_wired_count == 1))
1c79356b
A
2925 entry->wired_count++;
2926
2927 entry->in_transition = TRUE;
2928
2929 vm_map_unlock(map);
2930 rc = vm_map_wire_nested(
2931 entry->object.sub_map,
2932 sub_start, sub_end,
2933 access_type,
9bccf70c 2934 user_wire, pmap, pmap_addr);
1c79356b 2935 vm_map_lock(map);
1c79356b
A
2936 }
2937 } else {
9bccf70c
A
2938 local_start = entry->vme_start;
2939 if (user_wire)
2940 entry->user_wired_count++;
2941 if((!user_wire) ||
2942 (entry->user_wired_count == 1))
2943 entry->wired_count++;
1c79356b
A
2944 vm_map_unlock(map);
2945 rc = vm_map_wire_nested(entry->object.sub_map,
2946 sub_start, sub_end,
2947 access_type,
55e303ae 2948 user_wire, map_pmap, pmap_addr);
1c79356b 2949 vm_map_lock(map);
1c79356b
A
2950 }
2951 s = entry->vme_start;
2952 e = entry->vme_end;
9bccf70c 2953
1c79356b
A
2954 /*
2955 * Find the entry again. It could have been clipped
2956 * after we unlocked the map.
2957 */
9bccf70c
A
2958 if (!vm_map_lookup_entry(map, local_start,
2959 &first_entry))
2960 panic("vm_map_wire: re-lookup failed");
2961 entry = first_entry;
1c79356b
A
2962
2963 last_timestamp = map->timestamp;
2964 while ((entry != vm_map_to_entry(map)) &&
2965 (entry->vme_start < e)) {
2966 assert(entry->in_transition);
2967 entry->in_transition = FALSE;
2968 if (entry->needs_wakeup) {
2969 entry->needs_wakeup = FALSE;
2970 need_wakeup = TRUE;
2971 }
2972 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
1c79356b
A
2973 if (user_wire)
2974 entry->user_wired_count--;
9bccf70c
A
2975 if ((!user_wire) ||
2976 (entry->user_wired_count == 0))
2977 entry->wired_count--;
1c79356b
A
2978 }
2979 entry = entry->vme_next;
2980 }
2981 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2982 vm_map_unlock(map);
2983 if (need_wakeup)
2984 vm_map_entry_wakeup(map);
2985 /*
 2986			 * undo everything up to the previous entry.
2987 */
2988 (void)vm_map_unwire(map, start, s, user_wire);
2989 return rc;
2990 }
2991 continue;
2992 }
2993
2994 /*
2995 * If this entry is already wired then increment
2996 * the appropriate wire reference count.
2997 */
9bccf70c 2998 if (entry->wired_count) {
1c79356b
A
2999 /* sanity check: wired_count is a short */
3000 if (entry->wired_count >= MAX_WIRE_COUNT)
3001 panic("vm_map_wire: too many wirings");
3002
3003 if (user_wire &&
3004 entry->user_wired_count >= MAX_WIRE_COUNT) {
3005 vm_map_unlock(map);
3006 vm_map_unwire(map, start,
3007 entry->vme_start, user_wire);
3008 return(KERN_FAILURE);
3009 }
3010 /*
3011 * entry is already wired down, get our reference
3012 * after clipping to our range.
3013 */
3014 vm_map_clip_start(map, entry, start);
3015 vm_map_clip_end(map, entry, end);
9bccf70c
A
3016 if (user_wire)
3017 entry->user_wired_count++;
3018 if ((!user_wire) || (entry->user_wired_count == 1))
1c79356b
A
3019 entry->wired_count++;
3020
3021 entry = entry->vme_next;
3022 continue;
3023 }
3024
3025 /*
3026 * Unwired entry or wire request transmitted via submap
3027 */
3028
3029
3030 /*
3031 * Perform actions of vm_map_lookup that need the write
3032 * lock on the map: create a shadow object for a
3033 * copy-on-write region, or an object for a zero-fill
3034 * region.
3035 */
3036 size = entry->vme_end - entry->vme_start;
3037 /*
3038 * If wiring a copy-on-write page, we need to copy it now
3039 * even if we're only (currently) requesting read access.
3040 * This is aggressive, but once it's wired we can't move it.
3041 */
3042 if (entry->needs_copy) {
3043 vm_object_shadow(&entry->object.vm_object,
3044 &entry->offset, size);
3045 entry->needs_copy = FALSE;
3046 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
3047 entry->object.vm_object = vm_object_allocate(size);
3048 entry->offset = (vm_object_offset_t)0;
3049 }
3050
3051 vm_map_clip_start(map, entry, start);
3052 vm_map_clip_end(map, entry, end);
3053
3054 s = entry->vme_start;
3055 e = entry->vme_end;
3056
3057 /*
3058 * Check for holes and protection mismatch.
3059 * Holes: Next entry should be contiguous unless this
3060 * is the end of the region.
3061 * Protection: Access requested must be allowed, unless
3062 * wiring is by protection class
3063 */
3064 if ((((entry->vme_end < end) &&
3065 ((entry->vme_next == vm_map_to_entry(map)) ||
3066 (entry->vme_next->vme_start > entry->vme_end))) ||
3067 ((entry->protection & access_type) != access_type))) {
3068 /*
3069 * Found a hole or protection problem.
3070 * Unwire the region we wired so far.
3071 */
3072 if (start != entry->vme_start) {
3073 vm_map_unlock(map);
3074 vm_map_unwire(map, start, s, user_wire);
3075 } else {
3076 vm_map_unlock(map);
3077 }
3078 return((entry->protection&access_type) != access_type?
3079 KERN_PROTECTION_FAILURE: KERN_INVALID_ADDRESS);
3080 }
3081
3082 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
3083
9bccf70c
A
3084 if (user_wire)
3085 entry->user_wired_count++;
3086 if ((!user_wire) || (entry->user_wired_count == 1))
1c79356b 3087 entry->wired_count++;
1c79356b
A
3088
3089 entry->in_transition = TRUE;
3090
3091 /*
3092 * This entry might get split once we unlock the map.
3093 * In vm_fault_wire(), we need the current range as
3094 * defined by this entry. In order for this to work
3095 * along with a simultaneous clip operation, we make a
3096 * temporary copy of this entry and use that for the
3097 * wiring. Note that the underlying objects do not
3098 * change during a clip.
3099 */
3100 tmp_entry = *entry;
3101
3102 /*
 3103		 * The in_transition state guarantees that the entry
 3104		 * (or entries for this range, if a split occurred) will be
3105 * there when the map lock is acquired for the second time.
3106 */
3107 vm_map_unlock(map);
0b4e3aa0 3108
9bccf70c
A
3109 if (!user_wire && cur_thread != THREAD_NULL)
3110 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
3111 else
3112 interruptible_state = THREAD_UNINT;
9bccf70c 3113
1c79356b 3114 if(map_pmap)
9bccf70c
A
3115 rc = vm_fault_wire(map,
3116 &tmp_entry, map_pmap, pmap_addr);
1c79356b 3117 else
9bccf70c
A
3118 rc = vm_fault_wire(map,
3119 &tmp_entry, map->pmap,
3120 tmp_entry.vme_start);
0b4e3aa0
A
3121
3122 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 3123 thread_interrupt_level(interruptible_state);
0b4e3aa0 3124
1c79356b
A
3125 vm_map_lock(map);
3126
3127 if (last_timestamp+1 != map->timestamp) {
3128 /*
3129 * Find the entry again. It could have been clipped
3130 * after we unlocked the map.
3131 */
3132 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3133 &first_entry))
3134 panic("vm_map_wire: re-lookup failed");
3135
3136 entry = first_entry;
3137 }
3138
3139 last_timestamp = map->timestamp;
3140
3141 while ((entry != vm_map_to_entry(map)) &&
3142 (entry->vme_start < tmp_entry.vme_end)) {
3143 assert(entry->in_transition);
3144 entry->in_transition = FALSE;
3145 if (entry->needs_wakeup) {
3146 entry->needs_wakeup = FALSE;
3147 need_wakeup = TRUE;
3148 }
3149 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
9bccf70c
A
3150 if (user_wire)
3151 entry->user_wired_count--;
3152 if ((!user_wire) ||
3153 (entry->user_wired_count == 0))
1c79356b 3154 entry->wired_count--;
1c79356b
A
3155 }
3156 entry = entry->vme_next;
3157 }
3158
3159 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3160 vm_map_unlock(map);
3161 if (need_wakeup)
3162 vm_map_entry_wakeup(map);
3163 /*
 3164			 * undo everything up to the previous entry.
3165 */
3166 (void)vm_map_unwire(map, start, s, user_wire);
3167 return rc;
3168 }
3169 } /* end while loop through map entries */
3170 vm_map_unlock(map);
3171
3172 /*
3173 * wake up anybody waiting on entries we wired.
3174 */
3175 if (need_wakeup)
3176 vm_map_entry_wakeup(map);
3177
3178 return(KERN_SUCCESS);
3179
3180}
3181
3182kern_return_t
3183vm_map_wire(
3184 register vm_map_t map,
3185 register vm_map_offset_t start,
3186 register vm_map_offset_t end,
1c79356b
A
3187 register vm_prot_t access_type,
3188 boolean_t user_wire)
3189{
3190
3191 kern_return_t kret;
3192
3193#ifdef ppc
3194 /*
3195 * the calls to mapping_prealloc and mapping_relpre
 3196	 * (along with the VM_MAP_RANGE_CHECK to ensure a
 3197	 * reasonable range was passed in) are
3198 * currently necessary because
3199 * we haven't enabled kernel pre-emption
3200 * and/or the pmap_enter cannot purge and re-use
3201 * existing mappings
3202 */
3203 VM_MAP_RANGE_CHECK(map, start, end);
3204 mapping_prealloc(end - start);
3205#endif
3206 kret = vm_map_wire_nested(map, start, end, access_type,
9bccf70c 3207 user_wire, (pmap_t)NULL, 0);
1c79356b
A
3208#ifdef ppc
3209 mapping_relpre();
3210#endif
3211 return kret;
3212}
3213
3214/*
3215 * vm_map_unwire:
3216 *
3217 * Sets the pageability of the specified address range in the target
 3218 *	map as pageable.  Regions specified must have been wired previously.
3219 *
3220 * The map must not be locked, but a reference must remain to the map
3221 * throughout the call.
3222 *
3223 * Kernel will panic on failures. User unwire ignores holes and
 3224 *	unwired and in_transition entries to avoid losing memory by leaving
3225 * it unwired.
3226 */
91447636 3227static kern_return_t
1c79356b
A
3228vm_map_unwire_nested(
3229 register vm_map_t map,
3230 register vm_map_offset_t start,
3231 register vm_map_offset_t end,
1c79356b 3232 boolean_t user_wire,
9bccf70c 3233 pmap_t map_pmap,
91447636 3234 vm_map_offset_t pmap_addr)
1c79356b
A
3235{
3236 register vm_map_entry_t entry;
3237 struct vm_map_entry *first_entry, tmp_entry;
3238 boolean_t need_wakeup;
3239 boolean_t main_map = FALSE;
3240 unsigned int last_timestamp;
3241
3242 vm_map_lock(map);
3243 if(map_pmap == NULL)
3244 main_map = TRUE;
3245 last_timestamp = map->timestamp;
3246
3247 VM_MAP_RANGE_CHECK(map, start, end);
3248 assert(page_aligned(start));
3249 assert(page_aligned(end));
3250
3251 if (vm_map_lookup_entry(map, start, &first_entry)) {
3252 entry = first_entry;
3253 /* vm_map_clip_start will be done later. */
3254 }
3255 else {
3256 /* Start address is not in map. */
3257 vm_map_unlock(map);
3258 return(KERN_INVALID_ADDRESS);
3259 }
3260
3261 need_wakeup = FALSE;
3262 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3263 if (entry->in_transition) {
3264 /*
3265 * 1)
3266 * Another thread is wiring down this entry. Note
 3267			 * that if it were not for the other thread we would
3268 * be unwiring an unwired entry. This is not
3269 * permitted. If we wait, we will be unwiring memory
3270 * we did not wire.
3271 *
3272 * 2)
3273 * Another thread is unwiring this entry. We did not
3274 * have a reference to it, because if we did, this
 3275			 * entry would not be getting unwired now.
3276 */
3277 if (!user_wire)
3278 panic("vm_map_unwire: in_transition entry");
3279
3280 entry = entry->vme_next;
3281 continue;
3282 }
3283
3284 if(entry->is_sub_map) {
91447636
A
3285 vm_map_offset_t sub_start;
3286 vm_map_offset_t sub_end;
3287 vm_map_offset_t local_end;
1c79356b
A
3288 pmap_t pmap;
3289
3290
3291 vm_map_clip_start(map, entry, start);
3292 vm_map_clip_end(map, entry, end);
3293
3294 sub_start = entry->offset;
3295 sub_end = entry->vme_end - entry->vme_start;
3296 sub_end += entry->offset;
3297 local_end = entry->vme_end;
3298 if(map_pmap == NULL) {
3299 if(entry->use_pmap) {
3300 pmap = entry->object.sub_map->pmap;
9bccf70c 3301 pmap_addr = sub_start;
1c79356b
A
3302 } else {
3303 pmap = map->pmap;
9bccf70c 3304 pmap_addr = start;
1c79356b
A
3305 }
3306 if (entry->wired_count == 0 ||
3307 (user_wire && entry->user_wired_count == 0)) {
3308 if (!user_wire)
3309 panic("vm_map_unwire: entry is unwired");
3310 entry = entry->vme_next;
3311 continue;
3312 }
3313
3314 /*
3315 * Check for holes
3316 * Holes: Next entry should be contiguous unless
3317 * this is the end of the region.
3318 */
3319 if (((entry->vme_end < end) &&
3320 ((entry->vme_next == vm_map_to_entry(map)) ||
3321 (entry->vme_next->vme_start
3322 > entry->vme_end)))) {
3323 if (!user_wire)
3324 panic("vm_map_unwire: non-contiguous region");
3325/*
3326 entry = entry->vme_next;
3327 continue;
3328*/
3329 }
3330
3331 if (!user_wire || (--entry->user_wired_count == 0))
3332 entry->wired_count--;
3333
3334 if (entry->wired_count != 0) {
3335 entry = entry->vme_next;
3336 continue;
3337 }
3338
3339 entry->in_transition = TRUE;
3340 tmp_entry = *entry;/* see comment in vm_map_wire() */
3341
3342 /*
3343 * We can unlock the map now. The in_transition state
 3344			 * guarantees existence of the entry.
3345 */
3346 vm_map_unlock(map);
3347 vm_map_unwire_nested(entry->object.sub_map,
9bccf70c 3348 sub_start, sub_end, user_wire, pmap, pmap_addr);
1c79356b
A
3349 vm_map_lock(map);
3350
3351 if (last_timestamp+1 != map->timestamp) {
3352 /*
3353 * Find the entry again. It could have been
3354 * clipped or deleted after we unlocked the map.
3355 */
3356 if (!vm_map_lookup_entry(map,
3357 tmp_entry.vme_start,
3358 &first_entry)) {
3359 if (!user_wire)
3360 panic("vm_map_unwire: re-lookup failed");
3361 entry = first_entry->vme_next;
3362 } else
3363 entry = first_entry;
3364 }
3365 last_timestamp = map->timestamp;
3366
3367 /*
3368 * clear transition bit for all constituent entries
3369 * that were in the original entry (saved in
3370 * tmp_entry). Also check for waiters.
3371 */
3372 while ((entry != vm_map_to_entry(map)) &&
3373 (entry->vme_start < tmp_entry.vme_end)) {
3374 assert(entry->in_transition);
3375 entry->in_transition = FALSE;
3376 if (entry->needs_wakeup) {
3377 entry->needs_wakeup = FALSE;
3378 need_wakeup = TRUE;
3379 }
3380 entry = entry->vme_next;
3381 }
3382 continue;
3383 } else {
3384 vm_map_unlock(map);
55e303ae
A
3385 vm_map_unwire_nested(entry->object.sub_map,
3386 sub_start, sub_end, user_wire, map_pmap,
3387 pmap_addr);
1c79356b
A
3388 vm_map_lock(map);
3389
3390 if (last_timestamp+1 != map->timestamp) {
3391 /*
3392 * Find the entry again. It could have been
3393 * clipped or deleted after we unlocked the map.
3394 */
3395 if (!vm_map_lookup_entry(map,
3396 tmp_entry.vme_start,
3397 &first_entry)) {
3398 if (!user_wire)
3399 panic("vm_map_unwire: re-lookup failed");
3400 entry = first_entry->vme_next;
3401 } else
3402 entry = first_entry;
3403 }
3404 last_timestamp = map->timestamp;
3405 }
3406 }
3407
3408
9bccf70c
A
3409 if ((entry->wired_count == 0) ||
3410 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
3411 if (!user_wire)
3412 panic("vm_map_unwire: entry is unwired");
3413
3414 entry = entry->vme_next;
3415 continue;
3416 }
3417
3418 assert(entry->wired_count > 0 &&
3419 (!user_wire || entry->user_wired_count > 0));
3420
3421 vm_map_clip_start(map, entry, start);
3422 vm_map_clip_end(map, entry, end);
3423
3424 /*
3425 * Check for holes
3426 * Holes: Next entry should be contiguous unless
3427 * this is the end of the region.
3428 */
3429 if (((entry->vme_end < end) &&
3430 ((entry->vme_next == vm_map_to_entry(map)) ||
3431 (entry->vme_next->vme_start > entry->vme_end)))) {
3432
3433 if (!user_wire)
3434 panic("vm_map_unwire: non-contiguous region");
3435 entry = entry->vme_next;
3436 continue;
3437 }
3438
9bccf70c 3439 if (!user_wire || (--entry->user_wired_count == 0))
1c79356b
A
3440 entry->wired_count--;
3441
9bccf70c 3442 if (entry->wired_count != 0) {
1c79356b
A
3443 entry = entry->vme_next;
3444 continue;
1c79356b
A
3445 }
3446
3447 entry->in_transition = TRUE;
3448 tmp_entry = *entry; /* see comment in vm_map_wire() */
3449
3450 /*
3451 * We can unlock the map now. The in_transition state
 3452		 * guarantees existence of the entry.
3453 */
3454 vm_map_unlock(map);
3455 if(map_pmap) {
9bccf70c
A
3456 vm_fault_unwire(map,
3457 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 3458 } else {
9bccf70c
A
3459 vm_fault_unwire(map,
3460 &tmp_entry, FALSE, map->pmap,
3461 tmp_entry.vme_start);
1c79356b
A
3462 }
3463 vm_map_lock(map);
3464
3465 if (last_timestamp+1 != map->timestamp) {
3466 /*
3467 * Find the entry again. It could have been clipped
3468 * or deleted after we unlocked the map.
3469 */
3470 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3471 &first_entry)) {
3472 if (!user_wire)
3473 panic("vm_map_unwire: re-lookup failed");
3474 entry = first_entry->vme_next;
3475 } else
3476 entry = first_entry;
3477 }
3478 last_timestamp = map->timestamp;
3479
3480 /*
3481 * clear transition bit for all constituent entries that
3482 * were in the original entry (saved in tmp_entry). Also
3483 * check for waiters.
3484 */
3485 while ((entry != vm_map_to_entry(map)) &&
3486 (entry->vme_start < tmp_entry.vme_end)) {
3487 assert(entry->in_transition);
3488 entry->in_transition = FALSE;
3489 if (entry->needs_wakeup) {
3490 entry->needs_wakeup = FALSE;
3491 need_wakeup = TRUE;
3492 }
3493 entry = entry->vme_next;
3494 }
3495 }
91447636
A
3496
3497 /*
3498 * We might have fragmented the address space when we wired this
3499 * range of addresses. Attempt to re-coalesce these VM map entries
3500 * with their neighbors now that they're no longer wired.
3501 * Under some circumstances, address space fragmentation can
3502 * prevent VM object shadow chain collapsing, which can cause
3503 * swap space leaks.
3504 */
3505 vm_map_simplify_range(map, start, end);
3506
1c79356b
A
3507 vm_map_unlock(map);
3508 /*
3509 * wake up anybody waiting on entries that we have unwired.
3510 */
3511 if (need_wakeup)
3512 vm_map_entry_wakeup(map);
3513 return(KERN_SUCCESS);
3514
3515}
3516
3517kern_return_t
3518vm_map_unwire(
3519 register vm_map_t map,
3520 register vm_map_offset_t start,
3521 register vm_map_offset_t end,
1c79356b
A
3522 boolean_t user_wire)
3523{
9bccf70c
A
3524 return vm_map_unwire_nested(map, start, end,
3525 user_wire, (pmap_t)NULL, 0);
1c79356b
A
3526}
3527
3528
3529/*
3530 * vm_map_entry_delete: [ internal use only ]
3531 *
3532 * Deallocate the given entry from the target map.
3533 */
91447636 3534static void
1c79356b
A
3535vm_map_entry_delete(
3536 register vm_map_t map,
3537 register vm_map_entry_t entry)
3538{
91447636 3539 register vm_map_offset_t s, e;
1c79356b
A
3540 register vm_object_t object;
3541 register vm_map_t submap;
1c79356b
A
3542
3543 s = entry->vme_start;
3544 e = entry->vme_end;
3545 assert(page_aligned(s));
3546 assert(page_aligned(e));
3547 assert(entry->wired_count == 0);
3548 assert(entry->user_wired_count == 0);
3549
3550 if (entry->is_sub_map) {
3551 object = NULL;
3552 submap = entry->object.sub_map;
3553 } else {
3554 submap = NULL;
3555 object = entry->object.vm_object;
3556 }
3557
3558 vm_map_entry_unlink(map, entry);
3559 map->size -= e - s;
3560
3561 vm_map_entry_dispose(map, entry);
3562
3563 vm_map_unlock(map);
3564 /*
3565 * Deallocate the object only after removing all
3566 * pmap entries pointing to its pages.
3567 */
3568 if (submap)
3569 vm_map_deallocate(submap);
3570 else
3571 vm_object_deallocate(object);
3572
3573}
3574
0c530ab8 3575
1c79356b
A
3576void
3577vm_map_submap_pmap_clean(
3578 vm_map_t map,
91447636
A
3579 vm_map_offset_t start,
3580 vm_map_offset_t end,
1c79356b 3581 vm_map_t sub_map,
91447636 3582 vm_map_offset_t offset)
1c79356b 3583{
91447636
A
3584 vm_map_offset_t submap_start;
3585 vm_map_offset_t submap_end;
3586 vm_map_size_t remove_size;
1c79356b
A
3587 vm_map_entry_t entry;
3588
3589 submap_end = offset + (end - start);
3590 submap_start = offset;
3591 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
3592
3593 remove_size = (entry->vme_end - entry->vme_start);
3594 if(offset > entry->vme_start)
3595 remove_size -= offset - entry->vme_start;
3596
3597
3598 if(submap_end < entry->vme_end) {
3599 remove_size -=
3600 entry->vme_end - submap_end;
3601 }
3602 if(entry->is_sub_map) {
3603 vm_map_submap_pmap_clean(
3604 sub_map,
3605 start,
3606 start + remove_size,
3607 entry->object.sub_map,
3608 entry->offset);
3609 } else {
9bccf70c
A
3610
3611 if((map->mapped) && (map->ref_count)
3612 && (entry->object.vm_object != NULL)) {
3613 vm_object_pmap_protect(
3614 entry->object.vm_object,
3615 entry->offset,
3616 remove_size,
3617 PMAP_NULL,
3618 entry->vme_start,
3619 VM_PROT_NONE);
3620 } else {
3621 pmap_remove(map->pmap,
55e303ae
A
3622 (addr64_t)start,
3623 (addr64_t)(start + remove_size));
9bccf70c 3624 }
1c79356b
A
3625 }
3626 }
3627
3628 entry = entry->vme_next;
3629
3630 while((entry != vm_map_to_entry(sub_map))
3631 && (entry->vme_start < submap_end)) {
3632 remove_size = (entry->vme_end - entry->vme_start);
3633 if(submap_end < entry->vme_end) {
3634 remove_size -= entry->vme_end - submap_end;
3635 }
3636 if(entry->is_sub_map) {
3637 vm_map_submap_pmap_clean(
3638 sub_map,
3639 (start + entry->vme_start) - offset,
3640 ((start + entry->vme_start) - offset) + remove_size,
3641 entry->object.sub_map,
3642 entry->offset);
3643 } else {
9bccf70c
A
3644 if((map->mapped) && (map->ref_count)
3645 && (entry->object.vm_object != NULL)) {
3646 vm_object_pmap_protect(
3647 entry->object.vm_object,
3648 entry->offset,
3649 remove_size,
3650 PMAP_NULL,
3651 entry->vme_start,
3652 VM_PROT_NONE);
3653 } else {
3654 pmap_remove(map->pmap,
55e303ae
A
3655 (addr64_t)((start + entry->vme_start)
3656 - offset),
3657 (addr64_t)(((start + entry->vme_start)
3658 - offset) + remove_size));
9bccf70c 3659 }
1c79356b
A
3660 }
3661 entry = entry->vme_next;
3662 }
3663 return;
3664}
3665
3666/*
3667 * vm_map_delete: [ internal use only ]
3668 *
3669 * Deallocates the given address range from the target map.
3670 * Removes all user wirings. Unwires one kernel wiring if
3671 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
3672 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
3673 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
3674 *
3675 * This routine is called with map locked and leaves map locked.
3676 */
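/*
 * Editor's note: hypothetical illustration, not part of the original
 * source.  This mirrors the internal call made by the error path of
 * vm_map_enter() earlier in this file: with the map locked, remove the
 * range and save the displaced entries into a zap map.
 */
#if 0	/* illustration only */
	kr = vm_map_delete(map, start, end,
			   VM_MAP_REMOVE_SAVE_ENTRIES, zap_map);
#endif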
91447636 3677static kern_return_t
1c79356b 3678vm_map_delete(
3679 vm_map_t map,
3680 vm_map_offset_t start,
3681 vm_map_offset_t end,
3682 int flags,
3683 vm_map_t zap_map)
1c79356b
A
3684{
3685 vm_map_entry_t entry, next;
3686 struct vm_map_entry *first_entry, tmp_entry;
91447636 3687 register vm_map_offset_t s, e;
1c79356b
A
3688 register vm_object_t object;
3689 boolean_t need_wakeup;
3690 unsigned int last_timestamp = ~0; /* unlikely value */
3691 int interruptible;
1c79356b
A
3692
3693 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
3694 THREAD_ABORTSAFE : THREAD_UNINT;
3695
3696 /*
3697 * All our DMA I/O operations in IOKit are currently done by
3698 * wiring through the map entries of the task requesting the I/O.
3699 * Because of this, we must always wait for kernel wirings
3700 * to go away on the entries before deleting them.
3701 *
3702 * Any caller who wants to actually remove a kernel wiring
3703 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
3704 * properly remove one wiring instead of blasting through
3705 * them all.
3706 */
3707 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
3708
3709 /*
3710 * Find the start of the region, and clip it
3711 */
3712 if (vm_map_lookup_entry(map, start, &first_entry)) {
3713 entry = first_entry;
3714 vm_map_clip_start(map, entry, start);
3715
3716 /*
3717 * Fix the lookup hint now, rather than each
3718 * time through the loop.
3719 */
0c530ab8 3720 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
3721 } else {
3722 entry = first_entry->vme_next;
3723 }
3724
3725 need_wakeup = FALSE;
3726 /*
3727 * Step through all entries in this region
3728 */
3729 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3730
3731 vm_map_clip_end(map, entry, end);
3732 if (entry->in_transition) {
9bccf70c
A
3733 wait_result_t wait_result;
3734
1c79356b
A
3735 /*
3736 * Another thread is wiring/unwiring this entry.
3737 * Let the other thread know we are waiting.
3738 */
3739 s = entry->vme_start;
3740 entry->needs_wakeup = TRUE;
3741
3742 /*
3743 * wake up anybody waiting on entries that we have
3744 * already unwired/deleted.
3745 */
3746 if (need_wakeup) {
3747 vm_map_entry_wakeup(map);
3748 need_wakeup = FALSE;
3749 }
3750
9bccf70c 3751 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
3752
3753 if (interruptible &&
9bccf70c 3754 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
3755 /*
3756 * We do not clear the needs_wakeup flag,
3757 * since we cannot tell if we were the only one.
3758 */
9bccf70c 3759 vm_map_unlock(map);
1c79356b 3760 return KERN_ABORTED;
9bccf70c 3761 }
1c79356b
A
3762
3763 /*
3764 * The entry could have been clipped or it
3765 * may not exist anymore. Look it up again.
3766 */
3767 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3768 assert((map != kernel_map) &&
3769 (!entry->is_sub_map));
3770 /*
3771 * User: use the next entry
3772 */
3773 entry = first_entry->vme_next;
3774 } else {
3775 entry = first_entry;
0c530ab8 3776 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 3777 }
9bccf70c 3778 last_timestamp = map->timestamp;
1c79356b
A
3779 continue;
3780 } /* end in_transition */
3781
3782 if (entry->wired_count) {
3783 /*
3784 * Remove a kernel wiring if requested or if
3785 * there are user wirings.
3786 */
3787 if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
3788 (entry->user_wired_count > 0))
3789 entry->wired_count--;
3790
3791 /* remove all user wire references */
3792 entry->user_wired_count = 0;
3793
3794 if (entry->wired_count != 0) {
3795 assert((map != kernel_map) &&
3796 (!entry->is_sub_map));
3797 /*
3798 * Cannot continue. Typical case is when
 3799				 * a user thread has physical I/O pending on
 3800				 * this page.  Either wait for the
3801 * kernel wiring to go away or return an
3802 * error.
3803 */
3804 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 3805 wait_result_t wait_result;
1c79356b
A
3806
3807 s = entry->vme_start;
3808 entry->needs_wakeup = TRUE;
9bccf70c
A
3809 wait_result = vm_map_entry_wait(map,
3810 interruptible);
1c79356b
A
3811
3812 if (interruptible &&
9bccf70c 3813 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
3814 /*
3815 * We do not clear the
3816 * needs_wakeup flag, since we
3817 * cannot tell if we were the
3818 * only one.
3819 */
9bccf70c 3820 vm_map_unlock(map);
1c79356b 3821 return KERN_ABORTED;
9bccf70c 3822 }
1c79356b
A
3823
3824 /*
3825 * The entry could have been clipped or
3826 * it may not exist anymore. Look it
3827 * up again.
3828 */
3829 if (!vm_map_lookup_entry(map, s,
3830 &first_entry)) {
3831 assert((map != kernel_map) &&
3832 (!entry->is_sub_map));
3833 /*
3834 * User: use the next entry
3835 */
3836 entry = first_entry->vme_next;
3837 } else {
3838 entry = first_entry;
0c530ab8 3839 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 3840 }
9bccf70c 3841 last_timestamp = map->timestamp;
1c79356b
A
3842 continue;
3843 }
3844 else {
3845 return KERN_FAILURE;
3846 }
3847 }
3848
3849 entry->in_transition = TRUE;
3850 /*
3851 * copy current entry. see comment in vm_map_wire()
3852 */
3853 tmp_entry = *entry;
3854 s = entry->vme_start;
3855 e = entry->vme_end;
3856
3857 /*
3858 * We can unlock the map now. The in_transition
3859 * state guarantees existence of the entry.
3860 */
3861 vm_map_unlock(map);
3862 vm_fault_unwire(map, &tmp_entry,
3863 tmp_entry.object.vm_object == kernel_object,
9bccf70c 3864 map->pmap, tmp_entry.vme_start);
1c79356b
A
3865 vm_map_lock(map);
3866
3867 if (last_timestamp+1 != map->timestamp) {
3868 /*
3869 * Find the entry again. It could have
3870 * been clipped after we unlocked the map.
3871 */
3872 if (!vm_map_lookup_entry(map, s, &first_entry)){
3873 assert((map != kernel_map) &&
3874 (!entry->is_sub_map));
3875 first_entry = first_entry->vme_next;
3876 } else {
0c530ab8 3877 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
3878 }
3879 } else {
0c530ab8 3880 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
3881 first_entry = entry;
3882 }
3883
3884 last_timestamp = map->timestamp;
3885
3886 entry = first_entry;
3887 while ((entry != vm_map_to_entry(map)) &&
3888 (entry->vme_start < tmp_entry.vme_end)) {
3889 assert(entry->in_transition);
3890 entry->in_transition = FALSE;
3891 if (entry->needs_wakeup) {
3892 entry->needs_wakeup = FALSE;
3893 need_wakeup = TRUE;
3894 }
3895 entry = entry->vme_next;
3896 }
3897 /*
3898 * We have unwired the entry(s). Go back and
3899 * delete them.
3900 */
3901 entry = first_entry;
3902 continue;
3903 }
3904
3905 /* entry is unwired */
3906 assert(entry->wired_count == 0);
3907 assert(entry->user_wired_count == 0);
3908
3909 if ((!entry->is_sub_map &&
3910 entry->object.vm_object != kernel_object) ||
3911 entry->is_sub_map) {
3912 if(entry->is_sub_map) {
3913 if(entry->use_pmap) {
0c530ab8
A
3914#ifndef NO_NESTED_PMAP
3915 pmap_unnest(map->pmap,
3916 (addr64_t)entry->vme_start);
3917#endif /* NO_NESTED_PMAP */
9bccf70c
A
3918 if((map->mapped) && (map->ref_count)) {
3919 /* clean up parent map/maps */
3920 vm_map_submap_pmap_clean(
3921 map, entry->vme_start,
3922 entry->vme_end,
3923 entry->object.sub_map,
3924 entry->offset);
3925 }
1c79356b
A
3926 } else {
3927 vm_map_submap_pmap_clean(
3928 map, entry->vme_start, entry->vme_end,
3929 entry->object.sub_map,
3930 entry->offset);
3931 }
3932 } else {
55e303ae
A
3933 object = entry->object.vm_object;
3934 if((map->mapped) && (map->ref_count)) {
3935 vm_object_pmap_protect(
3936 object, entry->offset,
3937 entry->vme_end - entry->vme_start,
3938 PMAP_NULL,
3939 entry->vme_start,
3940 VM_PROT_NONE);
91447636
A
3941 } else {
3942 pmap_remove(map->pmap,
0c530ab8
A
3943 (addr64_t)entry->vme_start,
3944 (addr64_t)entry->vme_end);
91447636 3945 }
1c79356b
A
3946 }
3947 }
3948
91447636
A
3949 /*
3950 * All pmap mappings for this map entry must have been
3951 * cleared by now.
3952 */
3953 assert(vm_map_pmap_is_empty(map,
3954 entry->vme_start,
3955 entry->vme_end));
3956
1c79356b
A
3957 next = entry->vme_next;
3958 s = next->vme_start;
3959 last_timestamp = map->timestamp;
91447636
A
3960
3961 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
3962 zap_map != VM_MAP_NULL) {
3963 /*
3964 * The caller wants to save the affected VM map entries
3965 * into the "zap_map". The caller will take care of
3966 * these entries.
3967 */
3968 /* unlink the entry from "map" ... */
3969 vm_map_entry_unlink(map, entry);
3970 /* ... and add it to the end of the "zap_map" */
3971 vm_map_entry_link(zap_map,
3972 vm_map_last_entry(zap_map),
3973 entry);
3974 } else {
3975 vm_map_entry_delete(map, entry);
3976 /* vm_map_entry_delete unlocks the map */
3977 vm_map_lock(map);
3978 }
3979
1c79356b
A
3980 entry = next;
3981
3982 if(entry == vm_map_to_entry(map)) {
3983 break;
3984 }
3985 if (last_timestamp+1 != map->timestamp) {
3986 /*
3987 * we are responsible for deleting everything
3988 * from the given space; if someone has interfered,
3989 * we pick up where we left off. Back fills should
3990 * be all right for anyone except map_delete, and
3991 * we have to assume that the task has been fully
3992 * disabled before we get here.
3993 */
3994 if (!vm_map_lookup_entry(map, s, &entry)){
3995 entry = entry->vme_next;
3996 } else {
0c530ab8 3997 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
3998 }
3999 /*
4000 * others can not only allocate behind us; entries can
4001 * also coalesce while we don't hold the map lock
4002 */
4003 if(entry == vm_map_to_entry(map)) {
4004 break;
4005 }
4006 vm_map_clip_start(map, entry, s);
4007 }
4008 last_timestamp = map->timestamp;
4009 }
4010
4011 if (map->wait_for_space)
4012 thread_wakeup((event_t) map);
4013 /*
4014 * wake up anybody waiting on entries that we have already deleted.
4015 */
4016 if (need_wakeup)
4017 vm_map_entry_wakeup(map);
4018
4019 return KERN_SUCCESS;
4020}
4021
0c530ab8 4022
1c79356b
A
4023/*
4024 * vm_map_remove:
4025 *
4026 * Remove the given address range from the target map.
4027 * This is the exported form of vm_map_delete.
4028 */
4029kern_return_t
4030vm_map_remove(
4031 register vm_map_t map,
91447636
A
4032 register vm_map_offset_t start,
4033 register vm_map_offset_t end,
1c79356b
A
4034 register boolean_t flags)
4035{
4036 register kern_return_t result;
9bccf70c 4037
1c79356b
A
4038 vm_map_lock(map);
4039 VM_MAP_RANGE_CHECK(map, start, end);
91447636 4040 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 4041 vm_map_unlock(map);
91447636 4042
1c79356b
A
4043 return(result);
4044}
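/*
 * Example (editor's sketch, not part of the original source): a
 * typical in-kernel caller tearing down a page-aligned range with
 * vm_map_remove().  Since vm_map_delete() always waits for kernel
 * wirings (VM_MAP_REMOVE_WAIT_FOR_KWIRE is forced on above), a caller
 * that holds a kernel wiring on the range must pass
 * VM_MAP_REMOVE_KUNWIRE explicitly so that exactly one wiring is
 * dropped.  The helper name below is hypothetical.
 */
static kern_return_t
example_remove_wired_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	/* round to page boundaries, as other vm_map_remove() callers do */
	return vm_map_remove(map,
			     vm_map_trunc_page(start),
			     vm_map_round_page(start + size),
			     VM_MAP_REMOVE_KUNWIRE |
			     VM_MAP_REMOVE_INTERRUPTIBLE);
}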
4045
4046
1c79356b
A
4047/*
4048 * Routine: vm_map_copy_discard
4049 *
4050 * Description:
4051 * Dispose of a map copy object (returned by
4052 * vm_map_copyin).
4053 */
4054void
4055vm_map_copy_discard(
4056 vm_map_copy_t copy)
4057{
4058 TR_DECL("vm_map_copy_discard");
4059
4060/* tr3("enter: copy 0x%x type %d", copy, copy->type);*/
91447636 4061
1c79356b
A
4062 if (copy == VM_MAP_COPY_NULL)
4063 return;
4064
4065 switch (copy->type) {
4066 case VM_MAP_COPY_ENTRY_LIST:
4067 while (vm_map_copy_first_entry(copy) !=
4068 vm_map_copy_to_entry(copy)) {
4069 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
4070
4071 vm_map_copy_entry_unlink(copy, entry);
4072 vm_object_deallocate(entry->object.vm_object);
4073 vm_map_copy_entry_dispose(copy, entry);
4074 }
4075 break;
4076 case VM_MAP_COPY_OBJECT:
4077 vm_object_deallocate(copy->cpy_object);
4078 break;
1c79356b
A
4079 case VM_MAP_COPY_KERNEL_BUFFER:
4080
4081 /*
4082 * The vm_map_copy_t and possibly the data buffer were
4083 * allocated by a single call to kalloc(), i.e. the
4084 * vm_map_copy_t was not allocated out of the zone.
4085 */
91447636 4086 kfree(copy, copy->cpy_kalloc_size);
1c79356b
A
4087 return;
4088 }
91447636 4089 zfree(vm_map_copy_zone, copy);
1c79356b
A
4090}
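/*
 * Example (editor's sketch, not part of the original source): an
 * owner of a vm_map_copy_t must discard it on its own error paths;
 * only routines documented as "consumes the copy object" take
 * ownership, and then only on success.  The helper name and the
 * "copy_is_acceptable" parameter are hypothetical.
 */
static kern_return_t
example_consume_or_discard(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,		/* OUT */
	vm_map_copy_t		copy,
	boolean_t		copy_is_acceptable)
{
	kern_return_t	kr;

	if (!copy_is_acceptable) {
		/* we still own the copy: dispose of it ourselves */
		vm_map_copy_discard(copy);
		return KERN_FAILURE;
	}
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* vm_map_copyout consumes the copy only on success */
		vm_map_copy_discard(copy);
	}
	return kr;
}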
4091
4092/*
4093 * Routine: vm_map_copy_copy
4094 *
4095 * Description:
4096 * Move the information in a map copy object to
4097 * a new map copy object, leaving the old one
4098 * empty.
4099 *
4100 * This is used by kernel routines that need
4101 * to look at out-of-line data (in copyin form)
4102 * before deciding whether to return SUCCESS.
4103 * If the routine returns FAILURE, the original
4104 * copy object will be deallocated; therefore,
4105 * these routines must make a copy of the copy
4106 * object and leave the original empty so that
4107 * deallocation will not fail.
4108 */
4109vm_map_copy_t
4110vm_map_copy_copy(
4111 vm_map_copy_t copy)
4112{
4113 vm_map_copy_t new_copy;
4114
4115 if (copy == VM_MAP_COPY_NULL)
4116 return VM_MAP_COPY_NULL;
4117
4118 /*
4119 * Allocate a new copy object, and copy the information
4120 * from the old one into it.
4121 */
4122
4123 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
4124 *new_copy = *copy;
4125
4126 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
4127 /*
4128 * The links in the entry chain must be
4129 * changed to point to the new copy object.
4130 */
4131 vm_map_copy_first_entry(copy)->vme_prev
4132 = vm_map_copy_to_entry(new_copy);
4133 vm_map_copy_last_entry(copy)->vme_next
4134 = vm_map_copy_to_entry(new_copy);
4135 }
4136
4137 /*
4138 * Change the old copy object into one that contains
4139 * nothing to be deallocated.
4140 */
4141 copy->type = VM_MAP_COPY_OBJECT;
4142 copy->cpy_object = VM_OBJECT_NULL;
4143
4144 /*
4145 * Return the new object.
4146 */
4147 return new_copy;
4148}
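/*
 * Example (editor's sketch, not part of the original source): the
 * pattern described above -- inspect out-of-line data, keep a private
 * copy object on success, and leave the caller's object hollow so
 * that its discard on failure cannot free the data out from under us.
 * The helper name and "acceptable" parameter are hypothetical.
 */
static kern_return_t
example_inspect_then_keep(
	vm_map_copy_t	copy,		/* caller's copy object */
	boolean_t	acceptable,	/* result of inspecting it */
	vm_map_copy_t	*kept)		/* OUT: our private copy */
{
	if (!acceptable)
		return KERN_FAILURE;	/* caller discards "copy" */
	/*
	 * Move the contents into a copy object we own; the caller's
	 * object becomes an empty VM_MAP_COPY_OBJECT, so a later
	 * deallocation of it is harmless.
	 */
	*kept = vm_map_copy_copy(copy);
	return KERN_SUCCESS;
}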
4149
91447636 4150static kern_return_t
1c79356b
A
4151vm_map_overwrite_submap_recurse(
4152 vm_map_t dst_map,
91447636
A
4153 vm_map_offset_t dst_addr,
4154 vm_map_size_t dst_size)
1c79356b 4155{
91447636 4156 vm_map_offset_t dst_end;
1c79356b
A
4157 vm_map_entry_t tmp_entry;
4158 vm_map_entry_t entry;
4159 kern_return_t result;
4160 boolean_t encountered_sub_map = FALSE;
4161
4162
4163
4164 /*
4165 * Verify that the destination is all writeable
4166 * initially. We have to trunc the destination
4167 * address and round the copy size or we'll end up
4168 * splitting entries in strange ways.
4169 */
4170
91447636 4171 dst_end = vm_map_round_page(dst_addr + dst_size);
9bccf70c 4172 vm_map_lock(dst_map);
1c79356b
A
4173
4174start_pass_1:
1c79356b
A
4175 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4176 vm_map_unlock(dst_map);
4177 return(KERN_INVALID_ADDRESS);
4178 }
4179
91447636 4180 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
1c79356b
A
4181
4182 for (entry = tmp_entry;;) {
4183 vm_map_entry_t next;
4184
4185 next = entry->vme_next;
4186 while(entry->is_sub_map) {
91447636
A
4187 vm_map_offset_t sub_start;
4188 vm_map_offset_t sub_end;
4189 vm_map_offset_t local_end;
1c79356b
A
4190
4191 if (entry->in_transition) {
4192 /*
4193 * Say that we are waiting, and wait for entry.
4194 */
4195 entry->needs_wakeup = TRUE;
4196 vm_map_entry_wait(dst_map, THREAD_UNINT);
4197
4198 goto start_pass_1;
4199 }
4200
4201 encountered_sub_map = TRUE;
4202 sub_start = entry->offset;
4203
4204 if(entry->vme_end < dst_end)
4205 sub_end = entry->vme_end;
4206 else
4207 sub_end = dst_end;
4208 sub_end -= entry->vme_start;
4209 sub_end += entry->offset;
4210 local_end = entry->vme_end;
4211 vm_map_unlock(dst_map);
4212
4213 result = vm_map_overwrite_submap_recurse(
4214 entry->object.sub_map,
4215 sub_start,
4216 sub_end - sub_start);
4217
4218 if(result != KERN_SUCCESS)
4219 return result;
4220 if (dst_end <= entry->vme_end)
4221 return KERN_SUCCESS;
4222 vm_map_lock(dst_map);
4223 if(!vm_map_lookup_entry(dst_map, local_end,
4224 &tmp_entry)) {
4225 vm_map_unlock(dst_map);
4226 return(KERN_INVALID_ADDRESS);
4227 }
4228 entry = tmp_entry;
4229 next = entry->vme_next;
4230 }
4231
4232 if ( ! (entry->protection & VM_PROT_WRITE)) {
4233 vm_map_unlock(dst_map);
4234 return(KERN_PROTECTION_FAILURE);
4235 }
4236
4237 /*
4238 * If the entry is in transition, we must wait
4239 * for it to exit that state. Anything could happen
4240 * when we unlock the map, so start over.
4241 */
4242 if (entry->in_transition) {
4243
4244 /*
4245 * Say that we are waiting, and wait for entry.
4246 */
4247 entry->needs_wakeup = TRUE;
4248 vm_map_entry_wait(dst_map, THREAD_UNINT);
4249
4250 goto start_pass_1;
4251 }
4252
4253/*
4254 * our range is contained completely within this map entry
4255 */
4256 if (dst_end <= entry->vme_end) {
4257 vm_map_unlock(dst_map);
4258 return KERN_SUCCESS;
4259 }
4260/*
4261 * check that range specified is contiguous region
4262 */
4263 if ((next == vm_map_to_entry(dst_map)) ||
4264 (next->vme_start != entry->vme_end)) {
4265 vm_map_unlock(dst_map);
4266 return(KERN_INVALID_ADDRESS);
4267 }
4268
4269 /*
4270 * Check for permanent objects in the destination.
4271 */
4272 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
4273 ((!entry->object.vm_object->internal) ||
4274 (entry->object.vm_object->true_share))) {
4275 if(encountered_sub_map) {
4276 vm_map_unlock(dst_map);
4277 return(KERN_FAILURE);
4278 }
4279 }
4280
4281
4282 entry = next;
4283 }/* for */
4284 vm_map_unlock(dst_map);
4285 return(KERN_SUCCESS);
4286}
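/*
 * Editor's note (sketch, not in the original source): the recursion
 * above converts the destination window into submap offsets by
 * clipping the window end to the current entry, rebasing it from map
 * addresses to an entry-relative offset, and shifting it by the
 * entry's offset into the submap.  Restated as a stand-alone helper
 * (hypothetical name):
 */
static vm_map_offset_t
example_submap_end_offset(
	vm_map_offset_t	dst_end,	/* end of destination window */
	vm_map_offset_t	vme_start,	/* entry->vme_start */
	vm_map_offset_t	vme_end,	/* entry->vme_end */
	vm_map_offset_t	entry_offset)	/* entry->offset into submap */
{
	vm_map_offset_t	sub_end;

	/* clip the window to this entry ... */
	sub_end = (vme_end < dst_end) ? vme_end : dst_end;
	/* ... rebase from map addresses to an entry-relative offset ... */
	sub_end -= vme_start;
	/* ... and shift into the submap's offset space */
	sub_end += entry_offset;
	return sub_end;
}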
4287
4288/*
4289 * Routine: vm_map_copy_overwrite
4290 *
4291 * Description:
4292 * Copy the memory described by the map copy
4293 * object (copy; returned by vm_map_copyin) onto
4294 * the specified destination region (dst_map, dst_addr).
4295 * The destination must be writeable.
4296 *
4297 * Unlike vm_map_copyout, this routine actually
4298 * writes over previously-mapped memory. If the
4299 * previous mapping was to a permanent (user-supplied)
4300 * memory object, it is preserved.
4301 *
4302 * The attributes (protection and inheritance) of the
4303 * destination region are preserved.
4304 *
4305 * If successful, consumes the copy object.
4306 * Otherwise, the caller is responsible for it.
4307 *
4308 * Implementation notes:
4309 * To overwrite aligned temporary virtual memory, it is
4310 * sufficient to remove the previous mapping and insert
4311 * the new copy. This replacement is done either on
4312 * the whole region (if no permanent virtual memory
4313 * objects are embedded in the destination region) or
4314 * in individual map entries.
4315 *
4316 * To overwrite permanent virtual memory, it is necessary
4317 * to copy each page, as the external memory management
4318 * interface currently does not provide any optimizations.
4319 *
4320 * Unaligned memory also has to be copied. It is possible
4321 * to use 'vm_trickery' to copy the aligned data. This is
4322 * not done but not hard to implement.
4323 *
4324 * Once a page of permanent memory has been overwritten,
4325 * it is impossible to interrupt this function; otherwise,
4326 * the call would be neither atomic nor location-independent.
4327 * The kernel-state portion of a user thread must be
4328 * interruptible.
4329 *
4330 * It may be expensive to forward all requests that might
4331 * overwrite permanent memory (vm_write, vm_copy) to
4332 * uninterruptible kernel threads. This routine may be
4333 * called by interruptible threads; however, success is
4334 * not guaranteed -- if the request cannot be performed
4335 * atomically and interruptibly, an error indication is
4336 * returned.
4337 */
4338
91447636 4339static kern_return_t
1c79356b 4340vm_map_copy_overwrite_nested(
91447636
A
4341 vm_map_t dst_map,
4342 vm_map_address_t dst_addr,
4343 vm_map_copy_t copy,
4344 boolean_t interruptible,
4345 pmap_t pmap)
1c79356b 4346{
91447636
A
4347 vm_map_offset_t dst_end;
4348 vm_map_entry_t tmp_entry;
4349 vm_map_entry_t entry;
4350 kern_return_t kr;
4351 boolean_t aligned = TRUE;
4352 boolean_t contains_permanent_objects = FALSE;
4353 boolean_t encountered_sub_map = FALSE;
4354 vm_map_offset_t base_addr;
4355 vm_map_size_t copy_size;
4356 vm_map_size_t total_size;
1c79356b
A
4357
4358
4359 /*
4360 * Check for null copy object.
4361 */
4362
4363 if (copy == VM_MAP_COPY_NULL)
4364 return(KERN_SUCCESS);
4365
4366 /*
4367 * Check for special kernel buffer allocated
4368 * by new_ipc_kmsg_copyin.
4369 */
4370
4371 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0
A
4372 return(vm_map_copyout_kernel_buffer(
4373 dst_map, &dst_addr,
4374 copy, TRUE));
1c79356b
A
4375 }
4376
4377 /*
4378 * Only works for entry lists at the moment. Will
4379 * support page lists later.
4380 */
4381
4382 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
4383
4384 if (copy->size == 0) {
4385 vm_map_copy_discard(copy);
4386 return(KERN_SUCCESS);
4387 }
4388
4389 /*
4390 * Verify that the destination is all writeable
4391 * initially. We have to trunc the destination
4392 * address and round the copy size or we'll end up
4393 * splitting entries in strange ways.
4394 */
4395
4396 if (!page_aligned(copy->size) ||
4397 !page_aligned (copy->offset) ||
4398 !page_aligned (dst_addr))
4399 {
4400 aligned = FALSE;
91447636 4401 dst_end = vm_map_round_page(dst_addr + copy->size);
1c79356b
A
4402 } else {
4403 dst_end = dst_addr + copy->size;
4404 }
4405
1c79356b 4406 vm_map_lock(dst_map);
9bccf70c 4407
91447636
A
4408 /* LP64todo - remove this check when vm_map_commpage64()
4409 * no longer has to stuff in a map_entry for the commpage
4410 * above the map's max_offset.
4411 */
4412 if (dst_addr >= dst_map->max_offset) {
4413 vm_map_unlock(dst_map);
4414 return(KERN_INVALID_ADDRESS);
4415 }
4416
9bccf70c 4417start_pass_1:
1c79356b
A
4418 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4419 vm_map_unlock(dst_map);
4420 return(KERN_INVALID_ADDRESS);
4421 }
91447636 4422 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
1c79356b
A
4423 for (entry = tmp_entry;;) {
4424 vm_map_entry_t next = entry->vme_next;
4425
4426 while(entry->is_sub_map) {
91447636
A
4427 vm_map_offset_t sub_start;
4428 vm_map_offset_t sub_end;
4429 vm_map_offset_t local_end;
1c79356b
A
4430
4431 if (entry->in_transition) {
4432
4433 /*
4434 * Say that we are waiting, and wait for entry.
4435 */
4436 entry->needs_wakeup = TRUE;
4437 vm_map_entry_wait(dst_map, THREAD_UNINT);
4438
4439 goto start_pass_1;
4440 }
4441
4442 local_end = entry->vme_end;
4443 if (!(entry->needs_copy)) {
4444 /* if needs_copy we are a COW submap */
4445 /* in such a case we just replace so */
4446 /* there is no need for the follow- */
4447 /* ing check. */
4448 encountered_sub_map = TRUE;
4449 sub_start = entry->offset;
4450
4451 if(entry->vme_end < dst_end)
4452 sub_end = entry->vme_end;
4453 else
4454 sub_end = dst_end;
4455 sub_end -= entry->vme_start;
4456 sub_end += entry->offset;
4457 vm_map_unlock(dst_map);
4458
4459 kr = vm_map_overwrite_submap_recurse(
4460 entry->object.sub_map,
4461 sub_start,
4462 sub_end - sub_start);
4463 if(kr != KERN_SUCCESS)
4464 return kr;
4465 vm_map_lock(dst_map);
4466 }
4467
4468 if (dst_end <= entry->vme_end)
4469 goto start_overwrite;
4470 if(!vm_map_lookup_entry(dst_map, local_end,
4471 &entry)) {
4472 vm_map_unlock(dst_map);
4473 return(KERN_INVALID_ADDRESS);
4474 }
4475 next = entry->vme_next;
4476 }
4477
4478 if ( ! (entry->protection & VM_PROT_WRITE)) {
4479 vm_map_unlock(dst_map);
4480 return(KERN_PROTECTION_FAILURE);
4481 }
4482
4483 /*
4484 * If the entry is in transition, we must wait
4485 * for it to exit that state. Anything could happen
4486 * when we unlock the map, so start over.
4487 */
4488 if (entry->in_transition) {
4489
4490 /*
4491 * Say that we are waiting, and wait for entry.
4492 */
4493 entry->needs_wakeup = TRUE;
4494 vm_map_entry_wait(dst_map, THREAD_UNINT);
4495
4496 goto start_pass_1;
4497 }
4498
4499/*
4500 * our range is contained completely within this map entry
4501 */
4502 if (dst_end <= entry->vme_end)
4503 break;
4504/*
4505 * check that range specified is contiguous region
4506 */
4507 if ((next == vm_map_to_entry(dst_map)) ||
4508 (next->vme_start != entry->vme_end)) {
4509 vm_map_unlock(dst_map);
4510 return(KERN_INVALID_ADDRESS);
4511 }
4512
4513
4514 /*
4515 * Check for permanent objects in the destination.
4516 */
4517 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
4518 ((!entry->object.vm_object->internal) ||
4519 (entry->object.vm_object->true_share))) {
4520 contains_permanent_objects = TRUE;
4521 }
4522
4523 entry = next;
4524 }/* for */
4525
4526start_overwrite:
4527 /*
4528 * If there are permanent objects in the destination, then
4529 * the copy cannot be interrupted.
4530 */
4531
4532 if (interruptible && contains_permanent_objects) {
4533 vm_map_unlock(dst_map);
4534 return(KERN_FAILURE); /* XXX */
4535 }
4536
4537 /*
4538 *
4539 * Make a second pass, overwriting the data
4540 * At the beginning of each loop iteration,
4541 * the next entry to be overwritten is "tmp_entry"
4542 * (initially, the value returned from the lookup above),
4543 * and the starting address expected in that entry
4544 * is "start".
4545 */
4546
4547 total_size = copy->size;
4548 if(encountered_sub_map) {
4549 copy_size = 0;
4550 /* re-calculate tmp_entry since we've had the map */
4551 /* unlocked */
4552 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
4553 vm_map_unlock(dst_map);
4554 return(KERN_INVALID_ADDRESS);
4555 }
4556 } else {
4557 copy_size = copy->size;
4558 }
4559
4560 base_addr = dst_addr;
4561 while(TRUE) {
4562 /* deconstruct the copy object and do in parts */
4563 /* only in sub_map, interruptible case */
4564 vm_map_entry_t copy_entry;
91447636
A
4565 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
4566 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
1c79356b 4567 int nentries;
91447636
A
4568 int remaining_entries = 0;
4569 int new_offset = 0;
1c79356b
A
4570
4571 for (entry = tmp_entry; copy_size == 0;) {
4572 vm_map_entry_t next;
4573
4574 next = entry->vme_next;
4575
4576 /* tmp_entry and base address are moved along */
4577 /* each time we encounter a sub-map. Otherwise */
4578 /* entry can outpace tmp_entry, and the copy_size */
4579 /* may reflect the distance between them. */
4580 /* if the current entry is found to be in transition, */
4581 /* we will start over at the beginning or at the last */
4582 /* encounter of a submap, as dictated by base_addr; */
4583 /* we will zero copy_size accordingly. */
4584 if (entry->in_transition) {
4585 /*
4586 * Say that we are waiting, and wait for entry.
4587 */
4588 entry->needs_wakeup = TRUE;
4589 vm_map_entry_wait(dst_map, THREAD_UNINT);
4590
1c79356b
A
4591 if(!vm_map_lookup_entry(dst_map, base_addr,
4592 &tmp_entry)) {
4593 vm_map_unlock(dst_map);
4594 return(KERN_INVALID_ADDRESS);
4595 }
4596 copy_size = 0;
4597 entry = tmp_entry;
4598 continue;
4599 }
4600 if(entry->is_sub_map) {
91447636
A
4601 vm_map_offset_t sub_start;
4602 vm_map_offset_t sub_end;
4603 vm_map_offset_t local_end;
1c79356b
A
4604
4605 if (entry->needs_copy) {
4606 /* if this is a COW submap */
4607 /* just back the range with an */
4608 /* anonymous entry */
4609 if(entry->vme_end < dst_end)
4610 sub_end = entry->vme_end;
4611 else
4612 sub_end = dst_end;
4613 if(entry->vme_start < base_addr)
4614 sub_start = base_addr;
4615 else
4616 sub_start = entry->vme_start;
4617 vm_map_clip_end(
4618 dst_map, entry, sub_end);
4619 vm_map_clip_start(
4620 dst_map, entry, sub_start);
4621 entry->is_sub_map = FALSE;
4622 vm_map_deallocate(
4623 entry->object.sub_map);
4624 entry->object.sub_map = NULL;
4625 entry->is_shared = FALSE;
4626 entry->needs_copy = FALSE;
4627 entry->offset = 0;
4628 entry->protection = VM_PROT_ALL;
4629 entry->max_protection = VM_PROT_ALL;
4630 entry->wired_count = 0;
4631 entry->user_wired_count = 0;
4632 if(entry->inheritance
4633 == VM_INHERIT_SHARE)
4634 entry->inheritance = VM_INHERIT_COPY;
4635 continue;
4636 }
4637 /* first take care of any non-sub_map */
4638 /* entries to send */
4639 if(base_addr < entry->vme_start) {
4640 /* stuff to send */
4641 copy_size =
4642 entry->vme_start - base_addr;
4643 break;
4644 }
4645 sub_start = entry->offset;
4646
4647 if(entry->vme_end < dst_end)
4648 sub_end = entry->vme_end;
4649 else
4650 sub_end = dst_end;
4651 sub_end -= entry->vme_start;
4652 sub_end += entry->offset;
4653 local_end = entry->vme_end;
4654 vm_map_unlock(dst_map);
4655 copy_size = sub_end - sub_start;
4656
4657 /* adjust the copy object */
4658 if (total_size > copy_size) {
91447636
A
4659 vm_map_size_t local_size = 0;
4660 vm_map_size_t entry_size;
1c79356b
A
4661
4662 nentries = 1;
4663 new_offset = copy->offset;
4664 copy_entry = vm_map_copy_first_entry(copy);
4665 while(copy_entry !=
4666 vm_map_copy_to_entry(copy)){
4667 entry_size = copy_entry->vme_end -
4668 copy_entry->vme_start;
4669 if((local_size < copy_size) &&
4670 ((local_size + entry_size)
4671 >= copy_size)) {
4672 vm_map_copy_clip_end(copy,
4673 copy_entry,
4674 copy_entry->vme_start +
4675 (copy_size - local_size));
4676 entry_size = copy_entry->vme_end -
4677 copy_entry->vme_start;
4678 local_size += entry_size;
4679 new_offset += entry_size;
4680 }
4681 if(local_size >= copy_size) {
4682 next_copy = copy_entry->vme_next;
4683 copy_entry->vme_next =
4684 vm_map_copy_to_entry(copy);
4685 previous_prev =
4686 copy->cpy_hdr.links.prev;
4687 copy->cpy_hdr.links.prev = copy_entry;
4688 copy->size = copy_size;
4689 remaining_entries =
4690 copy->cpy_hdr.nentries;
4691 remaining_entries -= nentries;
4692 copy->cpy_hdr.nentries = nentries;
4693 break;
4694 } else {
4695 local_size += entry_size;
4696 new_offset += entry_size;
4697 nentries++;
4698 }
4699 copy_entry = copy_entry->vme_next;
4700 }
4701 }
4702
4703 if((entry->use_pmap) && (pmap == NULL)) {
4704 kr = vm_map_copy_overwrite_nested(
4705 entry->object.sub_map,
4706 sub_start,
4707 copy,
4708 interruptible,
4709 entry->object.sub_map->pmap);
4710 } else if (pmap != NULL) {
4711 kr = vm_map_copy_overwrite_nested(
4712 entry->object.sub_map,
4713 sub_start,
4714 copy,
4715 interruptible, pmap);
4716 } else {
4717 kr = vm_map_copy_overwrite_nested(
4718 entry->object.sub_map,
4719 sub_start,
4720 copy,
4721 interruptible,
4722 dst_map->pmap);
4723 }
4724 if(kr != KERN_SUCCESS) {
4725 if(next_copy != NULL) {
4726 copy->cpy_hdr.nentries +=
4727 remaining_entries;
4728 copy->cpy_hdr.links.prev->vme_next =
4729 next_copy;
4730 copy->cpy_hdr.links.prev
4731 = previous_prev;
4732 copy->size = total_size;
4733 }
4734 return kr;
4735 }
4736 if (dst_end <= local_end) {
4737 return(KERN_SUCCESS);
4738 }
4739 /* otherwise copy no longer exists, it was */
4740 /* destroyed after successful copy_overwrite */
4741 copy = (vm_map_copy_t)
4742 zalloc(vm_map_copy_zone);
4743 vm_map_copy_first_entry(copy) =
4744 vm_map_copy_last_entry(copy) =
4745 vm_map_copy_to_entry(copy);
4746 copy->type = VM_MAP_COPY_ENTRY_LIST;
4747 copy->offset = new_offset;
4748
4749 total_size -= copy_size;
4750 copy_size = 0;
4751 /* put back remainder of copy in container */
4752 if(next_copy != NULL) {
4753 copy->cpy_hdr.nentries = remaining_entries;
4754 copy->cpy_hdr.links.next = next_copy;
4755 copy->cpy_hdr.links.prev = previous_prev;
4756 copy->size = total_size;
4757 next_copy->vme_prev =
4758 vm_map_copy_to_entry(copy);
4759 next_copy = NULL;
4760 }
4761 base_addr = local_end;
4762 vm_map_lock(dst_map);
4763 if(!vm_map_lookup_entry(dst_map,
4764 local_end, &tmp_entry)) {
4765 vm_map_unlock(dst_map);
4766 return(KERN_INVALID_ADDRESS);
4767 }
4768 entry = tmp_entry;
4769 continue;
4770 }
4771 if (dst_end <= entry->vme_end) {
4772 copy_size = dst_end - base_addr;
4773 break;
4774 }
4775
4776 if ((next == vm_map_to_entry(dst_map)) ||
4777 (next->vme_start != entry->vme_end)) {
4778 vm_map_unlock(dst_map);
4779 return(KERN_INVALID_ADDRESS);
4780 }
4781
4782 entry = next;
4783 }/* for */
4784
4785 next_copy = NULL;
4786 nentries = 1;
4787
4788 /* adjust the copy object */
4789 if (total_size > copy_size) {
91447636
A
4790 vm_map_size_t local_size = 0;
4791 vm_map_size_t entry_size;
1c79356b
A
4792
4793 new_offset = copy->offset;
4794 copy_entry = vm_map_copy_first_entry(copy);
4795 while(copy_entry != vm_map_copy_to_entry(copy)) {
4796 entry_size = copy_entry->vme_end -
4797 copy_entry->vme_start;
4798 if((local_size < copy_size) &&
4799 ((local_size + entry_size)
4800 >= copy_size)) {
4801 vm_map_copy_clip_end(copy, copy_entry,
4802 copy_entry->vme_start +
4803 (copy_size - local_size));
4804 entry_size = copy_entry->vme_end -
4805 copy_entry->vme_start;
4806 local_size += entry_size;
4807 new_offset += entry_size;
4808 }
4809 if(local_size >= copy_size) {
4810 next_copy = copy_entry->vme_next;
4811 copy_entry->vme_next =
4812 vm_map_copy_to_entry(copy);
4813 previous_prev =
4814 copy->cpy_hdr.links.prev;
4815 copy->cpy_hdr.links.prev = copy_entry;
4816 copy->size = copy_size;
4817 remaining_entries =
4818 copy->cpy_hdr.nentries;
4819 remaining_entries -= nentries;
4820 copy->cpy_hdr.nentries = nentries;
4821 break;
4822 } else {
4823 local_size += entry_size;
4824 new_offset += entry_size;
4825 nentries++;
4826 }
4827 copy_entry = copy_entry->vme_next;
4828 }
4829 }
4830
4831 if (aligned) {
4832 pmap_t local_pmap;
4833
4834 if(pmap)
4835 local_pmap = pmap;
4836 else
4837 local_pmap = dst_map->pmap;
4838
4839 if ((kr = vm_map_copy_overwrite_aligned(
4840 dst_map, tmp_entry, copy,
4841 base_addr, local_pmap)) != KERN_SUCCESS) {
4842 if(next_copy != NULL) {
4843 copy->cpy_hdr.nentries +=
4844 remaining_entries;
4845 copy->cpy_hdr.links.prev->vme_next =
4846 next_copy;
4847 copy->cpy_hdr.links.prev =
4848 previous_prev;
4849 copy->size += copy_size;
4850 }
4851 return kr;
4852 }
4853 vm_map_unlock(dst_map);
4854 } else {
4855 /*
4856 * Performance gain:
4857 *
4858 * if the copy and dst address are misaligned but have the same
4859 * offset within the page, we can copy_not_aligned the
4860 * misaligned parts and copy aligned the rest. If they are
4861 * aligned but len is unaligned, we simply need to copy
4862 * the end bit unaligned. We'll need to split the misaligned
4863 * bits of the region in this case!
4864 */
4865 /* ALWAYS UNLOCKS THE dst_map MAP */
4866 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
4867 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
4868 if(next_copy != NULL) {
4869 copy->cpy_hdr.nentries +=
4870 remaining_entries;
4871 copy->cpy_hdr.links.prev->vme_next =
4872 next_copy;
4873 copy->cpy_hdr.links.prev =
4874 previous_prev;
4875 copy->size += copy_size;
4876 }
4877 return kr;
4878 }
4879 }
4880 total_size -= copy_size;
4881 if(total_size == 0)
4882 break;
4883 base_addr += copy_size;
4884 copy_size = 0;
4885 copy->offset = new_offset;
4886 if(next_copy != NULL) {
4887 copy->cpy_hdr.nentries = remaining_entries;
4888 copy->cpy_hdr.links.next = next_copy;
4889 copy->cpy_hdr.links.prev = previous_prev;
4890 next_copy->vme_prev = vm_map_copy_to_entry(copy);
4891 copy->size = total_size;
4892 }
4893 vm_map_lock(dst_map);
4894 while(TRUE) {
4895 if (!vm_map_lookup_entry(dst_map,
4896 base_addr, &tmp_entry)) {
4897 vm_map_unlock(dst_map);
4898 return(KERN_INVALID_ADDRESS);
4899 }
4900 if (tmp_entry->in_transition) {
4901 entry->needs_wakeup = TRUE;
4902 vm_map_entry_wait(dst_map, THREAD_UNINT);
4903 } else {
4904 break;
4905 }
4906 }
91447636 4907 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
1c79356b
A
4908
4909 entry = tmp_entry;
4910 } /* while */
4911
4912 /*
4913 * Throw away the vm_map_copy object
4914 */
4915 vm_map_copy_discard(copy);
4916
4917 return(KERN_SUCCESS);
4918}/* vm_map_copy_overwrite */
4919
4920kern_return_t
4921vm_map_copy_overwrite(
4922 vm_map_t dst_map,
91447636 4923 vm_map_offset_t dst_addr,
1c79356b
A
4924 vm_map_copy_t copy,
4925 boolean_t interruptible)
4926{
4927 return vm_map_copy_overwrite_nested(
4928 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
4929}
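/*
 * Example (editor's sketch, not part of the original source):
 * overwriting an existing, writeable destination range with a copy
 * object.  On success the copy is consumed; on failure it still
 * belongs to the caller, which here simply discards it.  The helper
 * name is hypothetical.
 */
static kern_return_t
example_overwrite_or_discard(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy)
{
	kern_return_t	kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* still ours on failure */
	return kr;
}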
4930
4931
4932/*
91447636 4933 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
4934 *
4935 * Description:
4936 * Physically copy unaligned data
4937 *
4938 * Implementation:
4939 * Unaligned parts of pages have to be physically copied. We use
4940 * a modified form of vm_fault_copy (which understands non-aligned
4941 * page offsets and sizes) to do the copy. We attempt to copy as
4942 * much memory in one go as possible; however, vm_fault_copy copies
4943 * within one memory object, so we have to find the smallest of "amount left",
4944 * "source object data size" and "target object data size". With
4945 * unaligned data we don't need to split regions, so the source
4946 * (copy) object should be one map entry; the target range, however, may be
4947 * split over multiple map entries. In any event we are pessimistic
4948 * about these assumptions.
4949 *
4950 * Assumptions:
4951 * dst_map is locked on entry and is return locked on success,
4952 * unlocked on error.
4953 */
4954
91447636 4955static kern_return_t
1c79356b
A
4956vm_map_copy_overwrite_unaligned(
4957 vm_map_t dst_map,
4958 vm_map_entry_t entry,
4959 vm_map_copy_t copy,
91447636 4960 vm_map_offset_t start)
1c79356b
A
4961{
4962 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
4963 vm_map_version_t version;
4964 vm_object_t dst_object;
4965 vm_object_offset_t dst_offset;
4966 vm_object_offset_t src_offset;
4967 vm_object_offset_t entry_offset;
91447636
A
4968 vm_map_offset_t entry_end;
4969 vm_map_size_t src_size,
1c79356b
A
4970 dst_size,
4971 copy_size,
4972 amount_left;
4973 kern_return_t kr = KERN_SUCCESS;
4974
4975 vm_map_lock_write_to_read(dst_map);
4976
91447636 4977 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
4978 amount_left = copy->size;
4979/*
4980 * unaligned so we never clipped this entry, we need the offset into
4981 * the vm_object not just the data.
4982 */
4983 while (amount_left > 0) {
4984
4985 if (entry == vm_map_to_entry(dst_map)) {
4986 vm_map_unlock_read(dst_map);
4987 return KERN_INVALID_ADDRESS;
4988 }
4989
4990 /* "start" must be within the current map entry */
4991 assert ((start>=entry->vme_start) && (start<entry->vme_end));
4992
4993 dst_offset = start - entry->vme_start;
4994
4995 dst_size = entry->vme_end - start;
4996
4997 src_size = copy_entry->vme_end -
4998 (copy_entry->vme_start + src_offset);
4999
5000 if (dst_size < src_size) {
5001/*
5002 * we can only copy dst_size bytes before
5003 * we have to get the next destination entry
5004 */
5005 copy_size = dst_size;
5006 } else {
5007/*
5008 * we can only copy src_size bytes before
5009 * we have to get the next source copy entry
5010 */
5011 copy_size = src_size;
5012 }
5013
5014 if (copy_size > amount_left) {
5015 copy_size = amount_left;
5016 }
5017/*
5018 * Entry needs copy; create a shadow object for the
5019 * copy-on-write region.
5020 */
5021 if (entry->needs_copy &&
5022 ((entry->protection & VM_PROT_WRITE) != 0))
5023 {
5024 if (vm_map_lock_read_to_write(dst_map)) {
5025 vm_map_lock_read(dst_map);
5026 goto RetryLookup;
5027 }
5028 vm_object_shadow(&entry->object.vm_object,
5029 &entry->offset,
91447636 5030 (vm_map_size_t)(entry->vme_end
1c79356b
A
5031 - entry->vme_start));
5032 entry->needs_copy = FALSE;
5033 vm_map_lock_write_to_read(dst_map);
5034 }
5035 dst_object = entry->object.vm_object;
5036/*
5037 * unlike with the virtual (aligned) copy, we're going
5038 * to fault on it; therefore we need a target object.
5039 */
5040 if (dst_object == VM_OBJECT_NULL) {
5041 if (vm_map_lock_read_to_write(dst_map)) {
5042 vm_map_lock_read(dst_map);
5043 goto RetryLookup;
5044 }
91447636 5045 dst_object = vm_object_allocate((vm_map_size_t)
1c79356b
A
5046 entry->vme_end - entry->vme_start);
5047 entry->object.vm_object = dst_object;
5048 entry->offset = 0;
5049 vm_map_lock_write_to_read(dst_map);
5050 }
5051/*
5052 * Take an object reference and unlock map. The "entry" may
5053 * disappear or change when the map is unlocked.
5054 */
5055 vm_object_reference(dst_object);
5056 version.main_timestamp = dst_map->timestamp;
5057 entry_offset = entry->offset;
5058 entry_end = entry->vme_end;
5059 vm_map_unlock_read(dst_map);
5060/*
5061 * Copy as much as possible in one pass
5062 */
5063 kr = vm_fault_copy(
5064 copy_entry->object.vm_object,
5065 copy_entry->offset + src_offset,
5066 &copy_size,
5067 dst_object,
5068 entry_offset + dst_offset,
5069 dst_map,
5070 &version,
5071 THREAD_UNINT );
5072
5073 start += copy_size;
5074 src_offset += copy_size;
5075 amount_left -= copy_size;
5076/*
5077 * Release the object reference
5078 */
5079 vm_object_deallocate(dst_object);
5080/*
5081 * If a hard error occurred, return it now
5082 */
5083 if (kr != KERN_SUCCESS)
5084 return kr;
5085
5086 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
5087 || amount_left == 0)
5088 {
5089/*
5090 * all done with this copy entry, dispose.
5091 */
5092 vm_map_copy_entry_unlink(copy, copy_entry);
5093 vm_object_deallocate(copy_entry->object.vm_object);
5094 vm_map_copy_entry_dispose(copy, copy_entry);
5095
5096 if ((copy_entry = vm_map_copy_first_entry(copy))
5097 == vm_map_copy_to_entry(copy) && amount_left) {
5098/*
5099 * not finished copying but run out of source
5100 */
5101 return KERN_INVALID_ADDRESS;
5102 }
5103 src_offset = 0;
5104 }
5105
5106 if (amount_left == 0)
5107 return KERN_SUCCESS;
5108
5109 vm_map_lock_read(dst_map);
5110 if (version.main_timestamp == dst_map->timestamp) {
5111 if (start == entry_end) {
5112/*
5113 * destination region is split. Use the version
5114 * information to avoid a lookup in the normal
5115 * case.
5116 */
5117 entry = entry->vme_next;
5118/*
5119 * should be contiguous. Fail if we encounter
5120 * a hole in the destination.
5121 */
5122 if (start != entry->vme_start) {
5123 vm_map_unlock_read(dst_map);
5124 return KERN_INVALID_ADDRESS ;
5125 }
5126 }
5127 } else {
5128/*
5129 * Map version check failed.
5130 * we must lookup the entry because somebody
5131 * might have changed the map behind our backs.
5132 */
5133RetryLookup:
5134 if (!vm_map_lookup_entry(dst_map, start, &entry))
5135 {
5136 vm_map_unlock_read(dst_map);
5137 return KERN_INVALID_ADDRESS ;
5138 }
5139 }
5140 }/* while */
5141
1c79356b
A
5142 return KERN_SUCCESS;
5143}/* vm_map_copy_overwrite_unaligned */
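/*
 * Editor's note (sketch, not in the original source): each pass of
 * the unaligned loop above copies the smallest of "what remains of
 * the current destination entry", "what remains of the current
 * source copy entry" and "what remains of the whole request".
 * Restated as a stand-alone helper (hypothetical name):
 */
static vm_map_size_t
example_unaligned_chunk_size(
	vm_map_size_t	dst_size,	/* left in destination entry */
	vm_map_size_t	src_size,	/* left in source copy entry */
	vm_map_size_t	amount_left)	/* left in the whole request */
{
	vm_map_size_t	copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left)
		copy_size = amount_left;
	return copy_size;
}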
5144
5145/*
91447636 5146 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
5147 *
5148 * Description:
5149 * Does all the vm_trickery possible for whole pages.
5150 *
5151 * Implementation:
5152 *
5153 * If there are no permanent objects in the destination,
5154 * and the source and destination map entry zones match,
5155 * and the destination map entry is not shared,
5156 * then the map entries can be deleted and replaced
5157 * with those from the copy. The following code is the
5158 * basic idea of what to do, but there are lots of annoying
5159 * little details about getting protection and inheritance
5160 * right. Should add protection, inheritance, and sharing checks
5161 * to the above pass and make sure that no wiring is involved.
5162 */
5163
91447636 5164static kern_return_t
1c79356b
A
5165vm_map_copy_overwrite_aligned(
5166 vm_map_t dst_map,
5167 vm_map_entry_t tmp_entry,
5168 vm_map_copy_t copy,
91447636
A
5169 vm_map_offset_t start,
5170#if !BAD_OPTIMIZATION
5171 __unused
5172#endif /* !BAD_OPTIMIZATION */
1c79356b
A
5173 pmap_t pmap)
5174{
5175 vm_object_t object;
5176 vm_map_entry_t copy_entry;
91447636
A
5177 vm_map_size_t copy_size;
5178 vm_map_size_t size;
1c79356b
A
5179 vm_map_entry_t entry;
5180
5181 while ((copy_entry = vm_map_copy_first_entry(copy))
5182 != vm_map_copy_to_entry(copy))
5183 {
5184 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5185
5186 entry = tmp_entry;
5187 if (entry == vm_map_to_entry(dst_map)) {
5188 vm_map_unlock(dst_map);
5189 return KERN_INVALID_ADDRESS;
5190 }
5191 size = (entry->vme_end - entry->vme_start);
5192 /*
5193 * Make sure that no holes popped up in the
5194 * address map, and that the protection is
5195 * still valid, in case the map was unlocked
5196 * earlier.
5197 */
5198
5199 if ((entry->vme_start != start) || ((entry->is_sub_map)
5200 && !entry->needs_copy)) {
5201 vm_map_unlock(dst_map);
5202 return(KERN_INVALID_ADDRESS);
5203 }
5204 assert(entry != vm_map_to_entry(dst_map));
5205
5206 /*
5207 * Check protection again
5208 */
5209
5210 if ( ! (entry->protection & VM_PROT_WRITE)) {
5211 vm_map_unlock(dst_map);
5212 return(KERN_PROTECTION_FAILURE);
5213 }
5214
5215 /*
5216 * Adjust to source size first
5217 */
5218
5219 if (copy_size < size) {
5220 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
5221 size = copy_size;
5222 }
5223
5224 /*
5225 * Adjust to destination size
5226 */
5227
5228 if (size < copy_size) {
5229 vm_map_copy_clip_end(copy, copy_entry,
5230 copy_entry->vme_start + size);
5231 copy_size = size;
5232 }
5233
5234 assert((entry->vme_end - entry->vme_start) == size);
5235 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
5236 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
5237
5238 /*
5239 * If the destination contains temporary unshared memory,
5240 * we can perform the copy by throwing it away and
5241 * installing the source data.
5242 */
5243
5244 object = entry->object.vm_object;
5245 if ((!entry->is_shared &&
5246 ((object == VM_OBJECT_NULL) ||
5247 (object->internal && !object->true_share))) ||
5248 entry->needs_copy) {
5249 vm_object_t old_object = entry->object.vm_object;
5250 vm_object_offset_t old_offset = entry->offset;
5251 vm_object_offset_t offset;
5252
5253 /*
5254 * Ensure that the source and destination aren't
5255 * identical
5256 */
5257 if (old_object == copy_entry->object.vm_object &&
5258 old_offset == copy_entry->offset) {
5259 vm_map_copy_entry_unlink(copy, copy_entry);
5260 vm_map_copy_entry_dispose(copy, copy_entry);
5261
5262 if (old_object != VM_OBJECT_NULL)
5263 vm_object_deallocate(old_object);
5264
5265 start = tmp_entry->vme_end;
5266 tmp_entry = tmp_entry->vme_next;
5267 continue;
5268 }
5269
5270 if (old_object != VM_OBJECT_NULL) {
5271 if(entry->is_sub_map) {
9bccf70c 5272 if(entry->use_pmap) {
0c530ab8 5273#ifndef NO_NESTED_PMAP
9bccf70c 5274 pmap_unnest(dst_map->pmap,
0c530ab8
A
5275 (addr64_t)entry->vme_start);
5276#endif /* NO_NESTED_PMAP */
9bccf70c
A
5277 if(dst_map->mapped) {
5278 /* clean up parent */
5279 /* map/maps */
5280 vm_map_submap_pmap_clean(
5281 dst_map, entry->vme_start,
5282 entry->vme_end,
5283 entry->object.sub_map,
5284 entry->offset);
5285 }
5286 } else {
5287 vm_map_submap_pmap_clean(
5288 dst_map, entry->vme_start,
5289 entry->vme_end,
5290 entry->object.sub_map,
5291 entry->offset);
5292 }
5293 vm_map_deallocate(
1c79356b 5294 entry->object.sub_map);
9bccf70c
A
5295 } else {
5296 if(dst_map->mapped) {
5297 vm_object_pmap_protect(
5298 entry->object.vm_object,
5299 entry->offset,
5300 entry->vme_end
5301 - entry->vme_start,
5302 PMAP_NULL,
5303 entry->vme_start,
5304 VM_PROT_NONE);
5305 } else {
55e303ae
A
5306 pmap_remove(dst_map->pmap,
5307 (addr64_t)(entry->vme_start),
5308 (addr64_t)(entry->vme_end));
9bccf70c 5309 }
1c79356b 5310 vm_object_deallocate(old_object);
9bccf70c 5311 }
1c79356b
A
5312 }
5313
5314 entry->is_sub_map = FALSE;
5315 entry->object = copy_entry->object;
5316 object = entry->object.vm_object;
5317 entry->needs_copy = copy_entry->needs_copy;
5318 entry->wired_count = 0;
5319 entry->user_wired_count = 0;
5320 offset = entry->offset = copy_entry->offset;
5321
5322 vm_map_copy_entry_unlink(copy, copy_entry);
5323 vm_map_copy_entry_dispose(copy, copy_entry);
5324#if BAD_OPTIMIZATION
5325 /*
5326 * if we turn this optimization back on,
5327 * we need to revisit our use of pmap mappings:
5328 * large copies will cause us to run out and panic.
5329 * this optimization only saved on average 2 us per page if ALL
5330 * the pages in the source were currently mapped
5331 * and ALL the pages in the dest were touched; if fewer
5332 * than 2/3 of the pages were touched, this optimization actually cost more cycles
5333 */
5334
5335 /*
5336 * Try to aggressively enter physical mappings
5337 * (but avoid uninstantiated objects)
5338 */
5339 if (object != VM_OBJECT_NULL) {
91447636 5340 vm_map_offset_t va = entry->vme_start;
1c79356b
A
5341
5342 while (va < entry->vme_end) {
5343 register vm_page_t m;
5344 vm_prot_t prot;
5345
5346 /*
5347 * Look for the page in the top object
5348 */
5349 prot = entry->protection;
5350 vm_object_lock(object);
5351 vm_object_paging_begin(object);
5352
91447636
A
5353 /*
5354 * ENCRYPTED SWAP:
5355 * If the page is encrypted, skip it:
5356 * we can't let the user see the encrypted
5357 * contents. The page will get decrypted
5358 * on demand when the user generates a
5359 * soft-fault when trying to access it.
5360 */
1c79356b 5361 if ((m = vm_page_lookup(object,offset)) !=
91447636
A
5362 VM_PAGE_NULL && !m->busy &&
5363 !m->fictitious && !m->encrypted &&
1c79356b
A
5364 (!m->unusual || (!m->error &&
5365 !m->restart && !m->absent &&
5366 (prot & m->page_lock) == 0))) {
5367
5368 m->busy = TRUE;
5369 vm_object_unlock(object);
5370
5371 /*
5372 * Honor COW obligations
5373 */
5374 if (entry->needs_copy)
5375 prot &= ~VM_PROT_WRITE;
0c530ab8
A
5376#ifdef STACK_ONLY_NX
5377 if (entry->alias != VM_MEMORY_STACK && prot)
5378 prot |= VM_PROT_EXECUTE;
5379#endif
0b4e3aa0
A
5380 /* It is our policy to require */
5381 /* explicit sync from anyone */
5382 /* writing code and then */
5383 /* a pc to execute it. */
5384 /* No isync here */
1c79356b 5385
9bccf70c 5386 PMAP_ENTER(pmap, va, m, prot,
55e303ae
A
5387 ((unsigned int)
5388 (m->object->wimg_bits))
5389 & VM_WIMG_MASK,
5390 FALSE);
1c79356b
A
5391
5392 vm_object_lock(object);
5393 vm_page_lock_queues();
5394 if (!m->active && !m->inactive)
5395 vm_page_activate(m);
5396 vm_page_unlock_queues();
0c530ab8 5397 PAGE_WAKEUP_DONE(m);
1c79356b
A
5398 }
5399 vm_object_paging_end(object);
5400 vm_object_unlock(object);
5401
5402 offset += PAGE_SIZE_64;
5403 va += PAGE_SIZE;
5404 } /* end while (va < entry->vme_end) */
5405 } /* end if (object) */
5406#endif
5407 /*
5408 * Set up for the next iteration. The map
5409 * has not been unlocked, so the next
5410 * address should be at the end of this
5411 * entry, and the next map entry should be
5412 * the one following it.
5413 */
5414
5415 start = tmp_entry->vme_end;
5416 tmp_entry = tmp_entry->vme_next;
5417 } else {
5418 vm_map_version_t version;
5419 vm_object_t dst_object = entry->object.vm_object;
5420 vm_object_offset_t dst_offset = entry->offset;
5421 kern_return_t r;
5422
5423 /*
5424 * Take an object reference, and record
5425 * the map version information so that the
5426 * map can be safely unlocked.
5427 */
5428
5429 vm_object_reference(dst_object);
5430
9bccf70c
A
5431 /* account for unlock bumping up timestamp */
5432 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
5433
5434 vm_map_unlock(dst_map);
5435
5436 /*
5437 * Copy as much as possible in one pass
5438 */
5439
5440 copy_size = size;
5441 r = vm_fault_copy(
5442 copy_entry->object.vm_object,
5443 copy_entry->offset,
5444 &copy_size,
5445 dst_object,
5446 dst_offset,
5447 dst_map,
5448 &version,
5449 THREAD_UNINT );
5450
5451 /*
5452 * Release the object reference
5453 */
5454
5455 vm_object_deallocate(dst_object);
5456
5457 /*
5458 * If a hard error occurred, return it now
5459 */
5460
5461 if (r != KERN_SUCCESS)
5462 return(r);
5463
5464 if (copy_size != 0) {
5465 /*
5466 * Dispose of the copied region
5467 */
5468
5469 vm_map_copy_clip_end(copy, copy_entry,
5470 copy_entry->vme_start + copy_size);
5471 vm_map_copy_entry_unlink(copy, copy_entry);
5472 vm_object_deallocate(copy_entry->object.vm_object);
5473 vm_map_copy_entry_dispose(copy, copy_entry);
5474 }
5475
5476 /*
5477 * Pick up in the destination map where we left off.
5478 *
5479 * Use the version information to avoid a lookup
5480 * in the normal case.
5481 */
5482
5483 start += copy_size;
5484 vm_map_lock(dst_map);
9bccf70c 5485 if (version.main_timestamp == dst_map->timestamp) {
1c79356b
A
5486 /* We can safely use saved tmp_entry value */
5487
5488 vm_map_clip_end(dst_map, tmp_entry, start);
5489 tmp_entry = tmp_entry->vme_next;
5490 } else {
5491 /* Must do lookup of tmp_entry */
5492
5493 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
5494 vm_map_unlock(dst_map);
5495 return(KERN_INVALID_ADDRESS);
5496 }
5497 vm_map_clip_start(dst_map, tmp_entry, start);
5498 }
5499 }
5500 }/* while */
5501
5502 return(KERN_SUCCESS);
5503}/* vm_map_copy_overwrite_aligned */
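/*
 * Editor's note (sketch, not in the original source): the aligned
 * path above replaces a destination entry outright only when it is
 * backed by temporary, unshared memory (or still needs a copy);
 * otherwise it falls back to vm_fault_copy().  The eligibility test,
 * restated as a stand-alone predicate (hypothetical name):
 */
static boolean_t
example_can_replace_entry(
	boolean_t	is_shared,	/* entry->is_shared */
	boolean_t	needs_copy,	/* entry->needs_copy */
	vm_object_t	object)		/* entry->object.vm_object */
{
	if (needs_copy)
		return TRUE;
	if (is_shared)
		return FALSE;
	return (object == VM_OBJECT_NULL) ||
	       (object->internal && !object->true_share);
}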
5504
5505/*
91447636 5506 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
5507 *
5508 * Description:
5509 * Copy in data to a kernel buffer from space in the
91447636 5510 * source map. The original space may be optionally
1c79356b
A
5511 * deallocated.
5512 *
5513 * If successful, returns a new copy object.
5514 */
91447636 5515static kern_return_t
1c79356b
A
5516vm_map_copyin_kernel_buffer(
5517 vm_map_t src_map,
91447636
A
5518 vm_map_offset_t src_addr,
5519 vm_map_size_t len,
1c79356b
A
5520 boolean_t src_destroy,
5521 vm_map_copy_t *copy_result)
5522{
91447636 5523 kern_return_t kr;
1c79356b 5524 vm_map_copy_t copy;
91447636 5525 vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len;
1c79356b
A
5526
5527 copy = (vm_map_copy_t) kalloc(kalloc_size);
5528 if (copy == VM_MAP_COPY_NULL) {
5529 return KERN_RESOURCE_SHORTAGE;
5530 }
5531 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
5532 copy->size = len;
5533 copy->offset = 0;
91447636 5534 copy->cpy_kdata = (void *) (copy + 1);
1c79356b
A
5535 copy->cpy_kalloc_size = kalloc_size;
5536
91447636
A
5537 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len);
5538 if (kr != KERN_SUCCESS) {
5539 kfree(copy, kalloc_size);
5540 return kr;
1c79356b
A
5541 }
5542 if (src_destroy) {
91447636
A
5543 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
5544 vm_map_round_page(src_addr + len),
5545 VM_MAP_REMOVE_INTERRUPTIBLE |
5546 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
5547 (src_map == kernel_map) ?
5548 VM_MAP_REMOVE_KUNWIRE : 0);
1c79356b
A
5549 }
5550 *copy_result = copy;
5551 return KERN_SUCCESS;
5552}
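/*
 * Editor's note (sketch, not in the original source): the kernel-
 * buffer copy above is one kalloc() chunk -- the vm_map_copy header
 * immediately followed by the payload -- which is why cpy_kdata is
 * simply "copy + 1" and the whole thing is released with a single
 * kfree(copy, copy->cpy_kalloc_size).  The size helper below is
 * hypothetical.
 *
 *	+---------------------+---------------------------+
 *	| struct vm_map_copy  | len bytes of copied data  |
 *	+---------------------+---------------------------+
 *	^ copy                ^ copy->cpy_kdata == copy + 1
 */
static vm_map_size_t
example_kernel_buffer_alloc_size(vm_map_size_t len)
{
	/* header and payload come from the same kalloc() allocation */
	return sizeof(struct vm_map_copy) + len;
}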
5553
5554/*
91447636 5555 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
5556 *
5557 * Description:
5558 * Copy out data from a kernel buffer into space in the
5559 * destination map. The space may be optionally dynamically
5560 * allocated.
5561 *
5562 * If successful, consumes the copy object.
5563 * Otherwise, the caller is responsible for it.
5564 */
91447636
A
5565static int vm_map_copyout_kernel_buffer_failures = 0;
5566static kern_return_t
1c79356b 5567vm_map_copyout_kernel_buffer(
91447636
A
5568 vm_map_t map,
5569 vm_map_address_t *addr, /* IN/OUT */
5570 vm_map_copy_t copy,
5571 boolean_t overwrite)
1c79356b
A
5572{
5573 kern_return_t kr = KERN_SUCCESS;
91447636 5574 thread_t thread = current_thread();
1c79356b
A
5575
5576 if (!overwrite) {
5577
5578 /*
5579 * Allocate space in the target map for the data
5580 */
5581 *addr = 0;
5582 kr = vm_map_enter(map,
5583 addr,
91447636
A
5584 vm_map_round_page(copy->size),
5585 (vm_map_offset_t) 0,
5586 VM_FLAGS_ANYWHERE,
1c79356b
A
5587 VM_OBJECT_NULL,
5588 (vm_object_offset_t) 0,
5589 FALSE,
5590 VM_PROT_DEFAULT,
5591 VM_PROT_ALL,
5592 VM_INHERIT_DEFAULT);
5593 if (kr != KERN_SUCCESS)
91447636 5594 return kr;
1c79356b
A
5595 }
5596
5597 /*
5598 * Copyout the data from the kernel buffer to the target map.
5599 */
91447636 5600 if (thread->map == map) {
1c79356b
A
5601
5602 /*
5603 * If the target map is the current map, just do
5604 * the copy.
5605 */
91447636
A
5606 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
5607 kr = KERN_INVALID_ADDRESS;
1c79356b
A
5608 }
5609 }
5610 else {
5611 vm_map_t oldmap;
5612
5613 /*
5614 * If the target map is another map, assume the
5615 * target's address space identity for the duration
5616 * of the copy.
5617 */
5618 vm_map_reference(map);
5619 oldmap = vm_map_switch(map);
5620
91447636
A
5621 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
5622 vm_map_copyout_kernel_buffer_failures++;
5623 kr = KERN_INVALID_ADDRESS;
1c79356b
A
5624 }
5625
5626 (void) vm_map_switch(oldmap);
5627 vm_map_deallocate(map);
5628 }
5629
91447636
A
5630 if (kr != KERN_SUCCESS) {
5631 /* the copy failed, clean up */
5632 if (!overwrite) {
5633 /*
5634 * Deallocate the space we allocated in the target map.
5635 */
5636 (void) vm_map_remove(map,
5637 vm_map_trunc_page(*addr),
5638 vm_map_round_page(*addr +
5639 vm_map_round_page(copy->size)),
5640 VM_MAP_NO_FLAGS);
5641 *addr = 0;
5642 }
5643 } else {
5644 /* copy was successful, discard the copy structure */
5645 kfree(copy, copy->cpy_kalloc_size);
5646 }
1c79356b 5647
91447636 5648 return kr;
1c79356b
A
5649}
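/*
 * Editor's note (sketch, not in the original source): when the
 * destination is not the current map, the routine above temporarily
 * assumes that map's address space so that a plain copyout() works:
 *
 *	vm_map_reference(map);
 *	oldmap = vm_map_switch(map);
 *	... copyout(copy->cpy_kdata, *addr, copy->size) ...
 *	(void) vm_map_switch(oldmap);
 *	vm_map_deallocate(map);
 *
 * The reference/deallocate pair keeps the target map alive for the
 * duration of the switch.
 */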
5650
5651/*
5652 * Macro: vm_map_copy_insert
5653 *
5654 * Description:
5655 * Link a copy chain ("copy") into a map at the
5656 * specified location (after "where").
5657 * Side effects:
5658 * The copy chain is destroyed.
5659 * Warning:
5660 * The arguments are evaluated multiple times.
5661 */
5662#define vm_map_copy_insert(map, where, copy) \
5663MACRO_BEGIN \
5664 vm_map_t VMCI_map; \
5665 vm_map_entry_t VMCI_where; \
5666 vm_map_copy_t VMCI_copy; \
5667 VMCI_map = (map); \
5668 VMCI_where = (where); \
5669 VMCI_copy = (copy); \
5670 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
5671 ->vme_next = (VMCI_where->vme_next); \
5672 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
5673 ->vme_prev = VMCI_where; \
5674 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
5675 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
91447636 5676 zfree(vm_map_copy_zone, VMCI_copy); \
1c79356b
A
5677MACRO_END
5678
5679/*
5680 * Routine: vm_map_copyout
5681 *
5682 * Description:
5683 * Copy out a copy chain ("copy") into newly-allocated
5684 * space in the destination map.
5685 *
5686 * If successful, consumes the copy object.
5687 * Otherwise, the caller is responsible for it.
5688 */
5689kern_return_t
5690vm_map_copyout(
91447636
A
5691 vm_map_t dst_map,
5692 vm_map_address_t *dst_addr, /* OUT */
5693 vm_map_copy_t copy)
1c79356b 5694{
91447636
A
5695 vm_map_size_t size;
5696 vm_map_size_t adjustment;
5697 vm_map_offset_t start;
1c79356b
A
5698 vm_object_offset_t vm_copy_start;
5699 vm_map_entry_t last;
5700 register
5701 vm_map_entry_t entry;
5702
5703 /*
5704 * Check for null copy object.
5705 */
5706
5707 if (copy == VM_MAP_COPY_NULL) {
5708 *dst_addr = 0;
5709 return(KERN_SUCCESS);
5710 }
5711
5712 /*
5713 * Check for special copy object, created
5714 * by vm_map_copyin_object.
5715 */
5716
5717 if (copy->type == VM_MAP_COPY_OBJECT) {
5718 vm_object_t object = copy->cpy_object;
5719 kern_return_t kr;
5720 vm_object_offset_t offset;
5721
91447636
A
5722 offset = vm_object_trunc_page(copy->offset);
5723 size = vm_map_round_page(copy->size +
5724 (vm_map_size_t)(copy->offset - offset));
1c79356b
A
5725 *dst_addr = 0;
5726 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 5727 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
1c79356b
A
5728 object, offset, FALSE,
5729 VM_PROT_DEFAULT, VM_PROT_ALL,
5730 VM_INHERIT_DEFAULT);
5731 if (kr != KERN_SUCCESS)
5732 return(kr);
5733 /* Account for non-pagealigned copy object */
91447636
A
5734 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
5735 zfree(vm_map_copy_zone, copy);
1c79356b
A
5736 return(KERN_SUCCESS);
5737 }
5738
5739 /*
5740 * Check for special kernel buffer allocated
5741 * by new_ipc_kmsg_copyin.
5742 */
5743
5744 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5745 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
5746 copy, FALSE));
5747 }
5748
1c79356b
A
5749 /*
5750 * Find space for the data
5751 */
5752
91447636
A
5753 vm_copy_start = vm_object_trunc_page(copy->offset);
5754 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
1c79356b
A
5755 - vm_copy_start;
5756
5757 StartAgain: ;
5758
5759 vm_map_lock(dst_map);
5760 assert(first_free_is_valid(dst_map));
5761 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
5762 vm_map_min(dst_map) : last->vme_end;
5763
5764 while (TRUE) {
5765 vm_map_entry_t next = last->vme_next;
91447636 5766 vm_map_offset_t end = start + size;
1c79356b
A
5767
5768 if ((end > dst_map->max_offset) || (end < start)) {
5769 if (dst_map->wait_for_space) {
5770 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
5771 assert_wait((event_t) dst_map,
5772 THREAD_INTERRUPTIBLE);
5773 vm_map_unlock(dst_map);
91447636 5774 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
5775 goto StartAgain;
5776 }
5777 }
5778 vm_map_unlock(dst_map);
5779 return(KERN_NO_SPACE);
5780 }
5781
5782 if ((next == vm_map_to_entry(dst_map)) ||
5783 (next->vme_start >= end))
5784 break;
5785
5786 last = next;
5787 start = last->vme_end;
5788 }
5789
5790 /*
5791 * Since we're going to just drop the map
5792 * entries from the copy into the destination
5793 * map, they must come from the same pool.
5794 */
5795
5796 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
5797 /*
5798 * Mismatches occur when dealing with the default
5799 * pager.
5800 */
5801 zone_t old_zone;
5802 vm_map_entry_t next, new;
5803
5804 /*
5805 * Find the zone that the copies were allocated from
5806 */
5807 old_zone = (copy->cpy_hdr.entries_pageable)
5808 ? vm_map_entry_zone
5809 : vm_map_kentry_zone;
5810 entry = vm_map_copy_first_entry(copy);
5811
5812 /*
5813 * Reinitialize the copy so that vm_map_copy_entry_link
5814 * will work.
5815 */
5816 copy->cpy_hdr.nentries = 0;
5817 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
5818 vm_map_copy_first_entry(copy) =
5819 vm_map_copy_last_entry(copy) =
5820 vm_map_copy_to_entry(copy);
5821
5822 /*
5823 * Copy each entry.
5824 */
5825 while (entry != vm_map_copy_to_entry(copy)) {
5826 new = vm_map_copy_entry_create(copy);
5827 vm_map_entry_copy_full(new, entry);
5828 new->use_pmap = FALSE; /* clr address space specifics */
5829 vm_map_copy_entry_link(copy,
5830 vm_map_copy_last_entry(copy),
5831 new);
5832 next = entry->vme_next;
91447636 5833 zfree(old_zone, entry);
1c79356b
A
5834 entry = next;
5835 }
5836 }
5837
5838 /*
5839 * Adjust the addresses in the copy chain, and
5840 * reset the region attributes.
5841 */
5842
5843 adjustment = start - vm_copy_start;
5844 for (entry = vm_map_copy_first_entry(copy);
5845 entry != vm_map_copy_to_entry(copy);
5846 entry = entry->vme_next) {
5847 entry->vme_start += adjustment;
5848 entry->vme_end += adjustment;
5849
5850 entry->inheritance = VM_INHERIT_DEFAULT;
5851 entry->protection = VM_PROT_DEFAULT;
5852 entry->max_protection = VM_PROT_ALL;
5853 entry->behavior = VM_BEHAVIOR_DEFAULT;
5854
5855 /*
5856 * If the entry is now wired,
5857 * map the pages into the destination map.
5858 */
5859 if (entry->wired_count != 0) {
91447636 5860 register vm_map_offset_t va;
1c79356b
A
5861 vm_object_offset_t offset;
5862 register vm_object_t object;
0c530ab8 5863 vm_prot_t prot;
1c79356b
A
5864
5865 object = entry->object.vm_object;
5866 offset = entry->offset;
5867 va = entry->vme_start;
5868
5869 pmap_pageable(dst_map->pmap,
5870 entry->vme_start,
5871 entry->vme_end,
5872 TRUE);
5873
5874 while (va < entry->vme_end) {
5875 register vm_page_t m;
5876
5877 /*
5878 * Look up the page in the object.
5879 * Assert that the page will be found in the
5880 * top object:
5881 * either
5882 * the object was newly created by
5883 * vm_object_copy_slowly, and has
5884 * copies of all of the pages from
5885 * the source object
5886 * or
5887 * the object was moved from the old
5888 * map entry; because the old map
5889 * entry was wired, all of the pages
5890 * were in the top-level object.
5891 * (XXX not true if we wire pages for
5892 * reading)
5893 */
5894 vm_object_lock(object);
5895 vm_object_paging_begin(object);
5896
5897 m = vm_page_lookup(object, offset);
5898 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
5899 m->absent)
5900 panic("vm_map_copyout: wiring 0x%x", m);
5901
91447636
A
5902 /*
5903 * ENCRYPTED SWAP:
5904 * The page is assumed to be wired here, so it
5905 * shouldn't be encrypted. Otherwise, we
5906 * couldn't enter it in the page table, since
5907 * we don't want the user to see the encrypted
5908 * data.
5909 */
5910 ASSERT_PAGE_DECRYPTED(m);
5911
1c79356b
A
5912 m->busy = TRUE;
5913 vm_object_unlock(object);
0c530ab8
A
5914 prot = entry->protection;
5915#ifdef STACK_ONLY_NX
5916 if (entry->alias != VM_MEMORY_STACK && prot)
5917 prot |= VM_PROT_EXECUTE;
5918#endif
5919 PMAP_ENTER(dst_map->pmap, va, m, prot,
55e303ae
A
5920 ((unsigned int)
5921 (m->object->wimg_bits))
5922 & VM_WIMG_MASK,
5923 TRUE);
1c79356b
A
5924
5925 vm_object_lock(object);
5926 PAGE_WAKEUP_DONE(m);
5927 /* the page is wired, so we don't have to activate */
5928 vm_object_paging_end(object);
5929 vm_object_unlock(object);
5930
5931 offset += PAGE_SIZE_64;
5932 va += PAGE_SIZE;
5933 }
5934 }
5935 else if (size <= vm_map_aggressive_enter_max) {
5936
91447636 5937 register vm_map_offset_t va;
1c79356b
A
5938 vm_object_offset_t offset;
5939 register vm_object_t object;
5940 vm_prot_t prot;
5941
5942 object = entry->object.vm_object;
5943 if (object != VM_OBJECT_NULL) {
5944
5945 offset = entry->offset;
5946 va = entry->vme_start;
5947 while (va < entry->vme_end) {
5948 register vm_page_t m;
5949
5950 /*
5951 * Look up the page in the object.
5952 * Assert that the page will be found
5953 * in the top object if at all...
5954 */
5955 vm_object_lock(object);
5956 vm_object_paging_begin(object);
5957
91447636
A
5958 /*
5959 * ENCRYPTED SWAP:
5960 * If the page is encrypted, skip it:
5961 * we can't let the user see the
5962 * encrypted contents. The page will
5963 * get decrypted on demand when the
5964 * user generates a soft-fault when
5965 * trying to access it.
5966 */
1c79356b
A
5967 if (((m = vm_page_lookup(object,
5968 offset))
5969 != VM_PAGE_NULL) &&
5970 !m->busy && !m->fictitious &&
91447636 5971 !m->encrypted &&
1c79356b
A
5972 !m->absent && !m->error) {
5973 m->busy = TRUE;
5974 vm_object_unlock(object);
5975
5976 /* honor cow obligations */
5977 prot = entry->protection;
5978 if (entry->needs_copy)
5979 prot &= ~VM_PROT_WRITE;
0c530ab8
A
5980#ifdef STACK_ONLY_NX
5981 if (entry->alias != VM_MEMORY_STACK && prot)
5982 prot |= VM_PROT_EXECUTE;
5983#endif
1c79356b 5984 PMAP_ENTER(dst_map->pmap, va,
9bccf70c 5985 m, prot,
55e303ae
A
5986 ((unsigned int)
5987 (m->object->wimg_bits))
5988 & VM_WIMG_MASK,
9bccf70c 5989 FALSE);
1c79356b
A
5990
5991 vm_object_lock(object);
5992 vm_page_lock_queues();
5993 if (!m->active && !m->inactive)
5994 vm_page_activate(m);
5995 vm_page_unlock_queues();
5996 PAGE_WAKEUP_DONE(m);
5997 }
5998 vm_object_paging_end(object);
5999 vm_object_unlock(object);
6000
6001 offset += PAGE_SIZE_64;
6002 va += PAGE_SIZE;
6003 }
6004 }
6005 }
6006 }
6007
6008 /*
6009 * Correct the page alignment for the result
6010 */
6011
6012 *dst_addr = start + (copy->offset - vm_copy_start);
6013
6014 /*
6015 * Update the hints and the map size
6016 */
6017
0c530ab8 6018 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
1c79356b
A
6019
6020 dst_map->size += size;
6021
6022 /*
6023 * Link in the copy
6024 */
6025
6026 vm_map_copy_insert(dst_map, last, copy);
6027
6028 vm_map_unlock(dst_map);
6029
6030 /*
6031 * XXX If wiring_required, call vm_map_pageable
6032 */
6033
6034 return(KERN_SUCCESS);
6035}
6036
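/*
 * Note: on success vm_map_copyout consumes the copy.  An OBJECT copy is
 * zfree'd once its mapping has been entered, a KERNEL_BUFFER copy is handed
 * off to vm_map_copyout_kernel_buffer, and an ENTRY_LIST copy has its
 * entries linked directly into dst_map via vm_map_copy_insert, so a copy
 * that has been copied out successfully must not also be passed to
 * vm_map_copy_discard.
 */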
1c79356b
A
6037/*
6038 * Routine: vm_map_copyin
6039 *
6040 * Description:
6041 * Copy the specified region (src_addr, len) from the
6042 * source address space (src_map), possibly removing
6043 * the region from the source address space (src_destroy).
6044 *
6045 * Returns:
6046 * A vm_map_copy_t object (copy_result), suitable for
6047 * insertion into another address space (using vm_map_copyout),
6048 * copying over another address space region (using
6049 * vm_map_copy_overwrite). If the copy is unused, it
6050 * should be destroyed (using vm_map_copy_discard).
6051 *
6052 * In/out conditions:
6053 * The source map should not be locked on entry.
6054 */
6055
6056typedef struct submap_map {
6057 vm_map_t parent_map;
91447636
A
6058 vm_map_offset_t base_start;
6059 vm_map_offset_t base_end;
1c79356b
A
6060 struct submap_map *next;
6061} submap_map_t;
6062
6063kern_return_t
6064vm_map_copyin_common(
6065 vm_map_t src_map,
91447636
A
6066 vm_map_address_t src_addr,
6067 vm_map_size_t len,
1c79356b 6068 boolean_t src_destroy,
91447636 6069 __unused boolean_t src_volatile,
1c79356b
A
6070 vm_map_copy_t *copy_result, /* OUT */
6071 boolean_t use_maxprot)
6072{
1c79356b
A
6073 vm_map_entry_t tmp_entry; /* Result of last map lookup --
6074 * in multi-level lookup, this
6075 * entry contains the actual
6076 * vm_object/offset.
6077 */
6078 register
6079 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
6080
91447636 6081 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
6082 * where copy is taking place now
6083 */
91447636 6084 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 6085 * copied */
91447636 6086 vm_map_t base_map = src_map;
1c79356b
A
6087 boolean_t map_share=FALSE;
6088 submap_map_t *parent_maps = NULL;
6089
6090 register
6091 vm_map_copy_t copy; /* Resulting copy */
91447636 6092 vm_map_address_t copy_addr;
1c79356b
A
6093
6094 /*
6095 * Check for copies of zero bytes.
6096 */
6097
6098 if (len == 0) {
6099 *copy_result = VM_MAP_COPY_NULL;
6100 return(KERN_SUCCESS);
6101 }
6102
4a249263
A
6103 /*
6104 * Check that the end address doesn't overflow
6105 */
6106 src_end = src_addr + len;
6107 if (src_end < src_addr)
6108 return KERN_INVALID_ADDRESS;
6109
1c79356b
A
6110 /*
6111 * If the copy is sufficiently small, use a kernel buffer instead
6112 * of making a virtual copy. The theory being that the cost of
6113 * setting up VM (and taking C-O-W faults) dominates the copy costs
6114 * for small regions.
6115 */
6116 if ((len < msg_ool_size_small) && !use_maxprot)
6117 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
6118 src_destroy, copy_result);
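	/*
	 * (Sketch of the intent: for such a small region the data is simply
	 * copied into a kernel buffer -- a VM_MAP_COPY_KERNEL_BUFFER copy,
	 * later copied out by vm_map_copyout_kernel_buffer -- so no map
	 * entries or copy-on-write state are created for it.  Only regions
	 * of at least msg_ool_size_small bytes take the entry-list path
	 * below.)
	 */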
6119
6120 /*
4a249263 6121 * Compute (page aligned) start and end of region
1c79356b 6122 */
91447636
A
6123 src_start = vm_map_trunc_page(src_addr);
6124 src_end = vm_map_round_page(src_end);
1c79356b
A
6125
6126 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0);
6127
1c79356b
A
6128 /*
6129 * Allocate a header element for the list.
6130 *
6131 * Use the start and end in the header to
6132 * remember the endpoints prior to rounding.
6133 */
6134
6135 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6136 vm_map_copy_first_entry(copy) =
6137 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
6138 copy->type = VM_MAP_COPY_ENTRY_LIST;
6139 copy->cpy_hdr.nentries = 0;
6140 copy->cpy_hdr.entries_pageable = TRUE;
6141
6142 copy->offset = src_addr;
6143 copy->size = len;
6144
6145 new_entry = vm_map_copy_entry_create(copy);
6146
6147#define RETURN(x) \
6148 MACRO_BEGIN \
6149 vm_map_unlock(src_map); \
9bccf70c
A
6150 if(src_map != base_map) \
6151 vm_map_deallocate(src_map); \
1c79356b
A
6152 if (new_entry != VM_MAP_ENTRY_NULL) \
6153 vm_map_copy_entry_dispose(copy,new_entry); \
6154 vm_map_copy_discard(copy); \
6155 { \
91447636 6156 submap_map_t *_ptr; \
1c79356b 6157 \
91447636 6158 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 6159 parent_maps=parent_maps->next; \
91447636
A
6160 if (_ptr->parent_map != base_map) \
6161 vm_map_deallocate(_ptr->parent_map); \
6162 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
6163 } \
6164 } \
6165 MACRO_RETURN(x); \
6166 MACRO_END
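	/*
	 * RETURN(x) is the common error-exit path for the loop below: it
	 * unlocks src_map (deallocating it if it is a submap rather than
	 * the base map), disposes of any speculatively allocated new_entry,
	 * discards the partially built copy, and unwinds the parent_maps
	 * chain built while descending into submaps, freeing each
	 * submap_map_t.
	 */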
6167
6168 /*
6169 * Find the beginning of the region.
6170 */
6171
6172 vm_map_lock(src_map);
6173
6174 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
6175 RETURN(KERN_INVALID_ADDRESS);
6176 if(!tmp_entry->is_sub_map) {
6177 vm_map_clip_start(src_map, tmp_entry, src_start);
6178 }
6179 /* set for later submap fix-up */
6180 copy_addr = src_start;
6181
6182 /*
6183 * Go through entries until we get to the end.
6184 */
6185
6186 while (TRUE) {
6187 register
6188 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 6189 vm_map_size_t src_size; /* Size of source
1c79356b
A
6190 * map entry (in both
6191 * maps)
6192 */
6193
6194 register
6195 vm_object_t src_object; /* Object to copy */
6196 vm_object_offset_t src_offset;
6197
6198 boolean_t src_needs_copy; /* Should source map
6199 * be made read-only
6200 * for copy-on-write?
6201 */
6202
6203 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
6204
6205 boolean_t was_wired; /* Was source wired? */
6206 vm_map_version_t version; /* Version before locks
6207 * dropped to make copy
6208 */
6209 kern_return_t result; /* Return value from
6210 * copy_strategically.
6211 */
6212 while(tmp_entry->is_sub_map) {
91447636 6213 vm_map_size_t submap_len;
1c79356b
A
6214 submap_map_t *ptr;
6215
6216 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
6217 ptr->next = parent_maps;
6218 parent_maps = ptr;
6219 ptr->parent_map = src_map;
6220 ptr->base_start = src_start;
6221 ptr->base_end = src_end;
6222 submap_len = tmp_entry->vme_end - src_start;
6223 if(submap_len > (src_end-src_start))
6224 submap_len = src_end-src_start;
6225 ptr->base_start += submap_len;
6226
6227 src_start -= tmp_entry->vme_start;
6228 src_start += tmp_entry->offset;
6229 src_end = src_start + submap_len;
6230 src_map = tmp_entry->object.sub_map;
6231 vm_map_lock(src_map);
9bccf70c
A
6232 /* keep an outstanding reference for all maps in */
6233			/* the parents' tree except the base map */
6234 vm_map_reference(src_map);
1c79356b
A
6235 vm_map_unlock(ptr->parent_map);
6236 if (!vm_map_lookup_entry(
6237 src_map, src_start, &tmp_entry))
6238 RETURN(KERN_INVALID_ADDRESS);
6239 map_share = TRUE;
6240 if(!tmp_entry->is_sub_map)
6241 vm_map_clip_start(src_map, tmp_entry, src_start);
6242 src_entry = tmp_entry;
6243 }
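		/*
		 * Here tmp_entry/src_entry refer to a non-submap entry in
		 * the deepest submap covering src_start; each level we
		 * descended through is recorded on the parent_maps stack
		 * (with the portion handled at this level already added to
		 * base_start), so the traversal can pop back up once this
		 * submap's range has been copied.
		 */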
0b4e3aa0 6244 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
55e303ae
A
6245 (tmp_entry->object.vm_object->phys_contiguous)) {
6246			/* This is not supported for now. In future */
6247			/* we will need to detect the phys_contig */
6248			/* condition and then upgrade copy_slowly */
6249			/* to do a physical copy from the device- */
6250			/* memory-based object. We can piggy-back */
6251			/* off of the was_wired boolean to set up */
6252			/* the proper handling */
0b4e3aa0
A
6253 RETURN(KERN_PROTECTION_FAILURE);
6254 }
1c79356b
A
6255 /*
6256 * Create a new address map entry to hold the result.
6257 * Fill in the fields from the appropriate source entries.
6258 * We must unlock the source map to do this if we need
6259 * to allocate a map entry.
6260 */
6261 if (new_entry == VM_MAP_ENTRY_NULL) {
6262 version.main_timestamp = src_map->timestamp;
6263 vm_map_unlock(src_map);
6264
6265 new_entry = vm_map_copy_entry_create(copy);
6266
6267 vm_map_lock(src_map);
6268 if ((version.main_timestamp + 1) != src_map->timestamp) {
6269 if (!vm_map_lookup_entry(src_map, src_start,
6270 &tmp_entry)) {
6271 RETURN(KERN_INVALID_ADDRESS);
6272 }
6273 vm_map_clip_start(src_map, tmp_entry, src_start);
6274 continue; /* restart w/ new tmp_entry */
6275 }
6276 }
6277
6278 /*
6279 * Verify that the region can be read.
6280 */
6281 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
6282 !use_maxprot) ||
6283 (src_entry->max_protection & VM_PROT_READ) == 0)
6284 RETURN(KERN_PROTECTION_FAILURE);
6285
6286 /*
6287 * Clip against the endpoints of the entire region.
6288 */
6289
6290 vm_map_clip_end(src_map, src_entry, src_end);
6291
6292 src_size = src_entry->vme_end - src_start;
6293 src_object = src_entry->object.vm_object;
6294 src_offset = src_entry->offset;
6295 was_wired = (src_entry->wired_count != 0);
6296
6297 vm_map_entry_copy(new_entry, src_entry);
6298 new_entry->use_pmap = FALSE; /* clr address space specifics */
6299
6300 /*
6301 * Attempt non-blocking copy-on-write optimizations.
6302 */
6303
6304 if (src_destroy &&
6305 (src_object == VM_OBJECT_NULL ||
6306 (src_object->internal && !src_object->true_share
6307 && !map_share))) {
6308 /*
6309 * If we are destroying the source, and the object
6310 * is internal, we can move the object reference
6311 * from the source to the copy. The copy is
6312 * copy-on-write only if the source is.
6313 * We make another reference to the object, because
6314 * destroying the source entry will deallocate it.
6315 */
6316 vm_object_reference(src_object);
6317
6318 /*
6319 * Copy is always unwired. vm_map_copy_entry
6320 * set its wired count to zero.
6321 */
6322
6323 goto CopySuccessful;
6324 }
6325
6326
6327RestartCopy:
6328 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
6329 src_object, new_entry, new_entry->object.vm_object,
6330 was_wired, 0);
55e303ae
A
6331 if ((src_object == VM_OBJECT_NULL ||
6332 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
6333 vm_object_copy_quickly(
1c79356b
A
6334 &new_entry->object.vm_object,
6335 src_offset,
6336 src_size,
6337 &src_needs_copy,
6338 &new_entry_needs_copy)) {
6339
6340 new_entry->needs_copy = new_entry_needs_copy;
6341
6342 /*
6343 * Handle copy-on-write obligations
6344 */
6345
6346 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
6347 vm_prot_t prot;
6348
6349 prot = src_entry->protection & ~VM_PROT_WRITE;
6350#ifdef STACK_ONLY_NX
6351 if (src_entry->alias != VM_MEMORY_STACK && prot)
6352 prot |= VM_PROT_EXECUTE;
6353#endif
55e303ae
A
6354 vm_object_pmap_protect(
6355 src_object,
6356 src_offset,
6357 src_size,
6358 (src_entry->is_shared ?
6359 PMAP_NULL
6360 : src_map->pmap),
6361 src_entry->vme_start,
0c530ab8
A
6362 prot);
6363
55e303ae 6364 tmp_entry->needs_copy = TRUE;
1c79356b
A
6365 }
6366
6367 /*
6368 * The map has never been unlocked, so it's safe
6369 * to move to the next entry rather than doing
6370 * another lookup.
6371 */
6372
6373 goto CopySuccessful;
6374 }
6375
1c79356b
A
6376 /*
6377 * Take an object reference, so that we may
6378 * release the map lock(s).
6379 */
6380
6381 assert(src_object != VM_OBJECT_NULL);
6382 vm_object_reference(src_object);
6383
6384 /*
6385 * Record the timestamp for later verification.
6386 * Unlock the map.
6387 */
6388
6389 version.main_timestamp = src_map->timestamp;
9bccf70c 6390 vm_map_unlock(src_map); /* Increments timestamp once! */
1c79356b
A
6391
6392 /*
6393 * Perform the copy
6394 */
6395
6396 if (was_wired) {
55e303ae 6397 CopySlowly:
1c79356b
A
6398 vm_object_lock(src_object);
6399 result = vm_object_copy_slowly(
6400 src_object,
6401 src_offset,
6402 src_size,
6403 THREAD_UNINT,
6404 &new_entry->object.vm_object);
6405 new_entry->offset = 0;
6406 new_entry->needs_copy = FALSE;
55e303ae
A
6407
6408 }
6409 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6410 (tmp_entry->is_shared || map_share)) {
6411 vm_object_t new_object;
6412
6413 vm_object_lock(src_object);
6414 new_object = vm_object_copy_delayed(
6415 src_object,
6416 src_offset,
6417 src_size);
6418 if (new_object == VM_OBJECT_NULL)
6419 goto CopySlowly;
6420
6421 new_entry->object.vm_object = new_object;
6422 new_entry->needs_copy = TRUE;
6423 result = KERN_SUCCESS;
6424
1c79356b
A
6425 } else {
6426 result = vm_object_copy_strategically(src_object,
6427 src_offset,
6428 src_size,
6429 &new_entry->object.vm_object,
6430 &new_entry->offset,
6431 &new_entry_needs_copy);
6432
6433 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
6434 }
6435
6436 if (result != KERN_SUCCESS &&
6437 result != KERN_MEMORY_RESTART_COPY) {
6438 vm_map_lock(src_map);
6439 RETURN(result);
6440 }
6441
6442 /*
6443 * Throw away the extra reference
6444 */
6445
6446 vm_object_deallocate(src_object);
6447
6448 /*
6449 * Verify that the map has not substantially
6450 * changed while the copy was being made.
6451 */
6452
9bccf70c 6453 vm_map_lock(src_map);
1c79356b
A
6454
6455 if ((version.main_timestamp + 1) == src_map->timestamp)
6456 goto VerificationSuccessful;
6457
6458 /*
6459 * Simple version comparison failed.
6460 *
6461 * Retry the lookup and verify that the
6462 * same object/offset are still present.
6463 *
6464 * [Note: a memory manager that colludes with
6465 * the calling task can detect that we have
6466 * cheated. While the map was unlocked, the
6467 * mapping could have been changed and restored.]
6468 */
6469
6470 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
6471 RETURN(KERN_INVALID_ADDRESS);
6472 }
6473
6474 src_entry = tmp_entry;
6475 vm_map_clip_start(src_map, src_entry, src_start);
6476
91447636
A
6477 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
6478 !use_maxprot) ||
6479 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
6480 goto VerificationFailed;
6481
6482 if (src_entry->vme_end < new_entry->vme_end)
6483 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
6484
6485 if ((src_entry->object.vm_object != src_object) ||
6486 (src_entry->offset != src_offset) ) {
6487
6488 /*
6489 * Verification failed.
6490 *
6491 * Start over with this top-level entry.
6492 */
6493
6494 VerificationFailed: ;
6495
6496 vm_object_deallocate(new_entry->object.vm_object);
6497 tmp_entry = src_entry;
6498 continue;
6499 }
6500
6501 /*
6502 * Verification succeeded.
6503 */
6504
6505 VerificationSuccessful: ;
6506
6507 if (result == KERN_MEMORY_RESTART_COPY)
6508 goto RestartCopy;
6509
6510 /*
6511 * Copy succeeded.
6512 */
6513
6514 CopySuccessful: ;
6515
6516 /*
6517 * Link in the new copy entry.
6518 */
6519
6520 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
6521 new_entry);
6522
6523 /*
6524 * Determine whether the entire region
6525 * has been copied.
6526 */
6527 src_start = new_entry->vme_end;
6528 new_entry = VM_MAP_ENTRY_NULL;
6529 while ((src_start >= src_end) && (src_end != 0)) {
6530 if (src_map != base_map) {
6531 submap_map_t *ptr;
6532
6533 ptr = parent_maps;
6534 assert(ptr != NULL);
6535 parent_maps = parent_maps->next;
1c79356b 6536 vm_map_unlock(src_map);
9bccf70c
A
6537 vm_map_deallocate(src_map);
6538 vm_map_lock(ptr->parent_map);
1c79356b
A
6539 src_map = ptr->parent_map;
6540 src_start = ptr->base_start;
6541 src_end = ptr->base_end;
6542 if ((src_end > src_start) &&
6543 !vm_map_lookup_entry(
6544 src_map, src_start, &tmp_entry))
6545 RETURN(KERN_INVALID_ADDRESS);
91447636 6546 kfree(ptr, sizeof(submap_map_t));
1c79356b
A
6547 if(parent_maps == NULL)
6548 map_share = FALSE;
6549 src_entry = tmp_entry->vme_prev;
6550 } else
6551 break;
6552 }
6553 if ((src_start >= src_end) && (src_end != 0))
6554 break;
6555
6556 /*
6557 * Verify that there are no gaps in the region
6558 */
6559
6560 tmp_entry = src_entry->vme_next;
6561 if ((tmp_entry->vme_start != src_start) ||
6562 (tmp_entry == vm_map_to_entry(src_map)))
6563 RETURN(KERN_INVALID_ADDRESS);
6564 }
6565
6566 /*
6567 * If the source should be destroyed, do it now, since the
6568 * copy was successful.
6569 */
6570 if (src_destroy) {
6571 (void) vm_map_delete(src_map,
91447636 6572 vm_map_trunc_page(src_addr),
1c79356b
A
6573 src_end,
6574 (src_map == kernel_map) ?
6575 VM_MAP_REMOVE_KUNWIRE :
91447636
A
6576 VM_MAP_NO_FLAGS,
6577 VM_MAP_NULL);
1c79356b
A
6578 }
6579
6580 vm_map_unlock(src_map);
6581
6582 /* Fix-up start and end points in copy. This is necessary */
6583 /* when the various entries in the copy object were picked */
6584 /* up from different sub-maps */
6585
6586 tmp_entry = vm_map_copy_first_entry(copy);
6587 while (tmp_entry != vm_map_copy_to_entry(copy)) {
6588 tmp_entry->vme_end = copy_addr +
6589 (tmp_entry->vme_end - tmp_entry->vme_start);
6590 tmp_entry->vme_start = copy_addr;
6591 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
6592 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
6593 }
6594
6595 *copy_result = copy;
6596 return(KERN_SUCCESS);
6597
6598#undef RETURN
6599}
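/*
 * A minimal caller-side sketch (illustrative only: task_a_map, task_b_map,
 * addr and len are hypothetical, and error handling is elided), pairing the
 * copy-in above with vm_map_copyout to move a region between address spaces:
 *
 *	vm_map_copy_t		copy;
 *	vm_map_address_t	new_addr;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_copyin_common(task_a_map, addr, len,
 *				  FALSE,	(src_destroy)
 *				  FALSE,	(src_volatile, unused)
 *				  &copy,
 *				  FALSE);	(use_maxprot)
 *	if (kr == KERN_SUCCESS)
 *		kr = vm_map_copyout(task_b_map, &new_addr, copy);
 *
 * Per the header comment above, a copy that ends up unused should be
 * destroyed with vm_map_copy_discard().
 */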
6600
6601/*
6602 * vm_map_copyin_object:
6603 *
6604 * Create a copy object from an object.
6605 * Our caller donates an object reference.
6606 */
6607
6608kern_return_t
6609vm_map_copyin_object(
6610 vm_object_t object,
6611 vm_object_offset_t offset, /* offset of region in object */
6612 vm_object_size_t size, /* size of region in object */
6613 vm_map_copy_t *copy_result) /* OUT */
6614{
6615 vm_map_copy_t copy; /* Resulting copy */
6616
6617 /*
6618 * We drop the object into a special copy object
6619 * that contains the object directly.
6620 */
6621
6622 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6623 copy->type = VM_MAP_COPY_OBJECT;
6624 copy->cpy_object = object;
1c79356b
A
6625 copy->offset = offset;
6626 copy->size = size;
6627
6628 *copy_result = copy;
6629 return(KERN_SUCCESS);
6630}
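/*
 * Illustrative pairing with vm_map_copyout (the object, size, copy, addr and
 * dst_map names are hypothetical; the reference passed in is donated):
 *
 *	vm_object_reference(object);
 *	vm_map_copyin_object(object, 0, size, &copy);
 *	kr = vm_map_copyout(dst_map, &addr, copy);
 *
 * On success the OBJECT copy is consumed by vm_map_copyout and "object"
 * ends up mapped at addr in dst_map.
 */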
6631
91447636 6632static void
1c79356b
A
6633vm_map_fork_share(
6634 vm_map_t old_map,
6635 vm_map_entry_t old_entry,
6636 vm_map_t new_map)
6637{
6638 vm_object_t object;
6639 vm_map_entry_t new_entry;
1c79356b
A
6640
6641 /*
6642 * New sharing code. New map entry
6643 * references original object. Internal
6644 * objects use asynchronous copy algorithm for
6645 * future copies. First make sure we have
6646 * the right object. If we need a shadow,
6647 * or someone else already has one, then
6648 * make a new shadow and share it.
6649 */
6650
6651 object = old_entry->object.vm_object;
6652 if (old_entry->is_sub_map) {
6653 assert(old_entry->wired_count == 0);
0c530ab8 6654#ifndef NO_NESTED_PMAP
1c79356b 6655 if(old_entry->use_pmap) {
91447636
A
6656 kern_return_t result;
6657
1c79356b
A
6658 result = pmap_nest(new_map->pmap,
6659 (old_entry->object.sub_map)->pmap,
55e303ae
A
6660 (addr64_t)old_entry->vme_start,
6661 (addr64_t)old_entry->vme_start,
6662 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
6663 if(result)
6664 panic("vm_map_fork_share: pmap_nest failed!");
6665 }
0c530ab8 6666#endif /* NO_NESTED_PMAP */
1c79356b 6667 } else if (object == VM_OBJECT_NULL) {
91447636 6668 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
1c79356b
A
6669 old_entry->vme_start));
6670 old_entry->offset = 0;
6671 old_entry->object.vm_object = object;
6672 assert(!old_entry->needs_copy);
6673 } else if (object->copy_strategy !=
6674 MEMORY_OBJECT_COPY_SYMMETRIC) {
6675
6676 /*
6677 * We are already using an asymmetric
6678 * copy, and therefore we already have
6679 * the right object.
6680 */
6681
6682 assert(! old_entry->needs_copy);
6683 }
6684 else if (old_entry->needs_copy || /* case 1 */
6685 object->shadowed || /* case 2 */
6686 (!object->true_share && /* case 3 */
6687 !old_entry->is_shared &&
6688 (object->size >
91447636 6689 (vm_map_size_t)(old_entry->vme_end -
1c79356b
A
6690 old_entry->vme_start)))) {
6691
6692 /*
6693 * We need to create a shadow.
6694 * There are three cases here.
6695 * In the first case, we need to
6696 * complete a deferred symmetrical
6697 * copy that we participated in.
6698 * In the second and third cases,
6699 * we need to create the shadow so
6700 * that changes that we make to the
6701 * object do not interfere with
6702 * any symmetrical copies which
6703		 * have occurred (case 2) or which
6704 * might occur (case 3).
6705 *
6706 * The first case is when we had
6707 * deferred shadow object creation
6708 * via the entry->needs_copy mechanism.
6709 * This mechanism only works when
6710 * only one entry points to the source
6711 * object, and we are about to create
6712 * a second entry pointing to the
6713 * same object. The problem is that
6714 * there is no way of mapping from
6715 * an object to the entries pointing
6716 * to it. (Deferred shadow creation
6717		 * works with one entry because it occurs
6718 * at fault time, and we walk from the
6719 * entry to the object when handling
6720 * the fault.)
6721 *
6722 * The second case is when the object
6723 * to be shared has already been copied
6724 * with a symmetric copy, but we point
6725 * directly to the object without
6726 * needs_copy set in our entry. (This
6727 * can happen because different ranges
6728 * of an object can be pointed to by
6729 * different entries. In particular,
6730 * a single entry pointing to an object
6731 * can be split by a call to vm_inherit,
6732 * which, combined with task_create, can
6733 * result in the different entries
6734 * having different needs_copy values.)
6735 * The shadowed flag in the object allows
6736 * us to detect this case. The problem
6737 * with this case is that if this object
6738 * has or will have shadows, then we
6739 * must not perform an asymmetric copy
6740 * of this object, since such a copy
6741 * allows the object to be changed, which
6742 * will break the previous symmetrical
6743 * copies (which rely upon the object
6744 * not changing). In a sense, the shadowed
6745 * flag says "don't change this object".
6746 * We fix this by creating a shadow
6747 * object for this object, and sharing
6748 * that. This works because we are free
6749 * to change the shadow object (and thus
6750 * to use an asymmetric copy strategy);
6751 * this is also semantically correct,
6752 * since this object is temporary, and
6753 * therefore a copy of the object is
6754 * as good as the object itself. (This
6755 * is not true for permanent objects,
6756 * since the pager needs to see changes,
6757 * which won't happen if the changes
6758 * are made to a copy.)
6759 *
6760 * The third case is when the object
6761 * to be shared has parts sticking
6762 * outside of the entry we're working
6763 * with, and thus may in the future
6764 * be subject to a symmetrical copy.
6765 * (This is a preemptive version of
6766 * case 2.)
6767 */
6768
6769 assert(!(object->shadowed && old_entry->is_shared));
6770 vm_object_shadow(&old_entry->object.vm_object,
6771 &old_entry->offset,
91447636 6772 (vm_map_size_t) (old_entry->vme_end -
1c79356b
A
6773 old_entry->vme_start));
6774
6775 /*
6776 * If we're making a shadow for other than
6777 * copy on write reasons, then we have
6778 * to remove write permission.
6779 */
6780
1c79356b
A
6781 if (!old_entry->needs_copy &&
6782 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
6783 vm_prot_t prot;
6784
6785 prot = old_entry->protection & ~VM_PROT_WRITE;
6786#ifdef STACK_ONLY_NX
6787 if (old_entry->alias != VM_MEMORY_STACK && prot)
6788 prot |= VM_PROT_EXECUTE;
6789#endif
6790 if (old_map->mapped) {
9bccf70c
A
6791 vm_object_pmap_protect(
6792 old_entry->object.vm_object,
6793 old_entry->offset,
6794 (old_entry->vme_end -
6795 old_entry->vme_start),
6796 PMAP_NULL,
6797 old_entry->vme_start,
0c530ab8 6798 prot);
1c79356b 6799 } else {
9bccf70c 6800 pmap_protect(old_map->pmap,
1c79356b
A
6801 old_entry->vme_start,
6802 old_entry->vme_end,
0c530ab8 6803 prot);
1c79356b
A
6804 }
6805 }
6806
6807 old_entry->needs_copy = FALSE;
6808 object = old_entry->object.vm_object;
6809 }
6810
6811 /*
6812 * If object was using a symmetric copy strategy,
6813 * change its copy strategy to the default
6814 * asymmetric copy strategy, which is copy_delay
6815 * in the non-norma case and copy_call in the
6816 * norma case. Bump the reference count for the
6817 * new entry.
6818 */
6819
6820 if(old_entry->is_sub_map) {
6821 vm_map_lock(old_entry->object.sub_map);
6822 vm_map_reference(old_entry->object.sub_map);
6823 vm_map_unlock(old_entry->object.sub_map);
6824 } else {
6825 vm_object_lock(object);
6826 object->ref_count++;
6827 vm_object_res_reference(object);
6828 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
6829 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6830 }
6831 vm_object_unlock(object);
6832 }
6833
6834 /*
6835 * Clone the entry, using object ref from above.
6836 * Mark both entries as shared.
6837 */
6838
6839 new_entry = vm_map_entry_create(new_map);
6840 vm_map_entry_copy(new_entry, old_entry);
6841 old_entry->is_shared = TRUE;
6842 new_entry->is_shared = TRUE;
6843
6844 /*
6845 * Insert the entry into the new map -- we
6846 * know we're inserting at the end of the new
6847 * map.
6848 */
6849
6850 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
6851
6852 /*
6853 * Update the physical map
6854 */
6855
6856 if (old_entry->is_sub_map) {
6857 /* Bill Angell pmap support goes here */
6858 } else {
6859 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
6860 old_entry->vme_end - old_entry->vme_start,
6861 old_entry->vme_start);
6862 }
6863}
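/*
 * In short, vm_map_fork_share arranges for parent and child to see the same
 * memory: a nested submap is re-nested into the child's pmap, a null object
 * gets a freshly allocated one, an object already using an asymmetric copy
 * strategy is used as-is, and the remaining symmetric-copy cases (1-3 in the
 * comment above) are given a shadow object first.  The entry is then cloned,
 * both entries are marked is_shared, and a symmetric-copy object has its
 * copy strategy switched to MEMORY_OBJECT_COPY_DELAY.
 */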
6864
91447636 6865static boolean_t
1c79356b
A
6866vm_map_fork_copy(
6867 vm_map_t old_map,
6868 vm_map_entry_t *old_entry_p,
6869 vm_map_t new_map)
6870{
6871 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
6872 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
6873 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
6874 vm_map_copy_t copy;
6875 vm_map_entry_t last = vm_map_last_entry(new_map);
6876
6877 vm_map_unlock(old_map);
6878 /*
6879 * Use maxprot version of copyin because we
6880 * care about whether this memory can ever
6881 * be accessed, not just whether it's accessible
6882 * right now.
6883 */
6884 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
6885 != KERN_SUCCESS) {
6886 /*
6887 * The map might have changed while it
6888 * was unlocked, check it again. Skip
6889 * any blank space or permanently
6890 * unreadable region.
6891 */
6892 vm_map_lock(old_map);
6893 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 6894 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
6895 last = last->vme_next;
6896 }
6897 *old_entry_p = last;
6898
6899 /*
6900 * XXX For some error returns, want to
6901 * XXX skip to the next element. Note
6902 * that INVALID_ADDRESS and
6903 * PROTECTION_FAILURE are handled above.
6904 */
6905
6906 return FALSE;
6907 }
6908
6909 /*
6910 * Insert the copy into the new map
6911 */
6912
6913 vm_map_copy_insert(new_map, last, copy);
6914
6915 /*
6916 * Pick up the traversal at the end of
6917 * the copied region.
6918 */
6919
6920 vm_map_lock(old_map);
6921 start += entry_size;
6922 if (! vm_map_lookup_entry(old_map, start, &last)) {
6923 last = last->vme_next;
6924 } else {
6925 vm_map_clip_start(old_map, last, start);
6926 }
6927 *old_entry_p = last;
6928
6929 return TRUE;
6930}
6931
6932/*
6933 * vm_map_fork:
6934 *
6935 * Create and return a new map based on the old
6936 * map, according to the inheritance values on the
6937 * regions in that map.
6938 *
6939 * The source map must not be locked.
6940 */
6941vm_map_t
6942vm_map_fork(
6943 vm_map_t old_map)
6944{
0c530ab8
A
6945 pmap_t new_pmap = pmap_create(
6946 (vm_map_size_t) 0,
6947 task_has_64BitAddr(current_task()));
1c79356b
A
6948 vm_map_t new_map;
6949 vm_map_entry_t old_entry;
91447636 6950 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
6951 vm_map_entry_t new_entry;
6952 boolean_t src_needs_copy;
6953 boolean_t new_entry_needs_copy;
6954
6955 vm_map_reference_swap(old_map);
6956 vm_map_lock(old_map);
6957
6958 new_map = vm_map_create(new_pmap,
6959 old_map->min_offset,
6960 old_map->max_offset,
6961 old_map->hdr.entries_pageable);
6962
6963 for (
6964 old_entry = vm_map_first_entry(old_map);
6965 old_entry != vm_map_to_entry(old_map);
6966 ) {
6967
6968 entry_size = old_entry->vme_end - old_entry->vme_start;
6969
6970 switch (old_entry->inheritance) {
6971 case VM_INHERIT_NONE:
6972 break;
6973
6974 case VM_INHERIT_SHARE:
6975 vm_map_fork_share(old_map, old_entry, new_map);
6976 new_size += entry_size;
6977 break;
6978
6979 case VM_INHERIT_COPY:
6980
6981 /*
6982 * Inline the copy_quickly case;
6983 * upon failure, fall back on call
6984 * to vm_map_fork_copy.
6985 */
6986
6987 if(old_entry->is_sub_map)
6988 break;
9bccf70c
A
6989 if ((old_entry->wired_count != 0) ||
6990 ((old_entry->object.vm_object != NULL) &&
6991 (old_entry->object.vm_object->true_share))) {
1c79356b
A
6992 goto slow_vm_map_fork_copy;
6993 }
6994
6995 new_entry = vm_map_entry_create(new_map);
6996 vm_map_entry_copy(new_entry, old_entry);
6997 /* clear address space specifics */
6998 new_entry->use_pmap = FALSE;
6999
7000 if (! vm_object_copy_quickly(
7001 &new_entry->object.vm_object,
7002 old_entry->offset,
7003 (old_entry->vme_end -
7004 old_entry->vme_start),
7005 &src_needs_copy,
7006 &new_entry_needs_copy)) {
7007 vm_map_entry_dispose(new_map, new_entry);
7008 goto slow_vm_map_fork_copy;
7009 }
7010
7011 /*
7012 * Handle copy-on-write obligations
7013 */
7014
7015 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
7016 vm_prot_t prot;
7017
7018 prot = old_entry->protection & ~VM_PROT_WRITE;
7019#ifdef STACK_ONLY_NX
7020 if (old_entry->alias != VM_MEMORY_STACK && prot)
7021 prot |= VM_PROT_EXECUTE;
7022#endif
1c79356b
A
7023 vm_object_pmap_protect(
7024 old_entry->object.vm_object,
7025 old_entry->offset,
7026 (old_entry->vme_end -
7027 old_entry->vme_start),
7028 ((old_entry->is_shared
9bccf70c 7029 || old_map->mapped)
1c79356b
A
7030 ? PMAP_NULL :
7031 old_map->pmap),
7032 old_entry->vme_start,
0c530ab8 7033 prot);
1c79356b
A
7034
7035 old_entry->needs_copy = TRUE;
7036 }
7037 new_entry->needs_copy = new_entry_needs_copy;
7038
7039 /*
7040 * Insert the entry at the end
7041 * of the map.
7042 */
7043
7044 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
7045 new_entry);
7046 new_size += entry_size;
7047 break;
7048
7049 slow_vm_map_fork_copy:
7050 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
7051 new_size += entry_size;
7052 }
7053 continue;
7054 }
7055 old_entry = old_entry->vme_next;
7056 }
7057
7058 new_map->size = new_size;
7059 vm_map_unlock(old_map);
7060 vm_map_deallocate(old_map);
7061
7062 return(new_map);
7063}
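/*
 * Summary of the loop above: each parent entry is handled according to its
 * inheritance -- VM_INHERIT_NONE entries are skipped, VM_INHERIT_SHARE
 * entries go through vm_map_fork_share, and VM_INHERIT_COPY entries (other
 * than submaps, which are skipped) are copied either via the inlined
 * vm_object_copy_quickly path, which leaves the mapping copy-on-write, or
 * via vm_map_fork_copy when the entry is wired, its object is true_share,
 * or the quick copy fails.
 */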
7064
7065
7066/*
7067 * vm_map_lookup_locked:
7068 *
7069 * Finds the VM object, offset, and
7070 * protection for a given virtual address in the
7071 * specified map, assuming a page fault of the
7072 * type specified.
7073 *
7074 * Returns the (object, offset, protection) for
7075 * this address, whether it is wired down, and whether
7076 * this map has the only reference to the data in question.
7077 * In order to later verify this lookup, a "version"
7078 * is returned.
7079 *
7080 * The map MUST be locked by the caller and WILL be
7081 * locked on exit. In order to guarantee the
7082 * existence of the returned object, it is returned
7083 * locked.
7084 *
7085 * If a lookup is requested with "write protection"
7086 * specified, the map may be changed to perform virtual
7087 * copying operations, although the data referenced will
7088 * remain the same.
7089 */
7090kern_return_t
7091vm_map_lookup_locked(
7092 vm_map_t *var_map, /* IN/OUT */
91447636
A
7093 vm_map_offset_t vaddr,
7094 vm_prot_t fault_type,
1c79356b
A
7095 vm_map_version_t *out_version, /* OUT */
7096 vm_object_t *object, /* OUT */
7097 vm_object_offset_t *offset, /* OUT */
7098 vm_prot_t *out_prot, /* OUT */
7099 boolean_t *wired, /* OUT */
7100 int *behavior, /* OUT */
91447636
A
7101 vm_map_offset_t *lo_offset, /* OUT */
7102 vm_map_offset_t *hi_offset, /* OUT */
7103 vm_map_t *real_map)
1c79356b
A
7104{
7105 vm_map_entry_t entry;
7106 register vm_map_t map = *var_map;
7107 vm_map_t old_map = *var_map;
7108 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
7109 vm_map_offset_t cow_parent_vaddr = 0;
7110 vm_map_offset_t old_start = 0;
7111 vm_map_offset_t old_end = 0;
1c79356b
A
7112 register vm_prot_t prot;
7113
91447636 7114 *real_map = map;
1c79356b
A
7115 RetryLookup: ;
7116
7117 /*
7118 * If the map has an interesting hint, try it before calling
7119 * full blown lookup routine.
7120 */
1c79356b 7121 entry = map->hint;
1c79356b
A
7122
7123 if ((entry == vm_map_to_entry(map)) ||
7124 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
7125 vm_map_entry_t tmp_entry;
7126
7127 /*
7128 * Entry was either not a valid hint, or the vaddr
7129 * was not contained in the entry, so do a full lookup.
7130 */
7131 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
7132 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
7133 vm_map_unlock(cow_sub_map_parent);
91447636
A
7134 if((*real_map != map)
7135 && (*real_map != cow_sub_map_parent))
7136 vm_map_unlock(*real_map);
1c79356b
A
7137 return KERN_INVALID_ADDRESS;
7138 }
7139
7140 entry = tmp_entry;
7141 }
7142 if(map == old_map) {
7143 old_start = entry->vme_start;
7144 old_end = entry->vme_end;
7145 }
7146
7147 /*
7148 * Handle submaps. Drop lock on upper map, submap is
7149 * returned locked.
7150 */
7151
7152submap_recurse:
7153 if (entry->is_sub_map) {
91447636
A
7154 vm_map_offset_t local_vaddr;
7155 vm_map_offset_t end_delta;
7156 vm_map_offset_t start_delta;
1c79356b
A
7157 vm_map_entry_t submap_entry;
7158 boolean_t mapped_needs_copy=FALSE;
7159
7160 local_vaddr = vaddr;
7161
7162 if ((!entry->needs_copy) && (entry->use_pmap)) {
91447636
A
7163 /* if real_map equals map we unlock below */
7164 if ((*real_map != map) &&
7165 (*real_map != cow_sub_map_parent))
7166 vm_map_unlock(*real_map);
7167 *real_map = entry->object.sub_map;
1c79356b
A
7168 }
7169
7170 if(entry->needs_copy) {
7171 if (!mapped_needs_copy) {
7172 if (vm_map_lock_read_to_write(map)) {
7173 vm_map_lock_read(map);
91447636
A
7174 if(*real_map == entry->object.sub_map)
7175 *real_map = map;
1c79356b
A
7176 goto RetryLookup;
7177 }
7178 vm_map_lock_read(entry->object.sub_map);
7179 cow_sub_map_parent = map;
7180 /* reset base to map before cow object */
7181 /* this is the map which will accept */
7182 /* the new cow object */
7183 old_start = entry->vme_start;
7184 old_end = entry->vme_end;
7185 cow_parent_vaddr = vaddr;
7186 mapped_needs_copy = TRUE;
7187 } else {
7188 vm_map_lock_read(entry->object.sub_map);
7189 if((cow_sub_map_parent != map) &&
91447636 7190 (*real_map != map))
1c79356b
A
7191 vm_map_unlock(map);
7192 }
7193 } else {
7194 vm_map_lock_read(entry->object.sub_map);
7195			/* leave the map locked if it is a target */
7196			/* cow sub_map above; otherwise, just */
7197			/* follow the maps down to the object. */
7198			/* Here we unlock, knowing we are not */
7199			/* revisiting the map. */
91447636 7200 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
7201 vm_map_unlock_read(map);
7202 }
7203
7204 *var_map = map = entry->object.sub_map;
7205
7206 /* calculate the offset in the submap for vaddr */
7207 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
7208
7209RetrySubMap:
7210 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
7211 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
7212 vm_map_unlock(cow_sub_map_parent);
7213 }
91447636
A
7214 if((*real_map != map)
7215 && (*real_map != cow_sub_map_parent)) {
7216 vm_map_unlock(*real_map);
1c79356b 7217 }
91447636 7218 *real_map = map;
1c79356b
A
7219 return KERN_INVALID_ADDRESS;
7220 }
7221 /* find the attenuated shadow of the underlying object */
7222 /* on our target map */
7223
7224		/* in plain English, the submap object may extend beyond the */
7225 /* region mapped by the entry or, may only fill a portion */
7226 /* of it. For our purposes, we only care if the object */
7227 /* doesn't fill. In this case the area which will */
7228 /* ultimately be clipped in the top map will only need */
7229 /* to be as big as the portion of the underlying entry */
7230 /* which is mapped */
7231 start_delta = submap_entry->vme_start > entry->offset ?
7232 submap_entry->vme_start - entry->offset : 0;
7233
7234 end_delta =
7235 (entry->offset + start_delta + (old_end - old_start)) <=
7236 submap_entry->vme_end ?
7237 0 : (entry->offset +
7238 (old_end - old_start))
7239 - submap_entry->vme_end;
7240
7241 old_start += start_delta;
7242 old_end -= end_delta;
7243
7244 if(submap_entry->is_sub_map) {
7245 entry = submap_entry;
7246 vaddr = local_vaddr;
7247 goto submap_recurse;
7248 }
7249
7250 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
7251
7252 vm_object_t copy_object;
91447636
A
7253 vm_map_offset_t local_start;
7254 vm_map_offset_t local_end;
0b4e3aa0 7255 boolean_t copied_slowly = FALSE;
1c79356b
A
7256
7257 if (vm_map_lock_read_to_write(map)) {
7258 vm_map_lock_read(map);
7259 old_start -= start_delta;
7260 old_end += end_delta;
7261 goto RetrySubMap;
7262 }
0b4e3aa0
A
7263
7264
1c79356b
A
7265 if (submap_entry->object.vm_object == VM_OBJECT_NULL) {
7266 submap_entry->object.vm_object =
7267 vm_object_allocate(
91447636 7268 (vm_map_size_t)
1c79356b
A
7269 (submap_entry->vme_end
7270 - submap_entry->vme_start));
91447636 7271 submap_entry->offset = 0;
1c79356b
A
7272 }
7273 local_start = local_vaddr -
7274 (cow_parent_vaddr - old_start);
7275 local_end = local_vaddr +
7276 (old_end - cow_parent_vaddr);
7277 vm_map_clip_start(map, submap_entry, local_start);
7278 vm_map_clip_end(map, submap_entry, local_end);
7279
7280			/* This is the COW case, let's connect */
7281 /* an entry in our space to the underlying */
7282 /* object in the submap, bypassing the */
7283 /* submap. */
0b4e3aa0
A
7284
7285
7286 if(submap_entry->wired_count != 0) {
7287 vm_object_lock(
7288 submap_entry->object.vm_object);
7289 vm_object_copy_slowly(
7290 submap_entry->object.vm_object,
7291 submap_entry->offset,
7292 submap_entry->vme_end -
7293 submap_entry->vme_start,
7294 FALSE,
7295 &copy_object);
7296 copied_slowly = TRUE;
7297 } else {
0b4e3aa0
A
7298 /* set up shadow object */
7299 copy_object = submap_entry->object.vm_object;
7300 vm_object_reference(copy_object);
7301 submap_entry->object.vm_object->shadowed = TRUE;
7302 submap_entry->needs_copy = TRUE;
0c530ab8
A
7303
7304 prot = submap_entry->protection & ~VM_PROT_WRITE;
7305#ifdef STACK_ONLY_NX
7306 if (submap_entry->alias != VM_MEMORY_STACK && prot)
7307 prot |= VM_PROT_EXECUTE;
7308#endif
0b4e3aa0
A
7309 vm_object_pmap_protect(
7310 submap_entry->object.vm_object,
1c79356b
A
7311 submap_entry->offset,
7312 submap_entry->vme_end -
7313 submap_entry->vme_start,
9bccf70c
A
7314 (submap_entry->is_shared
7315 || map->mapped) ?
1c79356b
A
7316 PMAP_NULL : map->pmap,
7317 submap_entry->vme_start,
0c530ab8 7318 prot);
0b4e3aa0 7319 }
1c79356b
A
7320
7321
7322			/* This works differently from the */
7323			/* normal submap case. We go back */
7324			/* to the parent of the cow map and */
7325			/* clip out the target portion of */
7326			/* the sub_map, substituting the */
7327			/* new copy object. */
7328
7329 vm_map_unlock(map);
7330 local_start = old_start;
7331 local_end = old_end;
7332 map = cow_sub_map_parent;
7333 *var_map = cow_sub_map_parent;
7334 vaddr = cow_parent_vaddr;
7335 cow_sub_map_parent = NULL;
7336
7337 if(!vm_map_lookup_entry(map,
7338 vaddr, &entry)) {
7339 vm_object_deallocate(
7340 copy_object);
7341 vm_map_lock_write_to_read(map);
7342 return KERN_INVALID_ADDRESS;
7343 }
7344
7345 /* clip out the portion of space */
7346 /* mapped by the sub map which */
7347 /* corresponds to the underlying */
7348 /* object */
7349 vm_map_clip_start(map, entry, local_start);
7350 vm_map_clip_end(map, entry, local_end);
7351
7352
7353 /* substitute copy object for */
7354 /* shared map entry */
7355 vm_map_deallocate(entry->object.sub_map);
7356 entry->is_sub_map = FALSE;
1c79356b 7357 entry->object.vm_object = copy_object;
1c79356b
A
7358
7359 entry->protection |= VM_PROT_WRITE;
7360 entry->max_protection |= VM_PROT_WRITE;
0b4e3aa0
A
7361 if(copied_slowly) {
7362 entry->offset = 0;
7363 entry->needs_copy = FALSE;
7364 entry->is_shared = FALSE;
7365 } else {
7366 entry->offset = submap_entry->offset;
7367 entry->needs_copy = TRUE;
7368 if(entry->inheritance == VM_INHERIT_SHARE)
7369 entry->inheritance = VM_INHERIT_COPY;
7370 if (map != old_map)
7371 entry->is_shared = TRUE;
7372 }
1c79356b 7373 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 7374 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
7375
7376 vm_map_lock_write_to_read(map);
7377 } else {
7378 if((cow_sub_map_parent)
91447636 7379 && (cow_sub_map_parent != *real_map)
1c79356b
A
7380 && (cow_sub_map_parent != map)) {
7381 vm_map_unlock(cow_sub_map_parent);
7382 }
7383 entry = submap_entry;
7384 vaddr = local_vaddr;
7385 }
7386 }
7387
7388 /*
7389 * Check whether this task is allowed to have
7390 * this page.
7391 */
6601e61a 7392 prot = entry->protection;
0c530ab8
A
7393
7394#ifdef STACK_ONLY_NX
7395 if (entry->alias != VM_MEMORY_STACK && prot)
7396 /*
7397	 * HACK -- if not a stack, then allow execution
7398 */
7399 prot |= VM_PROT_EXECUTE;
7400#endif
1c79356b 7401 if ((fault_type & (prot)) != fault_type) {
0c530ab8
A
7402 if (*real_map != map) {
7403 vm_map_unlock(*real_map);
7404 }
7405 *real_map = map;
7406
7407 if ((fault_type & VM_PROT_EXECUTE) && prot)
7408 log_nx_failure((addr64_t)vaddr, prot);
7409
7410 return KERN_PROTECTION_FAILURE;
1c79356b
A
7411 }
7412
7413 /*
7414 * If this page is not pageable, we have to get
7415 * it for all possible accesses.
7416 */
7417
91447636
A
7418 *wired = (entry->wired_count != 0);
7419 if (*wired)
0c530ab8 7420 fault_type = prot;
1c79356b
A
7421
7422 /*
7423 * If the entry was copy-on-write, we either ...
7424 */
7425
7426 if (entry->needs_copy) {
7427 /*
7428 * If we want to write the page, we may as well
7429 * handle that now since we've got the map locked.
7430 *
7431 * If we don't need to write the page, we just
7432 * demote the permissions allowed.
7433 */
7434
91447636 7435 if ((fault_type & VM_PROT_WRITE) || *wired) {
1c79356b
A
7436 /*
7437 * Make a new object, and place it in the
7438 * object chain. Note that no new references
7439 * have appeared -- one just moved from the
7440 * map to the new object.
7441 */
7442
7443 if (vm_map_lock_read_to_write(map)) {
7444 vm_map_lock_read(map);
7445 goto RetryLookup;
7446 }
7447 vm_object_shadow(&entry->object.vm_object,
7448 &entry->offset,
91447636 7449 (vm_map_size_t) (entry->vme_end -
1c79356b
A
7450 entry->vme_start));
7451
7452 entry->object.vm_object->shadowed = TRUE;
7453 entry->needs_copy = FALSE;
7454 vm_map_lock_write_to_read(map);
7455 }
7456 else {
7457 /*
7458 * We're attempting to read a copy-on-write
7459 * page -- don't allow writes.
7460 */
7461
7462 prot &= (~VM_PROT_WRITE);
7463 }
7464 }
7465
7466 /*
7467 * Create an object if necessary.
7468 */
7469 if (entry->object.vm_object == VM_OBJECT_NULL) {
7470
7471 if (vm_map_lock_read_to_write(map)) {
7472 vm_map_lock_read(map);
7473 goto RetryLookup;
7474 }
7475
7476 entry->object.vm_object = vm_object_allocate(
91447636 7477 (vm_map_size_t)(entry->vme_end - entry->vme_start));
1c79356b
A
7478 entry->offset = 0;
7479 vm_map_lock_write_to_read(map);
7480 }
7481
7482 /*
7483 * Return the object/offset from this entry. If the entry
7484 * was copy-on-write or empty, it has been fixed up. Also
7485 * return the protection.
7486 */
7487
7488 *offset = (vaddr - entry->vme_start) + entry->offset;
7489 *object = entry->object.vm_object;
7490 *out_prot = prot;
7491 *behavior = entry->behavior;
7492 *lo_offset = entry->offset;
7493 *hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
7494
7495 /*
7496 * Lock the object to prevent it from disappearing
7497 */
7498
7499 vm_object_lock(*object);
7500
7501 /*
7502 * Save the version number
7503 */
7504
7505 out_version->main_timestamp = map->timestamp;
7506
7507 return KERN_SUCCESS;
7508}
7509
7510
7511/*
7512 * vm_map_verify:
7513 *
7514 * Verifies that the map in question has not changed
7515 * since the given version. If successful, the map
7516 * will not change until vm_map_verify_done() is called.
7517 */
7518boolean_t
7519vm_map_verify(
7520 register vm_map_t map,
7521 register vm_map_version_t *version) /* REF */
7522{
7523 boolean_t result;
7524
7525 vm_map_lock_read(map);
7526 result = (map->timestamp == version->main_timestamp);
7527
7528 if (!result)
7529 vm_map_unlock_read(map);
7530
7531 return(result);
7532}
7533
7534/*
7535 * vm_map_verify_done:
7536 *
7537 * Releases locks acquired by a vm_map_verify.
7538 *
7539 * This is now a macro in vm/vm_map.h. It does a
7540 * vm_map_unlock_read on the map.
7541 */
7542
7543
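/*
 * Typical fault-path usage of the lookup/verify pair above (a sketch only;
 * the local variable names are hypothetical):
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
 *				  &object, &offset, &prot, &wired,
 *				  &behavior, &lo, &hi, &real_map);
 *	... drop the map lock, resolve the fault against "object" ...
 *	if (vm_map_verify(map, &version))
 *		vm_map_verify_done(map, &version);	(map unchanged)
 *	else
 *		... the map changed underneath us: redo the lookup ...
 */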
91447636
A
7544/*
7545 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
7546 * Goes away after regular vm_region_recurse function migrates to
7547 * 64 bits
7548 * vm_region_recurse: A form of vm_region which follows the
7549 * submaps in a target map
7550 *
7551 */
7552
7553kern_return_t
7554vm_map_region_recurse_64(
7555 vm_map_t map,
7556 vm_map_offset_t *address, /* IN/OUT */
7557 vm_map_size_t *size, /* OUT */
7558 natural_t *nesting_depth, /* IN/OUT */
7559 vm_region_submap_info_64_t submap_info, /* IN/OUT */
7560 mach_msg_type_number_t *count) /* IN/OUT */
7561{
7562 vm_region_extended_info_data_t extended;
7563 vm_map_entry_t tmp_entry;
7564 vm_map_offset_t user_address;
7565 unsigned int user_max_depth;
7566
7567 /*
7568 * "curr_entry" is the VM map entry preceding or including the
7569 * address we're looking for.
7570 * "curr_map" is the map or sub-map containing "curr_entry".
7571 * "curr_offset" is the cumulated offset of "curr_map" in the
7572 * target task's address space.
7573 * "curr_depth" is the depth of "curr_map" in the chain of
7574 * sub-maps.
7575 * "curr_max_offset" is the maximum offset we should take into
7576 * account in the current map. It may be smaller than the current
7577 * map's "max_offset" because we might not have mapped it all in
7578 * the upper level map.
7579 */
7580 vm_map_entry_t curr_entry;
7581 vm_map_offset_t curr_offset;
7582 vm_map_t curr_map;
7583 unsigned int curr_depth;
7584 vm_map_offset_t curr_max_offset;
7585
7586 /*
7587 * "next_" is the same as "curr_" but for the VM region immediately
7588 * after the address we're looking for. We need to keep track of this
7589 * too because we want to return info about that region if the
7590 * address we're looking for is not mapped.
7591 */
7592 vm_map_entry_t next_entry;
7593 vm_map_offset_t next_offset;
7594 vm_map_t next_map;
7595 unsigned int next_depth;
7596 vm_map_offset_t next_max_offset;
7597
7598 if (map == VM_MAP_NULL) {
7599 /* no address space to work on */
7600 return KERN_INVALID_ARGUMENT;
7601 }
7602
7603 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
7604 /* "info" structure is not big enough and would overflow */
7605 return KERN_INVALID_ARGUMENT;
7606 }
7607
7608 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
7609
7610 user_address = *address;
7611 user_max_depth = *nesting_depth;
7612
7613 curr_entry = NULL;
7614 curr_map = map;
7615 curr_offset = 0;
7616 curr_depth = 0;
7617 curr_max_offset = curr_map->max_offset;
7618
7619 next_entry = NULL;
7620 next_map = NULL;
7621 next_offset = 0;
7622 next_depth = 0;
7623 next_max_offset = curr_max_offset;
7624
7625 if (not_in_kdp) {
7626 vm_map_lock_read(curr_map);
7627 }
7628
7629 for (;;) {
7630 if (vm_map_lookup_entry(curr_map,
7631 user_address - curr_offset,
7632 &tmp_entry)) {
7633 /* tmp_entry contains the address we're looking for */
7634 curr_entry = tmp_entry;
7635 } else {
7636 /*
7637 * The address is not mapped. "tmp_entry" is the
7638 * map entry preceding the address. We want the next
7639 * one, if it exists.
7640 */
7641 curr_entry = tmp_entry->vme_next;
7642 if (curr_entry == vm_map_to_entry(curr_map) ||
7643 curr_entry->vme_start >= curr_max_offset) {
7644 /* no next entry at this level: stop looking */
7645 if (not_in_kdp) {
7646 vm_map_unlock_read(curr_map);
7647 }
7648 curr_entry = NULL;
7649 curr_map = NULL;
7650 curr_offset = 0;
7651 curr_depth = 0;
7652 curr_max_offset = 0;
7653 break;
7654 }
7655 }
7656
7657 /*
7658 * Is the next entry at this level closer to the address (or
7659 * deeper in the submap chain) than the one we had
7660 * so far ?
7661 */
7662 tmp_entry = curr_entry->vme_next;
7663 if (tmp_entry == vm_map_to_entry(curr_map)) {
7664 /* no next entry at this level */
7665 } else if (tmp_entry->vme_start >= curr_max_offset) {
7666 /*
7667 * tmp_entry is beyond the scope of what we mapped of
7668 * this submap in the upper level: ignore it.
7669 */
7670 } else if ((next_entry == NULL) ||
7671 (tmp_entry->vme_start + curr_offset <=
7672 next_entry->vme_start + next_offset)) {
7673 /*
7674 * We didn't have a "next_entry" or this one is
7675 * closer to the address we're looking for:
7676 * use this "tmp_entry" as the new "next_entry".
7677 */
7678 if (next_entry != NULL) {
7679 /* unlock the last "next_map" */
7680 if (next_map != curr_map && not_in_kdp) {
7681 vm_map_unlock_read(next_map);
7682 }
7683 }
7684 next_entry = tmp_entry;
7685 next_map = curr_map;
7686 next_offset = curr_offset;
7687 next_depth = curr_depth;
7688 next_max_offset = curr_max_offset;
7689 }
7690
7691 if (!curr_entry->is_sub_map ||
7692 curr_depth >= user_max_depth) {
7693 /*
7694 * We hit a leaf map or we reached the maximum depth
7695 * we could, so stop looking. Keep the current map
7696 * locked.
7697 */
7698 break;
7699 }
7700
7701 /*
7702 * Get down to the next submap level.
7703 */
7704
7705 /*
7706 * Lock the next level and unlock the current level,
7707 * unless we need to keep it locked to access the "next_entry"
7708 * later.
7709 */
7710 if (not_in_kdp) {
7711 vm_map_lock_read(curr_entry->object.sub_map);
7712 }
7713 if (curr_map == next_map) {
7714 /* keep "next_map" locked in case we need it */
7715 } else {
7716 /* release this map */
7717 vm_map_unlock_read(curr_map);
7718 }
7719
7720 /*
7721 * Adjust the offset. "curr_entry" maps the submap
7722 * at relative address "curr_entry->vme_start" in the
7723 * curr_map but skips the first "curr_entry->offset"
7724 * bytes of the submap.
7725 * "curr_offset" always represents the offset of a virtual
7726 * address in the curr_map relative to the absolute address
7727 * space (i.e. the top-level VM map).
7728 */
7729 curr_offset +=
7730 (curr_entry->vme_start - curr_entry->offset);
7731 /* switch to the submap */
7732 curr_map = curr_entry->object.sub_map;
7733 curr_depth++;
7734 /*
7735 * "curr_max_offset" allows us to keep track of the
7736 * portion of the submap that is actually mapped at this level:
7737 * the rest of that submap is irrelevant to us, since it's not
7738 * mapped here.
7739 * The relevant portion of the map starts at
7740 * "curr_entry->offset" up to the size of "curr_entry".
7741 */
7742 curr_max_offset =
7743 curr_entry->vme_end - curr_entry->vme_start +
7744 curr_entry->offset;
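		/*
		 * Worked example (numbers are illustrative): an entry at
		 * [0x7000, 0x9000) mapping a submap from submap offset
		 * 0x2000 adds 0x7000 - 0x2000 = 0x5000 to curr_offset (so
		 * submap address + accumulated offset = top-level address)
		 * and sets curr_max_offset to 0x9000 - 0x7000 + 0x2000 =
		 * 0x4000: submap addresses at or beyond 0x4000 are outside
		 * what this entry maps.
		 */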
7745 curr_entry = NULL;
7746 }
7747
7748 if (curr_entry == NULL) {
7749 /* no VM region contains the address... */
7750 if (next_entry == NULL) {
7751 /* ... and no VM region follows it either */
7752 return KERN_INVALID_ADDRESS;
7753 }
7754 /* ... gather info about the next VM region */
7755 curr_entry = next_entry;
7756 curr_map = next_map; /* still locked ... */
7757 curr_offset = next_offset;
7758 curr_depth = next_depth;
7759 curr_max_offset = next_max_offset;
7760 } else {
7761 /* we won't need "next_entry" after all */
7762 if (next_entry != NULL) {
7763 /* release "next_map" */
7764 if (next_map != curr_map && not_in_kdp) {
7765 vm_map_unlock_read(next_map);
7766 }
7767 }
7768 }
7769 next_entry = NULL;
7770 next_map = NULL;
7771 next_offset = 0;
7772 next_depth = 0;
7773 next_max_offset = 0;
7774
7775 *nesting_depth = curr_depth;
7776 *size = curr_entry->vme_end - curr_entry->vme_start;
7777 *address = curr_entry->vme_start + curr_offset;
7778
7779 submap_info->user_tag = curr_entry->alias;
7780 submap_info->offset = curr_entry->offset;
7781 submap_info->protection = curr_entry->protection;
7782 submap_info->inheritance = curr_entry->inheritance;
7783 submap_info->max_protection = curr_entry->max_protection;
7784 submap_info->behavior = curr_entry->behavior;
7785 submap_info->user_wired_count = curr_entry->user_wired_count;
7786 submap_info->is_submap = curr_entry->is_sub_map;
7787 submap_info->object_id = (uint32_t) curr_entry->object.vm_object;
7788
7789 extended.pages_resident = 0;
7790 extended.pages_swapped_out = 0;
7791 extended.pages_shared_now_private = 0;
7792 extended.pages_dirtied = 0;
7793 extended.external_pager = 0;
7794 extended.shadow_depth = 0;
7795
7796 if (not_in_kdp) {
7797 if (!curr_entry->is_sub_map) {
7798 vm_map_region_walk(curr_map,
7799 curr_entry->vme_start,
7800 curr_entry,
7801 curr_entry->offset,
7802 (curr_entry->vme_end -
7803 curr_entry->vme_start),
7804 &extended);
7805 submap_info->share_mode = extended.share_mode;
7806 if (extended.external_pager &&
7807 extended.ref_count == 2 &&
7808 extended.share_mode == SM_SHARED) {
7809 submap_info->share_mode = SM_PRIVATE;
7810 }
7811 submap_info->ref_count = extended.ref_count;
7812 } else {
7813 if (curr_entry->use_pmap) {
7814 submap_info->share_mode = SM_TRUESHARED;
7815 } else {
7816 submap_info->share_mode = SM_PRIVATE;
7817 }
7818 submap_info->ref_count =
7819 curr_entry->object.sub_map->ref_count;
7820 }
7821 }
7822
7823 submap_info->pages_resident = extended.pages_resident;
7824 submap_info->pages_swapped_out = extended.pages_swapped_out;
7825 submap_info->pages_shared_now_private =
7826 extended.pages_shared_now_private;
7827 submap_info->pages_dirtied = extended.pages_dirtied;
7828 submap_info->external_pager = extended.external_pager;
7829 submap_info->shadow_depth = extended.shadow_depth;
7830
7831 if (not_in_kdp) {
7832 vm_map_unlock_read(curr_map);
7833 }
7834
7835 return KERN_SUCCESS;
7836}
7837
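/*
 * Illustrative sketch, not part of the original source: the submap
 * traversal above backs the mach_vm_region_recurse() interface.  A
 * user-space walk over a task's regions, descending into nested
 * submaps, might look roughly like this (the helper name and the
 * minimal error handling are made up for illustration):
 */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>

static void
dump_regions(mach_port_t task)
{
	mach_vm_address_t		addr = 0;
	mach_vm_size_t			size;
	natural_t			depth = 0;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(task, &addr, &size, &depth,
			(vm_region_recurse_info_t)&info,
			&count) != KERN_SUCCESS)
			break;			/* no region at or above "addr" */
		if (info.is_submap) {
			depth++;		/* descend into the nested map */
			continue;
		}
		printf("0x%llx-0x%llx depth %u prot %x\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    depth, info.protection);
		addr += size;			/* move past this region */
	}
}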
1c79356b
A
7838/*
7839 * vm_region:
7840 *
7841 * User call to obtain information about a region in
7842 * a task's address map. Currently, only one flavor is
7843 * supported.
7844 *
7845 * XXX The reserved and behavior fields cannot be filled
7846 * in until the vm merge from the IK is completed, and
7847 * vm_reserve is implemented.
1c79356b
A
7848 */
7849
7850kern_return_t
91447636 7851vm_map_region(
1c79356b 7852 vm_map_t map,
91447636
A
7853 vm_map_offset_t *address, /* IN/OUT */
7854 vm_map_size_t *size, /* OUT */
1c79356b
A
7855 vm_region_flavor_t flavor, /* IN */
7856 vm_region_info_t info, /* OUT */
91447636
A
7857 mach_msg_type_number_t *count, /* IN/OUT */
7858 mach_port_t *object_name) /* OUT */
1c79356b
A
7859{
7860 vm_map_entry_t tmp_entry;
1c79356b 7861 vm_map_entry_t entry;
91447636 7862 vm_map_offset_t start;
1c79356b
A
7863
7864 if (map == VM_MAP_NULL)
7865 return(KERN_INVALID_ARGUMENT);
7866
7867 switch (flavor) {
91447636 7868
1c79356b 7869 case VM_REGION_BASIC_INFO:
91447636 7870 /* legacy for old 32-bit objects info */
1c79356b 7871 {
91447636
A
7872 vm_region_basic_info_t basic;
7873
1c79356b
A
7874 if (*count < VM_REGION_BASIC_INFO_COUNT)
7875 return(KERN_INVALID_ARGUMENT);
7876
7877 basic = (vm_region_basic_info_t) info;
7878 *count = VM_REGION_BASIC_INFO_COUNT;
7879
7880 vm_map_lock_read(map);
7881
7882 start = *address;
7883 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7884 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7885 vm_map_unlock_read(map);
7886 return(KERN_INVALID_ADDRESS);
7887 }
7888 } else {
7889 entry = tmp_entry;
7890 }
7891
7892 start = entry->vme_start;
7893
91447636
A
7894 basic->offset = (uint32_t)entry->offset;
7895 basic->protection = entry->protection;
7896 basic->inheritance = entry->inheritance;
7897 basic->max_protection = entry->max_protection;
7898 basic->behavior = entry->behavior;
7899 basic->user_wired_count = entry->user_wired_count;
7900 basic->reserved = entry->is_sub_map;
7901 *address = start;
7902 *size = (entry->vme_end - start);
7903
7904 if (object_name) *object_name = IP_NULL;
7905 if (entry->is_sub_map) {
7906 basic->shared = FALSE;
7907 } else {
7908 basic->shared = entry->is_shared;
7909 }
7910
7911 vm_map_unlock_read(map);
7912 return(KERN_SUCCESS);
7913 }
7914
7915 case VM_REGION_BASIC_INFO_64:
7916 {
7917 vm_region_basic_info_64_t basic;
7918
7919 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
7920 return(KERN_INVALID_ARGUMENT);
7921
7922 basic = (vm_region_basic_info_64_t) info;
7923 *count = VM_REGION_BASIC_INFO_COUNT_64;
7924
7925 vm_map_lock_read(map);
7926
7927 start = *address;
7928 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7929 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7930 vm_map_unlock_read(map);
7931 return(KERN_INVALID_ADDRESS);
7932 }
7933 } else {
7934 entry = tmp_entry;
7935 }
7936
7937 start = entry->vme_start;
7938
1c79356b
A
7939 basic->offset = entry->offset;
7940 basic->protection = entry->protection;
7941 basic->inheritance = entry->inheritance;
7942 basic->max_protection = entry->max_protection;
7943 basic->behavior = entry->behavior;
7944 basic->user_wired_count = entry->user_wired_count;
7945 basic->reserved = entry->is_sub_map;
7946 *address = start;
7947 *size = (entry->vme_end - start);
7948
7949 if (object_name) *object_name = IP_NULL;
7950 if (entry->is_sub_map) {
7951 basic->shared = FALSE;
7952 } else {
7953 basic->shared = entry->is_shared;
7954 }
7955
7956 vm_map_unlock_read(map);
7957 return(KERN_SUCCESS);
7958 }
7959 case VM_REGION_EXTENDED_INFO:
7960 {
91447636 7961 vm_region_extended_info_t extended;
1c79356b
A
7962
7963 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
7964 return(KERN_INVALID_ARGUMENT);
7965
7966 extended = (vm_region_extended_info_t) info;
7967 *count = VM_REGION_EXTENDED_INFO_COUNT;
7968
7969 vm_map_lock_read(map);
7970
7971 start = *address;
7972 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7973 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7974 vm_map_unlock_read(map);
7975 return(KERN_INVALID_ADDRESS);
7976 }
7977 } else {
7978 entry = tmp_entry;
7979 }
7980 start = entry->vme_start;
7981
7982 extended->protection = entry->protection;
7983 extended->user_tag = entry->alias;
7984 extended->pages_resident = 0;
7985 extended->pages_swapped_out = 0;
7986 extended->pages_shared_now_private = 0;
0b4e3aa0 7987 extended->pages_dirtied = 0;
1c79356b
A
7988 extended->external_pager = 0;
7989 extended->shadow_depth = 0;
7990
91447636 7991 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended);
1c79356b
A
7992
7993 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
7994 extended->share_mode = SM_PRIVATE;
7995
7996 if (object_name)
7997 *object_name = IP_NULL;
7998 *address = start;
7999 *size = (entry->vme_end - start);
8000
8001 vm_map_unlock_read(map);
8002 return(KERN_SUCCESS);
8003 }
8004 case VM_REGION_TOP_INFO:
8005 {
91447636 8006 vm_region_top_info_t top;
1c79356b
A
8007
8008 if (*count < VM_REGION_TOP_INFO_COUNT)
8009 return(KERN_INVALID_ARGUMENT);
8010
8011 top = (vm_region_top_info_t) info;
8012 *count = VM_REGION_TOP_INFO_COUNT;
8013
8014 vm_map_lock_read(map);
8015
8016 start = *address;
8017 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8018 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8019 vm_map_unlock_read(map);
8020 return(KERN_INVALID_ADDRESS);
8021 }
8022 } else {
8023 entry = tmp_entry;
8024
8025 }
8026 start = entry->vme_start;
8027
8028 top->private_pages_resident = 0;
8029 top->shared_pages_resident = 0;
8030
91447636 8031 vm_map_region_top_walk(entry, top);
1c79356b
A
8032
8033 if (object_name)
8034 *object_name = IP_NULL;
8035 *address = start;
8036 *size = (entry->vme_end - start);
8037
8038 vm_map_unlock_read(map);
8039 return(KERN_SUCCESS);
8040 }
8041 default:
8042 return(KERN_INVALID_ARGUMENT);
8043 }
8044}
8045
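/*
 * Illustrative sketch, not part of the original source: vm_map_region()
 * above is reached from user space through mach_vm_region().  Querying
 * the 64-bit basic flavor for the region containing (or following) an
 * address might look like this (the helper name is made up):
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
region_protection(mach_port_t task, mach_vm_address_t where, vm_prot_t *prot)
{
	mach_vm_address_t		addr = where;
	mach_vm_size_t			size;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name;	/* returned as MACH_PORT_NULL */
	kern_return_t			kr;

	kr = mach_vm_region(task, &addr, &size, VM_REGION_BASIC_INFO_64,
	    (vm_region_info_t)&info, &count, &object_name);
	if (kr == KERN_SUCCESS)
		*prot = info.protection;
	return kr;
}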
0c530ab8 8046void
91447636
A
8047vm_map_region_top_walk(
8048 vm_map_entry_t entry,
8049 vm_region_top_info_t top)
1c79356b 8050{
91447636
A
8051 register struct vm_object *obj, *tmp_obj;
8052 register int ref_count;
1c79356b 8053
91447636
A
8054 if (entry->object.vm_object == 0 || entry->is_sub_map) {
8055 top->share_mode = SM_EMPTY;
8056 top->ref_count = 0;
8057 top->obj_id = 0;
8058 return;
1c79356b 8059 }
91447636
A
8060 {
8061 obj = entry->object.vm_object;
1c79356b 8062
91447636 8063 vm_object_lock(obj);
1c79356b 8064
91447636
A
8065 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8066 ref_count--;
1c79356b 8067
91447636
A
8068 if (obj->shadow) {
8069 if (ref_count == 1)
8070 top->private_pages_resident = obj->resident_page_count;
8071 else
8072 top->shared_pages_resident = obj->resident_page_count;
8073 top->ref_count = ref_count;
8074 top->share_mode = SM_COW;
8075
8076 while ((tmp_obj = obj->shadow)) {
8077 vm_object_lock(tmp_obj);
8078 vm_object_unlock(obj);
8079 obj = tmp_obj;
1c79356b 8080
91447636
A
8081 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8082 ref_count--;
1c79356b 8083
91447636
A
8084 top->shared_pages_resident += obj->resident_page_count;
8085 top->ref_count += ref_count - 1;
1c79356b 8086 }
91447636
A
8087 } else {
8088 if (entry->needs_copy) {
8089 top->share_mode = SM_COW;
8090 top->shared_pages_resident = obj->resident_page_count;
1c79356b 8091 } else {
91447636
A
8092 if (ref_count == 1 ||
8093 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
8094 top->share_mode = SM_PRIVATE;
8095 top->private_pages_resident = obj->resident_page_count;
8096 } else {
8097 top->share_mode = SM_SHARED;
8098 top->shared_pages_resident = obj->resident_page_count;
8099 }
1c79356b 8100 }
91447636
A
8101 top->ref_count = ref_count;
8102 }
8103 top->obj_id = (int)obj;
1c79356b 8104
91447636 8105 vm_object_unlock(obj);
1c79356b 8106 }
91447636
A
8107}
8108
0c530ab8 8109void
91447636
A
8110vm_map_region_walk(
8111 vm_map_t map,
8112 vm_map_offset_t va,
8113 vm_map_entry_t entry,
8114 vm_object_offset_t offset,
8115 vm_object_size_t range,
8116 vm_region_extended_info_t extended)
8117{
8118 register struct vm_object *obj, *tmp_obj;
8119 register vm_map_offset_t last_offset;
8120 register int i;
8121 register int ref_count;
8122 struct vm_object *shadow_object;
8123 int shadow_depth;
8124
8125 if ((entry->object.vm_object == 0) ||
8126 (entry->is_sub_map) ||
8127 (entry->object.vm_object->phys_contiguous)) {
8128 extended->share_mode = SM_EMPTY;
8129 extended->ref_count = 0;
8130 return;
1c79356b 8131 }
91447636
A
8132 {
8133 obj = entry->object.vm_object;
1c79356b 8134
91447636 8135 vm_object_lock(obj);
1c79356b 8136
91447636
A
8137 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8138 ref_count--;
1c79356b 8139
91447636
A
8140 for (last_offset = offset + range; offset < last_offset; offset += PAGE_SIZE_64, va += PAGE_SIZE)
8141 vm_map_region_look_for_page(map, va, obj, offset, ref_count, 0, extended);
8142
8143 shadow_object = obj->shadow;
8144 shadow_depth = 0;
8145 if (shadow_object != VM_OBJECT_NULL) {
8146 vm_object_lock(shadow_object);
8147 for (;
8148 shadow_object != VM_OBJECT_NULL;
8149 shadow_depth++) {
8150 vm_object_t next_shadow;
8151
8152 next_shadow = shadow_object->shadow;
8153 if (next_shadow) {
8154 vm_object_lock(next_shadow);
8155 }
8156 vm_object_unlock(shadow_object);
8157 shadow_object = next_shadow;
8158 }
8159 }
8160 extended->shadow_depth = shadow_depth;
1c79356b 8161
91447636
A
8162 if (extended->shadow_depth || entry->needs_copy)
8163 extended->share_mode = SM_COW;
8164 else {
8165 if (ref_count == 1)
8166 extended->share_mode = SM_PRIVATE;
8167 else {
8168 if (obj->true_share)
8169 extended->share_mode = SM_TRUESHARED;
8170 else
8171 extended->share_mode = SM_SHARED;
8172 }
8173 }
8174 extended->ref_count = ref_count - extended->shadow_depth;
8175
8176 for (i = 0; i < extended->shadow_depth; i++) {
8177 if ((tmp_obj = obj->shadow) == 0)
8178 break;
8179 vm_object_lock(tmp_obj);
8180 vm_object_unlock(obj);
1c79356b 8181
91447636
A
8182 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
8183 ref_count--;
1c79356b 8184
91447636
A
8185 extended->ref_count += ref_count;
8186 obj = tmp_obj;
8187 }
8188 vm_object_unlock(obj);
1c79356b 8189
91447636
A
8190 if (extended->share_mode == SM_SHARED) {
8191 register vm_map_entry_t cur;
8192 register vm_map_entry_t last;
8193 int my_refs;
8194
8195 obj = entry->object.vm_object;
8196 last = vm_map_to_entry(map);
8197 my_refs = 0;
8198
8199 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8200 ref_count--;
8201 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
8202 my_refs += vm_map_region_count_obj_refs(cur, obj);
8203
8204 if (my_refs == ref_count)
8205 extended->share_mode = SM_PRIVATE_ALIASED;
8206 else if (my_refs > 1)
8207 extended->share_mode = SM_SHARED_ALIASED;
8208 }
8209 }
1c79356b
A
8210}
8211
1c79356b 8212
91447636
A
8213/* object is locked on entry and locked on return */
8214
8215
8216static void
8217vm_map_region_look_for_page(
8218 __unused vm_map_t map,
8219 __unused vm_map_offset_t va,
8220 vm_object_t object,
8221 vm_object_offset_t offset,
8222 int max_refcnt,
8223 int depth,
8224 vm_region_extended_info_t extended)
1c79356b 8225{
91447636
A
8226 register vm_page_t p;
8227 register vm_object_t shadow;
8228 register int ref_count;
8229 vm_object_t caller_object;
8230
8231 shadow = object->shadow;
8232 caller_object = object;
1c79356b 8233
91447636
A
8234
8235 while (TRUE) {
1c79356b 8236
91447636
A
8237 if ( !(object->pager_trusted) && !(object->internal))
8238 extended->external_pager = 1;
1c79356b 8239
91447636
A
8240 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
8241 if (shadow && (max_refcnt == 1))
8242 extended->pages_shared_now_private++;
1c79356b 8243
91447636
A
8244 if (!p->fictitious &&
8245 (p->dirty || pmap_is_modified(p->phys_page)))
8246 extended->pages_dirtied++;
1c79356b 8247
91447636
A
8248 extended->pages_resident++;
8249
8250 if(object != caller_object)
8251 vm_object_unlock(object);
8252
8253 return;
1c79356b 8254 }
91447636
A
8255 if (object->existence_map) {
8256 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
1c79356b 8257
91447636 8258 extended->pages_swapped_out++;
1c79356b 8259
91447636
A
8260 if(object != caller_object)
8261 vm_object_unlock(object);
1c79356b 8262
91447636
A
8263 return;
8264 }
1c79356b 8265 }
91447636
A
8266 if (shadow) {
8267 vm_object_lock(shadow);
1c79356b 8268
91447636
A
8269 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
8270 ref_count--;
1c79356b 8271
91447636
A
8272 if (++depth > extended->shadow_depth)
8273 extended->shadow_depth = depth;
1c79356b 8274
91447636
A
8275 if (ref_count > max_refcnt)
8276 max_refcnt = ref_count;
8277
8278 if(object != caller_object)
8279 vm_object_unlock(object);
8280
8281 offset = offset + object->shadow_offset;
8282 object = shadow;
8283 shadow = object->shadow;
8284 continue;
1c79356b 8285 }
91447636
A
8286 if(object != caller_object)
8287 vm_object_unlock(object);
8288 break;
8289 }
8290}
1c79356b 8291
91447636
A
8292static int
8293vm_map_region_count_obj_refs(
8294 vm_map_entry_t entry,
8295 vm_object_t object)
8296{
8297 register int ref_count;
8298 register vm_object_t chk_obj;
8299 register vm_object_t tmp_obj;
1c79356b 8300
91447636
A
8301 if (entry->object.vm_object == 0)
8302 return(0);
1c79356b 8303
91447636
A
8304 if (entry->is_sub_map)
8305 return(0);
8306 else {
8307 ref_count = 0;
1c79356b 8308
91447636
A
8309 chk_obj = entry->object.vm_object;
8310 vm_object_lock(chk_obj);
1c79356b 8311
91447636
A
8312 while (chk_obj) {
8313 if (chk_obj == object)
8314 ref_count++;
8315 tmp_obj = chk_obj->shadow;
8316 if (tmp_obj)
8317 vm_object_lock(tmp_obj);
8318 vm_object_unlock(chk_obj);
1c79356b 8319
91447636
A
8320 chk_obj = tmp_obj;
8321 }
1c79356b 8322 }
91447636 8323 return(ref_count);
1c79356b
A
8324}
8325
8326
8327/*
91447636
A
8328 * Routine: vm_map_simplify
8329 *
8330 * Description:
8331 * Attempt to simplify the map representation in
8332 * the vicinity of the given starting address.
8333 * Note:
8334 * This routine is intended primarily to keep the
8335 * kernel maps more compact -- they generally don't
8336 * benefit from the "expand a map entry" technology
8337 * at allocation time because the adjacent entry
8338 * is often wired down.
1c79356b 8339 */
91447636
A
8340void
8341vm_map_simplify_entry(
8342 vm_map_t map,
8343 vm_map_entry_t this_entry)
1c79356b 8344{
91447636 8345 vm_map_entry_t prev_entry;
1c79356b 8346
91447636 8347 counter(c_vm_map_simplify_entry_called++);
1c79356b 8348
91447636 8349 prev_entry = this_entry->vme_prev;
1c79356b 8350
91447636
A
8351 if ((this_entry != vm_map_to_entry(map)) &&
8352 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 8353
91447636 8354 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 8355
91447636
A
8356 (prev_entry->is_sub_map == FALSE) &&
8357 (this_entry->is_sub_map == FALSE) &&
1c79356b 8358
91447636
A
8359 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
8360 ((prev_entry->offset + (prev_entry->vme_end -
8361 prev_entry->vme_start))
8362 == this_entry->offset) &&
1c79356b 8363
91447636
A
8364 (prev_entry->inheritance == this_entry->inheritance) &&
8365 (prev_entry->protection == this_entry->protection) &&
8366 (prev_entry->max_protection == this_entry->max_protection) &&
8367 (prev_entry->behavior == this_entry->behavior) &&
8368 (prev_entry->alias == this_entry->alias) &&
8369 (prev_entry->wired_count == this_entry->wired_count) &&
8370 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 8371
91447636 8372 (prev_entry->needs_copy == this_entry->needs_copy) &&
1c79356b 8373
91447636
A
8374 (prev_entry->use_pmap == FALSE) &&
8375 (this_entry->use_pmap == FALSE) &&
8376 (prev_entry->in_transition == FALSE) &&
8377 (this_entry->in_transition == FALSE) &&
8378 (prev_entry->needs_wakeup == FALSE) &&
8379 (this_entry->needs_wakeup == FALSE) &&
8380 (prev_entry->is_shared == FALSE) &&
8381 (this_entry->is_shared == FALSE)
8382 ) {
8383 _vm_map_entry_unlink(&map->hdr, prev_entry);
8384 this_entry->vme_start = prev_entry->vme_start;
8385 this_entry->offset = prev_entry->offset;
8386 vm_object_deallocate(prev_entry->object.vm_object);
8387 vm_map_entry_dispose(map, prev_entry);
0c530ab8 8388 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 8389 counter(c_vm_map_simplified++);
1c79356b 8390 }
91447636 8391}
1c79356b 8392
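/*
 * Illustrative sketch, not part of the original source: the long
 * conjunction in vm_map_simplify_entry() above reduces to "the two
 * neighbours are contiguous both in the address space and in the
 * backing object, and every other attribute matches".  A simplified,
 * hypothetical model of that test:
 */
struct toy_entry {
	unsigned long	start, end;	/* [start, end) in the address space */
	void		*object;	/* backing VM object */
	unsigned long	offset;		/* offset of "start" within the object */
	int		prot;		/* stands in for protection, inheritance,
					 * wiring, alias, flags, ... */
};

static int
toy_can_coalesce(const struct toy_entry *prev, const struct toy_entry *this)
{
	return prev->end == this->start &&		/* adjacent addresses */
	    prev->object == this->object &&		/* same backing object */
	    prev->offset + (prev->end - prev->start)
		== this->offset &&			/* adjacent in the object */
	    prev->prot == this->prot;			/* identical attributes */
}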
91447636
A
8393void
8394vm_map_simplify(
8395 vm_map_t map,
8396 vm_map_offset_t start)
8397{
8398 vm_map_entry_t this_entry;
1c79356b 8399
91447636
A
8400 vm_map_lock(map);
8401 if (vm_map_lookup_entry(map, start, &this_entry)) {
8402 vm_map_simplify_entry(map, this_entry);
8403 vm_map_simplify_entry(map, this_entry->vme_next);
8404 }
8405 counter(c_vm_map_simplify_called++);
8406 vm_map_unlock(map);
8407}
1c79356b 8408
91447636
A
8409static void
8410vm_map_simplify_range(
8411 vm_map_t map,
8412 vm_map_offset_t start,
8413 vm_map_offset_t end)
8414{
8415 vm_map_entry_t entry;
1c79356b 8416
91447636
A
8417 /*
8418 * The map should be locked (for "write") by the caller.
8419 */
1c79356b 8420
91447636
A
8421 if (start >= end) {
8422 /* invalid address range */
8423 return;
8424 }
1c79356b 8425
91447636
A
8426 if (!vm_map_lookup_entry(map, start, &entry)) {
8427 /* "start" is not mapped and "entry" ends before "start" */
8428 if (entry == vm_map_to_entry(map)) {
8429 /* start with first entry in the map */
8430 entry = vm_map_first_entry(map);
8431 } else {
8432 /* start with next entry */
8433 entry = entry->vme_next;
8434 }
8435 }
8436
8437 while (entry != vm_map_to_entry(map) &&
8438 entry->vme_start <= end) {
8439 /* try and coalesce "entry" with its previous entry */
8440 vm_map_simplify_entry(map, entry);
8441 entry = entry->vme_next;
8442 }
8443}
1c79356b 8444
1c79356b 8445
91447636
A
8446/*
8447 * Routine: vm_map_machine_attribute
8448 * Purpose:
8449 * Provide machine-specific attributes to mappings,
8450 * such as cacheability etc. for machines that provide
8451 * them. NUMA architectures and machines with big/strange
8452 * caches will use this.
8453 * Note:
8454 * Responsibilities for locking and checking are handled here,
8455 * everything else in the pmap module. If any non-volatile
8456 * information must be kept, the pmap module should handle
8457 * it itself. [This assumes that attributes do not
8458 * need to be inherited, which seems ok to me]
8459 */
8460kern_return_t
8461vm_map_machine_attribute(
8462 vm_map_t map,
8463 vm_map_offset_t start,
8464 vm_map_offset_t end,
8465 vm_machine_attribute_t attribute,
8466 vm_machine_attribute_val_t* value) /* IN/OUT */
8467{
8468 kern_return_t ret;
8469 vm_map_size_t sync_size;
8470 vm_map_entry_t entry;
8471
8472 if (start < vm_map_min(map) || end > vm_map_max(map))
8473 return KERN_INVALID_ADDRESS;
1c79356b 8474
91447636
A
8475 /* Figure how much memory we need to flush (in page increments) */
8476 sync_size = end - start;
1c79356b 8477
91447636
A
8478 vm_map_lock(map);
8479
8480 if (attribute != MATTR_CACHE) {
8481 /* If we don't have to find physical addresses, we */
8482 /* don't have to do an explicit traversal here. */
8483 ret = pmap_attribute(map->pmap, start, end-start,
8484 attribute, value);
8485 vm_map_unlock(map);
8486 return ret;
8487 }
1c79356b 8488
91447636 8489 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 8490
91447636
A
8491 while(sync_size) {
8492 if (vm_map_lookup_entry(map, start, &entry)) {
8493 vm_map_size_t sub_size;
8494 if((entry->vme_end - start) > sync_size) {
8495 sub_size = sync_size;
8496 sync_size = 0;
8497 } else {
8498 sub_size = entry->vme_end - start;
8499 sync_size -= sub_size;
8500 }
8501 if(entry->is_sub_map) {
8502 vm_map_offset_t sub_start;
8503 vm_map_offset_t sub_end;
1c79356b 8504
91447636
A
8505 sub_start = (start - entry->vme_start)
8506 + entry->offset;
8507 sub_end = sub_start + sub_size;
8508 vm_map_machine_attribute(
8509 entry->object.sub_map,
8510 sub_start,
8511 sub_end,
8512 attribute, value);
8513 } else {
8514 if(entry->object.vm_object) {
8515 vm_page_t m;
8516 vm_object_t object;
8517 vm_object_t base_object;
8518 vm_object_t last_object;
8519 vm_object_offset_t offset;
8520 vm_object_offset_t base_offset;
8521 vm_map_size_t range;
8522 range = sub_size;
8523 offset = (start - entry->vme_start)
8524 + entry->offset;
8525 base_offset = offset;
8526 object = entry->object.vm_object;
8527 base_object = object;
8528 last_object = NULL;
1c79356b 8529
91447636 8530 vm_object_lock(object);
1c79356b 8531
91447636
A
8532 while (range) {
8533 m = vm_page_lookup(
8534 object, offset);
1c79356b 8535
91447636
A
8536 if (m && !m->fictitious) {
8537 ret =
8538 pmap_attribute_cache_sync(
8539 m->phys_page,
8540 PAGE_SIZE,
8541 attribute, value);
8542
8543 } else if (object->shadow) {
8544 offset = offset + object->shadow_offset;
8545 last_object = object;
8546 object = object->shadow;
8547 vm_object_lock(last_object->shadow);
8548 vm_object_unlock(last_object);
8549 continue;
8550 }
8551 range -= PAGE_SIZE;
1c79356b 8552
91447636
A
8553 if (base_object != object) {
8554 vm_object_unlock(object);
8555 vm_object_lock(base_object);
8556 object = base_object;
8557 }
8558 /* Bump to the next page */
8559 base_offset += PAGE_SIZE;
8560 offset = base_offset;
8561 }
8562 vm_object_unlock(object);
8563 }
8564 }
8565 start += sub_size;
8566 } else {
8567 vm_map_unlock(map);
8568 return KERN_FAILURE;
8569 }
8570
1c79356b 8571 }
e5568f75 8572
91447636 8573 vm_map_unlock(map);
e5568f75 8574
91447636
A
8575 return ret;
8576}
e5568f75 8577
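/*
 * Illustrative sketch, not part of the original source: the routine
 * above sits behind the vm_machine_attribute() interface.  Asking the
 * pmap layer to flush the caches covering a freshly written code
 * buffer might look like this, assuming the platform supports the
 * MATTR_CACHE attribute (the helper name is made up):
 */
#include <mach/mach.h>

static kern_return_t
flush_code_cache(vm_address_t buf, vm_size_t len)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_machine_attribute(mach_task_self(), buf, len,
	    MATTR_CACHE, &value);
}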
91447636
A
8578/*
8579 * vm_map_behavior_set:
8580 *
8581 * Sets the paging reference behavior of the specified address
8582 * range in the target map. Paging reference behavior affects
8583 * how pagein operations resulting from faults on the map will be
8584 * clustered.
8585 */
8586kern_return_t
8587vm_map_behavior_set(
8588 vm_map_t map,
8589 vm_map_offset_t start,
8590 vm_map_offset_t end,
8591 vm_behavior_t new_behavior)
8592{
8593 register vm_map_entry_t entry;
8594 vm_map_entry_t temp_entry;
e5568f75 8595
91447636
A
8596 XPR(XPR_VM_MAP,
8597 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
8598 (integer_t)map, start, end, new_behavior, 0);
e5568f75 8599
91447636
A
8600 switch (new_behavior) {
8601 case VM_BEHAVIOR_DEFAULT:
8602 case VM_BEHAVIOR_RANDOM:
8603 case VM_BEHAVIOR_SEQUENTIAL:
8604 case VM_BEHAVIOR_RSEQNTL:
8605 break;
8606 case VM_BEHAVIOR_WILLNEED:
8607 case VM_BEHAVIOR_DONTNEED:
8608 new_behavior = VM_BEHAVIOR_DEFAULT;
8609 break;
1c79356b 8610 default:
91447636 8611 return(KERN_INVALID_ARGUMENT);
1c79356b 8612 }
1c79356b 8613
91447636 8614 vm_map_lock(map);
1c79356b 8615
91447636
A
8616 /*
8617 * The entire address range must be valid for the map.
8618 * Note that vm_map_range_check() does a
8619 * vm_map_lookup_entry() internally and returns the
8620 * entry containing the start of the address range if
8621 * the entire range is valid.
8622 */
8623 if (vm_map_range_check(map, start, end, &temp_entry)) {
8624 entry = temp_entry;
8625 vm_map_clip_start(map, entry, start);
8626 }
8627 else {
8628 vm_map_unlock(map);
8629 return(KERN_INVALID_ADDRESS);
1c79356b 8630 }
1c79356b 8631
91447636
A
8632 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
8633 vm_map_clip_end(map, entry, end);
0b4e3aa0 8634
91447636 8635 entry->behavior = new_behavior;
1c79356b 8636
91447636 8637 entry = entry->vme_next;
1c79356b 8638 }
91447636
A
8639
8640 vm_map_unlock(map);
8641 return(KERN_SUCCESS);
1c79356b
A
8642}
8643
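/*
 * Illustrative sketch, not part of the original source: a user task
 * reaches vm_map_behavior_set() through vm_behavior_set().  Hinting
 * that a large mapping will be read sequentially, so that pageins get
 * clustered ahead of the faulting address, might look like this (the
 * helper name is made up):
 */
#include <mach/mach.h>

static kern_return_t
hint_sequential(vm_address_t addr, vm_size_t len)
{
	return vm_behavior_set(mach_task_self(), addr, len,
	    VM_BEHAVIOR_SEQUENTIAL);
}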
1c79356b 8644
91447636
A
8645#include <mach_kdb.h>
8646#if MACH_KDB
8647#include <ddb/db_output.h>
8648#include <vm/vm_print.h>
1c79356b 8649
91447636 8650#define printf db_printf
1c79356b 8651
91447636
A
8652/*
8653 * Forward declarations for internal functions.
8654 */
8655extern void vm_map_links_print(
8656 struct vm_map_links *links);
0b4e3aa0 8657
91447636
A
8658extern void vm_map_header_print(
8659 struct vm_map_header *header);
1c79356b 8660
91447636
A
8661extern void vm_map_entry_print(
8662 vm_map_entry_t entry);
0b4e3aa0 8663
91447636
A
8664extern void vm_follow_entry(
8665 vm_map_entry_t entry);
0b4e3aa0 8666
91447636
A
8667extern void vm_follow_map(
8668 vm_map_t map);
1c79356b 8669
91447636
A
8670/*
8671 * vm_map_links_print: [ debug ]
8672 */
8673void
8674vm_map_links_print(
8675 struct vm_map_links *links)
8676{
8677 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
8678 links->prev,
8679 links->next,
8680 (unsigned long long)links->start,
8681 (unsigned long long)links->end);
8682}
1c79356b 8683
91447636
A
8684/*
8685 * vm_map_header_print: [ debug ]
8686 */
8687void
8688vm_map_header_print(
8689 struct vm_map_header *header)
8690{
8691 vm_map_links_print(&header->links);
8692 iprintf("nentries = %08X, %sentries_pageable\n",
8693 header->nentries,
8694 (header->entries_pageable ? "" : "!"));
8695}
1c79356b 8696
91447636
A
8697/*
8698 * vm_follow_entry: [ debug ]
8699 */
8700void
8701vm_follow_entry(
8702 vm_map_entry_t entry)
8703{
8704 int shadows;
1c79356b 8705
91447636 8706 iprintf("map entry %08X\n", entry);
1c79356b 8707
91447636 8708 db_indent += 2;
1c79356b 8709
91447636
A
8710 shadows = vm_follow_object(entry->object.vm_object);
8711 iprintf("Total objects : %d\n",shadows);
0b4e3aa0 8712
91447636
A
8713 db_indent -= 2;
8714}
1c79356b 8715
91447636
A
8716/*
8717 * vm_map_entry_print: [ debug ]
8718 */
1c79356b 8719void
91447636
A
8720vm_map_entry_print(
8721 register vm_map_entry_t entry)
1c79356b 8722{
91447636
A
8723 static const char *inheritance_name[4] =
8724 { "share", "copy", "none", "?"};
8725 static const char *behavior_name[4] =
8726 { "dflt", "rand", "seqtl", "rseqntl" };
0b4e3aa0 8727
91447636 8728 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
0b4e3aa0 8729
91447636 8730 db_indent += 2;
0b4e3aa0 8731
91447636 8732 vm_map_links_print(&entry->links);
0b4e3aa0 8733
91447636
A
8734 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
8735 (unsigned long long)entry->vme_start,
8736 (unsigned long long)entry->vme_end,
8737 entry->protection,
8738 entry->max_protection,
8739 inheritance_name[(entry->inheritance & 0x3)]);
0b4e3aa0 8740
91447636
A
8741 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
8742 behavior_name[(entry->behavior & 0x3)],
8743 entry->wired_count,
8744 entry->user_wired_count);
8745 iprintf("%sin_transition, %sneeds_wakeup\n",
8746 (entry->in_transition ? "" : "!"),
8747 (entry->needs_wakeup ? "" : "!"));
0b4e3aa0 8748
91447636
A
8749 if (entry->is_sub_map) {
8750 iprintf("submap = %08X - offset = %016llX\n",
8751 entry->object.sub_map,
8752 (unsigned long long)entry->offset);
8753 } else {
8754 iprintf("object = %08X offset = %016llX - ",
8755 entry->object.vm_object,
8756 (unsigned long long)entry->offset);
8757 printf("%sis_shared, %sneeds_copy\n",
8758 (entry->is_shared ? "" : "!"),
8759 (entry->needs_copy ? "" : "!"));
1c79356b 8760 }
1c79356b 8761
91447636
A
8762 db_indent -= 2;
8763}
1c79356b 8764
91447636
A
8765/*
8766 * vm_follow_map: [ debug ]
8767 */
8768void
8769vm_follow_map(
8770 vm_map_t map)
1c79356b 8771{
91447636 8772 register vm_map_entry_t entry;
1c79356b 8773
91447636 8774 iprintf("task map %08X\n", map);
1c79356b 8775
91447636 8776 db_indent += 2;
55e303ae 8777
91447636
A
8778 for (entry = vm_map_first_entry(map);
8779 entry && entry != vm_map_to_entry(map);
8780 entry = entry->vme_next) {
8781 vm_follow_entry(entry);
1c79356b 8782 }
1c79356b 8783
91447636
A
8784 db_indent -= 2;
8785}
1c79356b
A
8786
8787/*
91447636 8788 * vm_map_print: [ debug ]
1c79356b 8789 */
5353443c 8790void
91447636
A
8791vm_map_print(
8792 db_addr_t inmap)
5353443c 8793{
91447636
A
8794 register vm_map_entry_t entry;
8795 vm_map_t map;
8796#if TASK_SWAPPER
8797 char *swstate;
8798#endif /* TASK_SWAPPER */
5353443c 8799
91447636
A
8800 map = (vm_map_t)(long)
8801 inmap; /* Make sure we have the right type */
5353443c 8802
91447636 8803 iprintf("task map %08X\n", map);
5353443c 8804
91447636 8805 db_indent += 2;
5353443c 8806
91447636 8807 vm_map_header_print(&map->hdr);
5353443c 8808
91447636
A
8809 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
8810 map->pmap,
8811 map->size,
8812 map->ref_count,
8813 map->hint,
8814 map->first_free);
1c79356b 8815
91447636
A
8816 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
8817 (map->wait_for_space ? "" : "!"),
8818 (map->wiring_required ? "" : "!"),
8819 map->timestamp);
8820
8821#if TASK_SWAPPER
8822 switch (map->sw_state) {
8823 case MAP_SW_IN:
8824 swstate = "SW_IN";
8825 break;
8826 case MAP_SW_OUT:
8827 swstate = "SW_OUT";
8828 break;
8829 default:
8830 swstate = "????";
8831 break;
1c79356b 8832 }
91447636
A
8833 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
8834#endif /* TASK_SWAPPER */
8835
8836 for (entry = vm_map_first_entry(map);
8837 entry && entry != vm_map_to_entry(map);
8838 entry = entry->vme_next) {
8839 vm_map_entry_print(entry);
8840 }
8841
8842 db_indent -= 2;
1c79356b
A
8843}
8844
1c79356b 8845/*
91447636 8846 * Routine: vm_map_copy_print
1c79356b 8847 * Purpose:
91447636 8848 * Pretty-print a copy object for ddb.
1c79356b 8849 */
91447636
A
8850
8851void
8852vm_map_copy_print(
8853 db_addr_t incopy)
1c79356b 8854{
91447636 8855 vm_map_copy_t copy;
9bccf70c 8856 vm_map_entry_t entry;
1c79356b 8857
91447636
A
8858 copy = (vm_map_copy_t)(long)
8859 incopy; /* Make sure we have the right type */
1c79356b 8860
91447636 8861 printf("copy object 0x%x\n", copy);
9bccf70c 8862
91447636 8863 db_indent += 2;
9bccf70c 8864
91447636
A
8865 iprintf("type=%d", copy->type);
8866 switch (copy->type) {
8867 case VM_MAP_COPY_ENTRY_LIST:
8868 printf("[entry_list]");
8869 break;
9bccf70c 8870
91447636
A
8871 case VM_MAP_COPY_OBJECT:
8872 printf("[object]");
1c79356b 8873 break;
91447636
A
8874
8875 case VM_MAP_COPY_KERNEL_BUFFER:
8876 printf("[kernel_buffer]");
9bccf70c 8877 break;
1c79356b 8878
91447636
A
8879 default:
8880 printf("[bad type]");
8881 break;
1c79356b 8882 }
91447636
A
8883 printf(", offset=0x%llx", (unsigned long long)copy->offset);
8884 printf(", size=0x%x\n", copy->size);
1c79356b 8885
91447636
A
8886 switch (copy->type) {
8887 case VM_MAP_COPY_ENTRY_LIST:
8888 vm_map_header_print(&copy->cpy_hdr);
8889 for (entry = vm_map_copy_first_entry(copy);
8890 entry && entry != vm_map_copy_to_entry(copy);
8891 entry = entry->vme_next) {
8892 vm_map_entry_print(entry);
8893 }
8894 break;
1c79356b 8895
91447636
A
8896 case VM_MAP_COPY_OBJECT:
8897 iprintf("object=0x%x\n", copy->cpy_object);
8898 break;
8899
8900 case VM_MAP_COPY_KERNEL_BUFFER:
8901 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
8902 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
8903 break;
1c79356b 8904
1c79356b
A
8905 }
8906
91447636 8907 db_indent -=2;
1c79356b
A
8908}
8909
1c79356b 8910/*
91447636
A
8911 * db_vm_map_total_size(map) [ debug ]
8912 *
8913 * return the total virtual size (in bytes) of the map
1c79356b 8914 */
91447636
A
8915vm_map_size_t
8916db_vm_map_total_size(
8917 db_addr_t inmap)
8918{
8919 vm_map_entry_t entry;
8920 vm_map_size_t total;
8921 vm_map_t map;
1c79356b 8922
91447636
A
8923 map = (vm_map_t)(long)
8924 inmap; /* Make sure we have the right type */
1c79356b 8925
91447636
A
8926 total = 0;
8927 for (entry = vm_map_first_entry(map);
8928 entry != vm_map_to_entry(map);
8929 entry = entry->vme_next) {
8930 total += entry->vme_end - entry->vme_start;
8931 }
1c79356b 8932
91447636
A
8933 return total;
8934}
1c79356b 8935
91447636 8936#endif /* MACH_KDB */
1c79356b
A
8937
8938/*
91447636
A
8939 * Routine: vm_map_entry_insert
8940 *
8941 * Description: This routine inserts a new vm_map_entry in a locked map.
1c79356b 8942 */
91447636
A
8943vm_map_entry_t
8944vm_map_entry_insert(
8945 vm_map_t map,
8946 vm_map_entry_t insp_entry,
8947 vm_map_offset_t start,
8948 vm_map_offset_t end,
8949 vm_object_t object,
8950 vm_object_offset_t offset,
8951 boolean_t needs_copy,
8952 boolean_t is_shared,
8953 boolean_t in_transition,
8954 vm_prot_t cur_protection,
8955 vm_prot_t max_protection,
8956 vm_behavior_t behavior,
8957 vm_inherit_t inheritance,
8958 unsigned wired_count)
1c79356b 8959{
91447636 8960 vm_map_entry_t new_entry;
1c79356b 8961
91447636 8962 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 8963
91447636 8964 new_entry = vm_map_entry_create(map);
1c79356b 8965
91447636
A
8966 new_entry->vme_start = start;
8967 new_entry->vme_end = end;
8968 assert(page_aligned(new_entry->vme_start));
8969 assert(page_aligned(new_entry->vme_end));
1c79356b 8970
91447636
A
8971 new_entry->object.vm_object = object;
8972 new_entry->offset = offset;
8973 new_entry->is_shared = is_shared;
8974 new_entry->is_sub_map = FALSE;
8975 new_entry->needs_copy = needs_copy;
8976 new_entry->in_transition = in_transition;
8977 new_entry->needs_wakeup = FALSE;
8978 new_entry->inheritance = inheritance;
8979 new_entry->protection = cur_protection;
8980 new_entry->max_protection = max_protection;
8981 new_entry->behavior = behavior;
8982 new_entry->wired_count = wired_count;
8983 new_entry->user_wired_count = 0;
8984 new_entry->use_pmap = FALSE;
0c530ab8 8985 new_entry->alias = 0;
1c79356b 8986
91447636
A
8987 /*
8988 * Insert the new entry into the list.
8989 */
1c79356b 8990
91447636
A
8991 vm_map_entry_link(map, insp_entry, new_entry);
8992 map->size += end - start;
8993
8994 /*
8995 * Update the free space hint and the lookup hint.
8996 */
8997
0c530ab8 8998 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 8999 return new_entry;
1c79356b
A
9000}
9001
9002/*
91447636
A
9003 * Routine: vm_map_remap_extract
9004 *
9005 * Description: This routine returns a vm_map_entry list from a map.
1c79356b 9006 */
91447636
A
9007static kern_return_t
9008vm_map_remap_extract(
9009 vm_map_t map,
9010 vm_map_offset_t addr,
9011 vm_map_size_t size,
9012 boolean_t copy,
9013 struct vm_map_header *map_header,
9014 vm_prot_t *cur_protection,
9015 vm_prot_t *max_protection,
9016 /* What, no behavior? */
9017 vm_inherit_t inheritance,
9018 boolean_t pageable)
1c79356b 9019{
91447636
A
9020 kern_return_t result;
9021 vm_map_size_t mapped_size;
9022 vm_map_size_t tmp_size;
9023 vm_map_entry_t src_entry; /* result of last map lookup */
9024 vm_map_entry_t new_entry;
9025 vm_object_offset_t offset;
9026 vm_map_offset_t map_address;
9027 vm_map_offset_t src_start; /* start of entry to map */
9028 vm_map_offset_t src_end; /* end of region to be mapped */
9029 vm_object_t object;
9030 vm_map_version_t version;
9031 boolean_t src_needs_copy;
9032 boolean_t new_entry_needs_copy;
1c79356b 9033
91447636
A
9034 assert(map != VM_MAP_NULL);
9035 assert(size != 0 && size == vm_map_round_page(size));
9036 assert(inheritance == VM_INHERIT_NONE ||
9037 inheritance == VM_INHERIT_COPY ||
9038 inheritance == VM_INHERIT_SHARE);
1c79356b 9039
91447636
A
9040 /*
9041 * Compute start and end of region.
9042 */
9043 src_start = vm_map_trunc_page(addr);
9044 src_end = vm_map_round_page(src_start + size);
1c79356b 9045
91447636
A
9046 /*
9047 * Initialize map_header.
9048 */
9049 map_header->links.next = (struct vm_map_entry *)&map_header->links;
9050 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
9051 map_header->nentries = 0;
9052 map_header->entries_pageable = pageable;
1c79356b 9053
91447636
A
9054 *cur_protection = VM_PROT_ALL;
9055 *max_protection = VM_PROT_ALL;
1c79356b 9056
91447636
A
9057 map_address = 0;
9058 mapped_size = 0;
9059 result = KERN_SUCCESS;
1c79356b 9060
91447636
A
9061 /*
9062 * The specified source virtual space might correspond to
9063 * multiple map entries, need to loop on them.
9064 */
9065 vm_map_lock(map);
9066 while (mapped_size != size) {
9067 vm_map_size_t entry_size;
1c79356b 9068
91447636
A
9069 /*
9070 * Find the beginning of the region.
9071 */
9072 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
9073 result = KERN_INVALID_ADDRESS;
9074 break;
9075 }
1c79356b 9076
91447636
A
9077 if (src_start < src_entry->vme_start ||
9078 (mapped_size && src_start != src_entry->vme_start)) {
9079 result = KERN_INVALID_ADDRESS;
9080 break;
9081 }
1c79356b 9082
91447636
A
9083 if(src_entry->is_sub_map) {
9084 result = KERN_INVALID_ADDRESS;
9085 break;
9086 }
1c79356b 9087
91447636
A
9088 tmp_size = size - mapped_size;
9089 if (src_end > src_entry->vme_end)
9090 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 9091
91447636
A
9092 entry_size = (vm_map_size_t)(src_entry->vme_end -
9093 src_entry->vme_start);
1c79356b 9094
91447636
A
9095 if(src_entry->is_sub_map) {
9096 vm_map_reference(src_entry->object.sub_map);
9097 object = VM_OBJECT_NULL;
9098 } else {
9099 object = src_entry->object.vm_object;
55e303ae 9100
91447636
A
9101 if (object == VM_OBJECT_NULL) {
9102 object = vm_object_allocate(entry_size);
9103 src_entry->offset = 0;
9104 src_entry->object.vm_object = object;
9105 } else if (object->copy_strategy !=
9106 MEMORY_OBJECT_COPY_SYMMETRIC) {
9107 /*
9108 * We are already using an asymmetric
9109 * copy, and therefore we already have
9110 * the right object.
9111 */
9112 assert(!src_entry->needs_copy);
9113 } else if (src_entry->needs_copy || object->shadowed ||
9114 (object->internal && !object->true_share &&
9115 !src_entry->is_shared &&
9116 object->size > entry_size)) {
1c79356b 9117
91447636
A
9118 vm_object_shadow(&src_entry->object.vm_object,
9119 &src_entry->offset,
9120 entry_size);
1c79356b 9121
91447636
A
9122 if (!src_entry->needs_copy &&
9123 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
9124 vm_prot_t prot;
9125
9126 prot = src_entry->protection & ~VM_PROT_WRITE;
9127#ifdef STACK_ONLY_NX
9128 if (src_entry->alias != VM_MEMORY_STACK && prot)
9129 prot |= VM_PROT_EXECUTE;
9130#endif
91447636
A
9131 if(map->mapped) {
9132 vm_object_pmap_protect(
9133 src_entry->object.vm_object,
9134 src_entry->offset,
9135 entry_size,
9136 PMAP_NULL,
9137 src_entry->vme_start,
0c530ab8 9138 prot);
91447636
A
9139 } else {
9140 pmap_protect(vm_map_pmap(map),
0c530ab8
A
9141 src_entry->vme_start,
9142 src_entry->vme_end,
9143 prot);
91447636
A
9144 }
9145 }
1c79356b 9146
91447636
A
9147 object = src_entry->object.vm_object;
9148 src_entry->needs_copy = FALSE;
9149 }
1c79356b 9150
1c79356b 9151
91447636
A
9152 vm_object_lock(object);
9153 object->ref_count++; /* object ref. for new entry */
9154 VM_OBJ_RES_INCR(object);
9155 if (object->copy_strategy ==
9156 MEMORY_OBJECT_COPY_SYMMETRIC) {
9157 object->copy_strategy =
9158 MEMORY_OBJECT_COPY_DELAY;
9159 }
9160 vm_object_unlock(object);
9161 }
1c79356b 9162
91447636 9163 offset = src_entry->offset + (src_start - src_entry->vme_start);
1c79356b 9164
91447636
A
9165 new_entry = _vm_map_entry_create(map_header);
9166 vm_map_entry_copy(new_entry, src_entry);
9167 new_entry->use_pmap = FALSE; /* clr address space specifics */
1c79356b 9168
91447636
A
9169 new_entry->vme_start = map_address;
9170 new_entry->vme_end = map_address + tmp_size;
9171 new_entry->inheritance = inheritance;
9172 new_entry->offset = offset;
1c79356b 9173
91447636
A
9174 /*
9175 * The new region has to be copied now if required.
9176 */
9177 RestartCopy:
9178 if (!copy) {
9179 src_entry->is_shared = TRUE;
9180 new_entry->is_shared = TRUE;
9181 if (!(new_entry->is_sub_map))
9182 new_entry->needs_copy = FALSE;
1c79356b 9183
91447636
A
9184 } else if (src_entry->is_sub_map) {
9185 /* make this a COW sub_map if not already */
9186 new_entry->needs_copy = TRUE;
9187 object = VM_OBJECT_NULL;
9188 } else if (src_entry->wired_count == 0 &&
9189 vm_object_copy_quickly(&new_entry->object.vm_object,
9190 new_entry->offset,
9191 (new_entry->vme_end -
9192 new_entry->vme_start),
9193 &src_needs_copy,
9194 &new_entry_needs_copy)) {
55e303ae 9195
91447636
A
9196 new_entry->needs_copy = new_entry_needs_copy;
9197 new_entry->is_shared = FALSE;
1c79356b 9198
91447636
A
9199 /*
9200 * Handle copy_on_write semantics.
9201 */
9202 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
9203 vm_prot_t prot;
9204
9205 prot = src_entry->protection & ~VM_PROT_WRITE;
9206#ifdef STACK_ONLY_NX
9207 if (src_entry->alias != VM_MEMORY_STACK && prot)
9208 prot |= VM_PROT_EXECUTE;
9209#endif
91447636
A
9210 vm_object_pmap_protect(object,
9211 offset,
9212 entry_size,
9213 ((src_entry->is_shared
9214 || map->mapped) ?
9215 PMAP_NULL : map->pmap),
9216 src_entry->vme_start,
0c530ab8 9217 prot);
1c79356b 9218
91447636
A
9219 src_entry->needs_copy = TRUE;
9220 }
9221 /*
9222 * Throw away the old object reference of the new entry.
9223 */
9224 vm_object_deallocate(object);
1c79356b 9225
91447636
A
9226 } else {
9227 new_entry->is_shared = FALSE;
1c79356b 9228
91447636
A
9229 /*
9230 * The map can be safely unlocked since we
9231 * already hold a reference on the object.
9232 *
9233 * Record the timestamp of the map for later
9234 * verification, and unlock the map.
9235 */
9236 version.main_timestamp = map->timestamp;
9237 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 9238
91447636
A
9239 /*
9240 * Perform the copy.
9241 */
9242 if (src_entry->wired_count > 0) {
9243 vm_object_lock(object);
9244 result = vm_object_copy_slowly(
9245 object,
9246 offset,
9247 entry_size,
9248 THREAD_UNINT,
9249 &new_entry->object.vm_object);
1c79356b 9250
91447636
A
9251 new_entry->offset = 0;
9252 new_entry->needs_copy = FALSE;
9253 } else {
9254 result = vm_object_copy_strategically(
9255 object,
9256 offset,
9257 entry_size,
9258 &new_entry->object.vm_object,
9259 &new_entry->offset,
9260 &new_entry_needs_copy);
1c79356b 9261
91447636
A
9262 new_entry->needs_copy = new_entry_needs_copy;
9263 }
1c79356b 9264
91447636
A
9265 /*
9266 * Throw away the old object reference of the new entry.
9267 */
9268 vm_object_deallocate(object);
1c79356b 9269
91447636
A
9270 if (result != KERN_SUCCESS &&
9271 result != KERN_MEMORY_RESTART_COPY) {
9272 _vm_map_entry_dispose(map_header, new_entry);
9273 break;
9274 }
1c79356b 9275
91447636
A
9276 /*
9277 * Verify that the map has not substantially
9278 * changed while the copy was being made.
9279 */
1c79356b 9280
91447636
A
9281 vm_map_lock(map);
9282 if (version.main_timestamp + 1 != map->timestamp) {
9283 /*
9284 * Simple version comparison failed.
9285 *
9286 * Retry the lookup and verify that the
9287 * same object/offset are still present.
9288 */
9289 vm_object_deallocate(new_entry->
9290 object.vm_object);
9291 _vm_map_entry_dispose(map_header, new_entry);
9292 if (result == KERN_MEMORY_RESTART_COPY)
9293 result = KERN_SUCCESS;
9294 continue;
9295 }
1c79356b 9296
91447636
A
9297 if (result == KERN_MEMORY_RESTART_COPY) {
9298 vm_object_reference(object);
9299 goto RestartCopy;
9300 }
9301 }
1c79356b 9302
91447636
A
9303 _vm_map_entry_link(map_header,
9304 map_header->links.prev, new_entry);
1c79356b 9305
91447636
A
9306 *cur_protection &= src_entry->protection;
9307 *max_protection &= src_entry->max_protection;
1c79356b 9308
91447636
A
9309 map_address += tmp_size;
9310 mapped_size += tmp_size;
9311 src_start += tmp_size;
1c79356b 9312
91447636 9313 } /* end while */
1c79356b 9314
91447636
A
9315 vm_map_unlock(map);
9316 if (result != KERN_SUCCESS) {
9317 /*
9318 * Free all allocated elements.
9319 */
9320 for (src_entry = map_header->links.next;
9321 src_entry != (struct vm_map_entry *)&map_header->links;
9322 src_entry = new_entry) {
9323 new_entry = src_entry->vme_next;
9324 _vm_map_entry_unlink(map_header, src_entry);
9325 vm_object_deallocate(src_entry->object.vm_object);
9326 _vm_map_entry_dispose(map_header, src_entry);
9327 }
9328 }
9329 return result;
1c79356b
A
9330}
9331
9332/*
91447636 9333 * Routine: vm_remap
1c79356b 9334 *
91447636
A
9335 * Map a portion of a task's address space.
9336 * The mapped region must not overlap more than
9337 * one vm memory object. Protections and
9338 * inheritance attributes remain the same
9339 * as in the original task and are returned as out parameters.
9340 * The source and target tasks can be identical.
9341 * Other attributes are identical as for vm_map().
1c79356b
A
9342 */
9343kern_return_t
91447636
A
9344vm_map_remap(
9345 vm_map_t target_map,
9346 vm_map_address_t *address,
9347 vm_map_size_t size,
9348 vm_map_offset_t mask,
9349 boolean_t anywhere,
9350 vm_map_t src_map,
9351 vm_map_offset_t memory_address,
1c79356b 9352 boolean_t copy,
1c79356b
A
9353 vm_prot_t *cur_protection,
9354 vm_prot_t *max_protection,
91447636 9355 vm_inherit_t inheritance)
1c79356b
A
9356{
9357 kern_return_t result;
91447636 9358 vm_map_entry_t entry;
0c530ab8 9359 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 9360 vm_map_entry_t new_entry;
91447636 9361 struct vm_map_header map_header;
1c79356b 9362
91447636
A
9363 if (target_map == VM_MAP_NULL)
9364 return KERN_INVALID_ARGUMENT;
1c79356b 9365
91447636
A
9366 switch (inheritance) {
9367 case VM_INHERIT_NONE:
9368 case VM_INHERIT_COPY:
9369 case VM_INHERIT_SHARE:
9370 if (size != 0 && src_map != VM_MAP_NULL)
9371 break;
9372 /*FALL THRU*/
9373 default:
9374 return KERN_INVALID_ARGUMENT;
9375 }
1c79356b 9376
91447636 9377 size = vm_map_round_page(size);
1c79356b 9378
91447636
A
9379 result = vm_map_remap_extract(src_map, memory_address,
9380 size, copy, &map_header,
9381 cur_protection,
9382 max_protection,
9383 inheritance,
9384 target_map->hdr.
9385 entries_pageable);
1c79356b 9386
91447636
A
9387 if (result != KERN_SUCCESS) {
9388 return result;
9389 }
1c79356b 9390
91447636
A
9391 /*
9392 * Allocate/check a range of free virtual address
9393 * space for the target
1c79356b 9394 */
91447636
A
9395 *address = vm_map_trunc_page(*address);
9396 vm_map_lock(target_map);
9397 result = vm_map_remap_range_allocate(target_map, address, size,
9398 mask, anywhere, &insp_entry);
1c79356b 9399
91447636
A
9400 for (entry = map_header.links.next;
9401 entry != (struct vm_map_entry *)&map_header.links;
9402 entry = new_entry) {
9403 new_entry = entry->vme_next;
9404 _vm_map_entry_unlink(&map_header, entry);
9405 if (result == KERN_SUCCESS) {
9406 entry->vme_start += *address;
9407 entry->vme_end += *address;
9408 vm_map_entry_link(target_map, insp_entry, entry);
9409 insp_entry = entry;
9410 } else {
9411 if (!entry->is_sub_map) {
9412 vm_object_deallocate(entry->object.vm_object);
9413 } else {
9414 vm_map_deallocate(entry->object.sub_map);
9415 }
9416 _vm_map_entry_dispose(&map_header, entry);
1c79356b 9417 }
91447636 9418 }
1c79356b 9419
91447636
A
9420 if (result == KERN_SUCCESS) {
9421 target_map->size += size;
0c530ab8 9422 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
91447636
A
9423 }
9424 vm_map_unlock(target_map);
1c79356b 9425
91447636
A
9426 if (result == KERN_SUCCESS && target_map->wiring_required)
9427 result = vm_map_wire(target_map, *address,
9428 *address + size, *cur_protection, TRUE);
9429 return result;
9430}
1c79356b 9431
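/*
 * Illustrative sketch, not part of the original source: vm_map_remap()
 * backs the mach_vm_remap() interface.  Creating a second, read-only
 * window onto an existing buffer in the same task might look roughly
 * like this (the helper name is made up):
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
alias_read_only(mach_vm_address_t src, mach_vm_size_t len,
	mach_vm_address_t *alias)
{
	vm_prot_t	cur_prot, max_prot;
	kern_return_t	kr;

	*alias = 0;
	kr = mach_vm_remap(mach_task_self(), alias, len, 0,
	    VM_FLAGS_ANYWHERE,			/* let the kernel pick "*alias" */
	    mach_task_self(), src,
	    FALSE,				/* share rather than copy */
	    &cur_prot, &max_prot, VM_INHERIT_SHARE);
	if (kr == KERN_SUCCESS)
		kr = mach_vm_protect(mach_task_self(), *alias, len,
		    FALSE, VM_PROT_READ);	/* drop write on the alias */
	return kr;
}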
91447636
A
9432/*
9433 * Routine: vm_map_remap_range_allocate
9434 *
9435 * Description:
9436 * Allocate a range in the specified virtual address map.
9437 * returns the address and the map entry just before the allocated
9438 * range
9439 *
9440 * Map must be locked.
9441 */
1c79356b 9442
91447636
A
9443static kern_return_t
9444vm_map_remap_range_allocate(
9445 vm_map_t map,
9446 vm_map_address_t *address, /* IN/OUT */
9447 vm_map_size_t size,
9448 vm_map_offset_t mask,
9449 boolean_t anywhere,
9450 vm_map_entry_t *map_entry) /* OUT */
9451{
9452 register vm_map_entry_t entry;
9453 register vm_map_offset_t start;
9454 register vm_map_offset_t end;
1c79356b 9455
91447636 9456 StartAgain: ;
1c79356b 9457
91447636 9458 start = *address;
1c79356b 9459
91447636
A
9460 if (anywhere)
9461 {
9462 /*
9463 * Calculate the first possible address.
9464 */
1c79356b 9465
91447636
A
9466 if (start < map->min_offset)
9467 start = map->min_offset;
9468 if (start > map->max_offset)
9469 return(KERN_NO_SPACE);
9470
9471 /*
9472 * Look for the first possible address;
9473 * if there's already something at this
9474 * address, we have to start after it.
9475 */
1c79356b 9476
91447636
A
9477 assert(first_free_is_valid(map));
9478 if (start == map->min_offset) {
9479 if ((entry = map->first_free) != vm_map_to_entry(map))
9480 start = entry->vme_end;
9481 } else {
9482 vm_map_entry_t tmp_entry;
9483 if (vm_map_lookup_entry(map, start, &tmp_entry))
9484 start = tmp_entry->vme_end;
9485 entry = tmp_entry;
9486 }
9487
9488 /*
9489 * In any case, the "entry" always precedes
9490 * the proposed new region throughout the
9491 * loop:
9492 */
1c79356b 9493
91447636
A
9494 while (TRUE) {
9495 register vm_map_entry_t next;
1c79356b 9496
91447636
A
9497 /*
9498 * Find the end of the proposed new region.
9499 * Be sure we didn't go beyond the end, or
9500 * wrap around the address.
9501 */
9502
9503 end = ((start + mask) & ~mask);
9504 if (end < start)
9505 return(KERN_NO_SPACE);
9506 start = end;
9507 end += size;
9508
9509 if ((end > map->max_offset) || (end < start)) {
9510 if (map->wait_for_space) {
9511 if (size <= (map->max_offset -
9512 map->min_offset)) {
9513 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
9514 vm_map_unlock(map);
9515 thread_block(THREAD_CONTINUE_NULL);
9516 vm_map_lock(map);
9517 goto StartAgain;
9518 }
1c79356b 9519 }
91447636
A
9520
9521 return(KERN_NO_SPACE);
9522 }
1c79356b 9523
91447636
A
9524 /*
9525 * If there are no more entries, we must win.
9526 */
1c79356b 9527
91447636
A
9528 next = entry->vme_next;
9529 if (next == vm_map_to_entry(map))
9530 break;
1c79356b 9531
91447636
A
9532 /*
9533 * If there is another entry, it must be
9534 * after the end of the potential new region.
9535 */
1c79356b 9536
91447636
A
9537 if (next->vme_start >= end)
9538 break;
1c79356b 9539
91447636
A
9540 /*
9541 * Didn't fit -- move to the next entry.
9542 */
1c79356b 9543
91447636
A
9544 entry = next;
9545 start = entry->vme_end;
9546 }
9547 *address = start;
9548 } else {
9549 vm_map_entry_t temp_entry;
9550
9551 /*
9552 * Verify that:
9553 * the address doesn't itself violate
9554 * the mask requirement.
9555 */
1c79356b 9556
91447636
A
9557 if ((start & mask) != 0)
9558 return(KERN_NO_SPACE);
1c79356b 9559
1c79356b 9560
91447636
A
9561 /*
9562 * ... the address is within bounds
9563 */
1c79356b 9564
91447636 9565 end = start + size;
1c79356b 9566
91447636
A
9567 if ((start < map->min_offset) ||
9568 (end > map->max_offset) ||
9569 (start >= end)) {
9570 return(KERN_INVALID_ADDRESS);
9571 }
1c79356b 9572
91447636
A
9573 /*
9574 * ... the starting address isn't allocated
9575 */
9576
9577 if (vm_map_lookup_entry(map, start, &temp_entry))
9578 return(KERN_NO_SPACE);
9579
9580 entry = temp_entry;
9581
9582 /*
9583 * ... the next region doesn't overlap the
9584 * end point.
9585 */
1c79356b 9586
91447636
A
9587 if ((entry->vme_next != vm_map_to_entry(map)) &&
9588 (entry->vme_next->vme_start < end))
9589 return(KERN_NO_SPACE);
9590 }
9591 *map_entry = entry;
9592 return(KERN_SUCCESS);
9593}
1c79356b 9594
91447636
A
9595/*
9596 * vm_map_switch:
9597 *
9598 * Set the address map for the current thread to the specified map
9599 */
1c79356b 9600
91447636
A
9601vm_map_t
9602vm_map_switch(
9603 vm_map_t map)
9604{
9605 int mycpu;
9606 thread_t thread = current_thread();
9607 vm_map_t oldmap = thread->map;
1c79356b 9608
91447636
A
9609 mp_disable_preemption();
9610 mycpu = cpu_number();
1c79356b 9611
91447636
A
9612 /*
9613 * Deactivate the current map and activate the requested map
9614 */
9615 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 9616
91447636
A
9617 mp_enable_preemption();
9618 return(oldmap);
9619}
1c79356b 9620
1c79356b 9621
91447636
A
9622/*
9623 * Routine: vm_map_write_user
9624 *
9625 * Description:
9626 * Copy out data from kernel space into space in the
9627 * destination map. The space must already exist in the
9628 * destination map.
9629 * NOTE: This routine should only be called by threads
9630 * which can block on a page fault. i.e. kernel mode user
9631 * threads.
9632 *
9633 */
9634kern_return_t
9635vm_map_write_user(
9636 vm_map_t map,
9637 void *src_p,
9638 vm_map_address_t dst_addr,
9639 vm_size_t size)
9640{
9641 kern_return_t kr = KERN_SUCCESS;
1c79356b 9642
91447636
A
9643 if(current_map() == map) {
9644 if (copyout(src_p, dst_addr, size)) {
9645 kr = KERN_INVALID_ADDRESS;
9646 }
9647 } else {
9648 vm_map_t oldmap;
1c79356b 9649
91447636
A
9650 /* take on the identity of the target map while doing */
9651 /* the transfer */
1c79356b 9652
91447636
A
9653 vm_map_reference(map);
9654 oldmap = vm_map_switch(map);
9655 if (copyout(src_p, dst_addr, size)) {
9656 kr = KERN_INVALID_ADDRESS;
1c79356b 9657 }
91447636
A
9658 vm_map_switch(oldmap);
9659 vm_map_deallocate(map);
1c79356b 9660 }
91447636 9661 return kr;
1c79356b
A
9662}
9663
9664/*
91447636
A
9665 * Routine: vm_map_read_user
9666 *
9667 * Description:
9668 * Copy in data from a user space source map into the
9669 * kernel map. The space must already exist in the
9670 * kernel map.
9671 * NOTE: This routine should only be called by threads
9672 * which can block on a page fault. i.e. kernel mode user
9673 * threads.
1c79356b 9674 *
1c79356b
A
9675 */
9676kern_return_t
91447636
A
9677vm_map_read_user(
9678 vm_map_t map,
9679 vm_map_address_t src_addr,
9680 void *dst_p,
9681 vm_size_t size)
1c79356b 9682{
91447636 9683 kern_return_t kr = KERN_SUCCESS;
1c79356b 9684
91447636
A
9685 if(current_map() == map) {
9686 if (copyin(src_addr, dst_p, size)) {
9687 kr = KERN_INVALID_ADDRESS;
9688 }
9689 } else {
9690 vm_map_t oldmap;
1c79356b 9691
91447636
A
9692 /* take on the identity of the target map while doing */
9693 /* the transfer */
9694
9695 vm_map_reference(map);
9696 oldmap = vm_map_switch(map);
9697 if (copyin(src_addr, dst_p, size)) {
9698 kr = KERN_INVALID_ADDRESS;
9699 }
9700 vm_map_switch(oldmap);
9701 vm_map_deallocate(map);
1c79356b 9702 }
91447636
A
9703 return kr;
9704}
9705
1c79356b 9706
91447636
A
9707/*
9708 * vm_map_check_protection:
9709 *
9710 * Assert that the target map allows the specified
9711 * privilege on the entire address region given.
9712 * The entire region must be allocated.
9713 */
9714boolean_t vm_map_check_protection(
9715	register vm_map_t		map,
9716	register vm_map_offset_t	start,
9717	register vm_map_offset_t	end,
9718	register vm_prot_t		protection)
9719{
9720 register vm_map_entry_t entry;
9721 vm_map_entry_t tmp_entry;
1c79356b 9722
91447636 9723 vm_map_lock(map);
1c79356b 9724
91447636
A
9725 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
9726 {
9727 vm_map_unlock(map);
9728 return (FALSE);
1c79356b
A
9729 }
9730
91447636
A
9731 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9732 vm_map_unlock(map);
9733 return(FALSE);
9734 }
 9735
9736 entry = tmp_entry;
9737
9738 while (start < end) {
9739 if (entry == vm_map_to_entry(map)) {
9740 vm_map_unlock(map);
9741 return(FALSE);
 9742		}
 9743
9744 /*
9745 * No holes allowed!
9746 */
 9747
9748 if (start < entry->vme_start) {
9749 vm_map_unlock(map);
9750 return(FALSE);
9751 }
9752
9753 /*
9754 * Check protection associated with entry.
9755 */
9756
9757 if ((entry->protection & protection) != protection) {
9758 vm_map_unlock(map);
9759 return(FALSE);
9760 }
9761
9762 /* go to next entry */
9763
9764 start = entry->vme_end;
9765 entry = entry->vme_next;
9766 }
9767 vm_map_unlock(map);
9768 return(TRUE);
9769}
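
/*
 * Illustrative usage sketch (not part of the original source): check that
 * an entire user range is currently mapped read/write before starting an
 * operation that assumes it.  The result is only a snapshot; the map can
 * change as soon as vm_map_check_protection() drops the map lock.
 */
#if 0	/* example only */
static boolean_t
example_range_is_writable(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	len)
{
	return vm_map_check_protection(map,
				       vm_map_trunc_page(start),
				       vm_map_round_page(start + len),
				       VM_PROT_READ | VM_PROT_WRITE);
}
#endif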
9770
 9771kern_return_t
9772vm_map_purgable_control(
9773 vm_map_t map,
9774 vm_map_offset_t address,
9775 vm_purgable_t control,
9776 int *state)
 9777{
9778 vm_map_entry_t entry;
9779 vm_object_t object;
9780 kern_return_t kr;
 9781
 9782	/*
 9783	 * Vet all the input parameters and the current type and state of the
 9784	 * underlying object.  Return with an error if anything is amiss.
 9785	 */
9786 if (map == VM_MAP_NULL)
9787 return(KERN_INVALID_ARGUMENT);
 9788
9789 if (control != VM_PURGABLE_SET_STATE &&
9790 control != VM_PURGABLE_GET_STATE)
9791 return(KERN_INVALID_ARGUMENT);
 9792
9793 if (control == VM_PURGABLE_SET_STATE &&
9794 (*state < VM_PURGABLE_STATE_MIN ||
9795 *state > VM_PURGABLE_STATE_MAX))
9796 return(KERN_INVALID_ARGUMENT);
9797
9798 vm_map_lock(map);
9799
9800 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
9801
9802 /*
9803 * Must pass a valid non-submap address.
9804 */
9805 vm_map_unlock(map);
9806 return(KERN_INVALID_ADDRESS);
9807 }
9808
9809 if ((entry->protection & VM_PROT_WRITE) == 0) {
9810 /*
9811 * Can't apply purgable controls to something you can't write.
9812 */
9813 vm_map_unlock(map);
9814 return(KERN_PROTECTION_FAILURE);
9815 }
9816
9817 object = entry->object.vm_object;
9818 if (object == VM_OBJECT_NULL) {
9819 /*
9820 * Object must already be present or it can't be purgable.
9821 */
9822 vm_map_unlock(map);
9823 return KERN_INVALID_ARGUMENT;
9824 }
9825
9826 vm_object_lock(object);
9827
9828 if (entry->offset != 0 ||
9829 entry->vme_end - entry->vme_start != object->size) {
9830 /*
9831 * Can only apply purgable controls to the whole (existing)
9832 * object at once.
9833 */
9834 vm_map_unlock(map);
9835 vm_object_unlock(object);
9836 return KERN_INVALID_ARGUMENT;
9837 }
9838
 9839	vm_map_unlock(map);
 9840
 9841	kr = vm_object_purgable_control(object, control, state);
 9842
 9843	vm_object_unlock(object);
 9844
9845 return kr;
9846}
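
/*
 * Illustrative usage sketch (not part of the original source): toggle a
 * purgeable region between volatile and non-volatile.  "addr" must fall
 * in an entry that covers exactly one purgeable object, per the checks
 * above; with VM_PURGABLE_SET_STATE the prior state is reported back
 * through the state word, so a caller can tell whether a volatile region
 * was emptied in the meantime.
 */
#if 0	/* example only */
static kern_return_t
example_set_volatile(vm_map_t map, vm_map_offset_t addr)
{
	int state = VM_PURGABLE_VOLATILE;

	return vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
}

static kern_return_t
example_set_nonvolatile(vm_map_t map, vm_map_offset_t addr, int *prev_state)
{
	kern_return_t kr;

	*prev_state = VM_PURGABLE_NONVOLATILE;
	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, prev_state);
	/* on success *prev_state holds the old state, e.g. VM_PURGABLE_EMPTY */
	return kr;
}
#endif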
 9847
9848kern_return_t
9849vm_map_page_info(
9850 vm_map_t target_map,
9851 vm_map_offset_t offset,
9852 int *disposition,
9853 int *ref_count)
9854{
9855 vm_map_entry_t map_entry;
9856 vm_object_t object;
9857 vm_page_t m;
9858
9859restart_page_query:
9860 *disposition = 0;
9861 *ref_count = 0;
9862 vm_map_lock(target_map);
9863 if(!vm_map_lookup_entry(target_map, offset, &map_entry)) {
9864 vm_map_unlock(target_map);
9865 return KERN_FAILURE;
9866 }
9867 offset -= map_entry->vme_start; /* adjust to offset within entry */
9868 offset += map_entry->offset; /* adjust to target object offset */
9869 if(map_entry->object.vm_object != VM_OBJECT_NULL) {
9870 if(!map_entry->is_sub_map) {
9871 object = map_entry->object.vm_object;
9872 } else {
9873 vm_map_unlock(target_map);
9874 target_map = map_entry->object.sub_map;
9875 goto restart_page_query;
 9876		}
9877 } else {
9878 vm_map_unlock(target_map);
9879 return KERN_FAILURE;
9880 }
9881 vm_object_lock(object);
9882 vm_map_unlock(target_map);
9883 while(TRUE) {
9884 m = vm_page_lookup(object, offset);
9885 if (m != VM_PAGE_NULL) {
9886 *disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
9887 break;
9888 } else {
9889 if(object->shadow) {
9890 offset += object->shadow_offset;
9891 vm_object_unlock(object);
9892 object = object->shadow;
9893 vm_object_lock(object);
9894 continue;
9895 }
9896 vm_object_unlock(object);
9897 return KERN_FAILURE;
9898 }
9899 }
 9900
 9901	/* The ref_count is not strictly accurate; it measures the number */
 9902	/* of entities holding a reference on the object.  They may not be */
 9903	/* mapping the object, or may not be mapping the section holding */
 9904	/* the target page, but it is still a ballpark number and, though */
 9905	/* an over-count, it picks up the copy-on-write cases. */
 9906
 9907	/* We could also get a picture of page sharing from pmap_attributes */
 9908	/* but this would undercount, as only faulted-in mappings would */
 9909	/* show up. */
 9910
 9911	*ref_count = object->ref_count;
 9912
9913 if (m->fictitious) {
9914 *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
9915 vm_object_unlock(object);
9916 return KERN_SUCCESS;
9917 }
 9918
9919 if (m->dirty)
9920 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
9921 else if(pmap_is_modified(m->phys_page))
9922 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
 9923
9924 if (m->reference)
9925 *disposition |= VM_PAGE_QUERY_PAGE_REF;
9926 else if(pmap_is_referenced(m->phys_page))
9927 *disposition |= VM_PAGE_QUERY_PAGE_REF;
 9928
9929 vm_object_unlock(object);
9930 return KERN_SUCCESS;
9931
9932}
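
/*
 * Illustrative usage sketch (not part of the original source): ask whether
 * the page backing a user address is resident and dirty.  As the comments
 * above note, the disposition bits and ref_count are only advisory
 * snapshots of a moving target.
 */
#if 0	/* example only */
static boolean_t
example_page_resident_and_dirty(vm_map_t map, vm_map_offset_t addr)
{
	int	disposition = 0;
	int	ref_count = 0;

	if (vm_map_page_info(map, vm_map_trunc_page(addr),
			     &disposition, &ref_count) != KERN_SUCCESS)
		return FALSE;

	return ((disposition & VM_PAGE_QUERY_PAGE_PRESENT) &&
		(disposition & VM_PAGE_QUERY_PAGE_DIRTY)) ? TRUE : FALSE;
}
#endif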
9933
9934
 9935/* For a given range, check all map entries.  If an entry corresponds to */
 9936/* the old vm_region/map provided on the call, replace it with the */
 9937/* corresponding range in the new vm_region/map. */
9938kern_return_t vm_map_region_replace(
9939 vm_map_t target_map,
9940 ipc_port_t old_region,
9941 ipc_port_t new_region,
9942 vm_map_offset_t start,
9943 vm_map_offset_t end)
9944{
9945 vm_named_entry_t old_object;
9946 vm_named_entry_t new_object;
9947 vm_map_t old_submap;
9948 vm_map_t new_submap;
9949 vm_map_offset_t addr;
9950 vm_map_entry_t entry;
9951 int nested_pmap = 0;
 9952
 9953
9954 vm_map_lock(target_map);
9955 old_object = (vm_named_entry_t)old_region->ip_kobject;
9956 new_object = (vm_named_entry_t)new_region->ip_kobject;
9957 if((!old_object->is_sub_map) || (!new_object->is_sub_map)) {
9958 vm_map_unlock(target_map);
9959 return KERN_INVALID_ARGUMENT;
9960 }
9961 old_submap = (vm_map_t)old_object->backing.map;
9962 new_submap = (vm_map_t)new_object->backing.map;
9963 vm_map_lock(old_submap);
9964 if((old_submap->min_offset != new_submap->min_offset) ||
9965 (old_submap->max_offset != new_submap->max_offset)) {
9966 vm_map_unlock(old_submap);
9967 vm_map_unlock(target_map);
9968 return KERN_INVALID_ARGUMENT;
9969 }
9970 if(!vm_map_lookup_entry(target_map, start, &entry)) {
 9971		/* if the src is not contained, the entry precedes */
 9972		/* our range */
9973 addr = entry->vme_start;
9974 if(entry == vm_map_to_entry(target_map)) {
9975 vm_map_unlock(old_submap);
9976 vm_map_unlock(target_map);
9977 return KERN_SUCCESS;
9978 }
9979 }
9980 if ((entry->use_pmap) &&
9981 (new_submap->pmap == NULL)) {
 9982		new_submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
9983 if(new_submap->pmap == PMAP_NULL) {
9984 vm_map_unlock(old_submap);
9985 vm_map_unlock(target_map);
9986 return(KERN_NO_SPACE);
9987 }
9988 }
9989
9990 /*
9991 * Mark the new submap as "mapped", so that we get proper
9992 * cleanup of the sub-pmap when we unmap it.
9993 */
9994 new_submap->mapped = TRUE;
9995
9996 addr = entry->vme_start;
9997 vm_map_reference(old_submap);
9998 while((entry != vm_map_to_entry(target_map)) &&
9999 (entry->vme_start < end)) {
10000 if((entry->is_sub_map) &&
10001 (entry->object.sub_map == old_submap)) {
10002 if(entry->use_pmap) {
10003 if((start & 0x0fffffff) ||
10004 ((end - start) != 0x10000000)) {
10005 vm_map_unlock(old_submap);
10006 vm_map_deallocate(old_submap);
10007 vm_map_unlock(target_map);
10008 return KERN_INVALID_ARGUMENT;
10009 }
10010 nested_pmap = 1;
10011 }
10012 entry->object.sub_map = new_submap;
10013 vm_map_reference(new_submap);
10014 vm_map_deallocate(old_submap);
10015 }
10016 entry = entry->vme_next;
10017 addr = entry->vme_start;
10018 }
10019 if(nested_pmap) {
10020#ifndef NO_NESTED_PMAP
10021 pmap_unnest(target_map->pmap, (addr64_t)start);
10022 if(target_map->mapped) {
10023 vm_map_submap_pmap_clean(target_map,
10024 start, end, old_submap, 0);
10025 }
10026 pmap_nest(target_map->pmap, new_submap->pmap,
10027 (addr64_t)start, (addr64_t)start,
10028 (uint64_t)(end - start));
10029#endif	/* NO_NESTED_PMAP */
10030 } else {
10031 vm_map_submap_pmap_clean(target_map,
10032 start, end, old_submap, 0);
10033	}
10034 vm_map_unlock(old_submap);
10035 vm_map_deallocate(old_submap);
10036 vm_map_unlock(target_map);
10037 return KERN_SUCCESS;
10038}
10039
10040/*
10041 * vm_map_msync
10042 *
10043 *	Synchronizes the specified memory range with its backing store
10044 *	image by either flushing or cleaning the contents to the appropriate
10045 *	memory manager, engaging in a memory object synchronize dialog with
10046 *	the manager.  The client doesn't return until the manager issues
10047 *	the m_o_s_completed message.  MIG magically converts the user task
10048 *	parameter to the task's address map.
10049 *
10050 * interpretation of sync_flags
10051 * VM_SYNC_INVALIDATE - discard pages, only return precious
10052 * pages to manager.
10053 *
10054 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
10055 * - discard pages, write dirty or precious
10056 * pages back to memory manager.
10057 *
10058 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
10059 * - write dirty or precious pages back to
10060 * the memory manager.
10061 *
10062 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
10063 * is a hole in the region, and we would
10064 * have returned KERN_SUCCESS, return
10065 * KERN_INVALID_ADDRESS instead.
10066 *
10067 * NOTE
10068 * The memory object attributes have not yet been implemented, this
10069 * function will have to deal with the invalidate attribute
10070 *
10071 * RETURNS
10072 * KERN_INVALID_TASK Bad task parameter
10073 * KERN_INVALID_ARGUMENT both sync and async were specified.
10074 * KERN_SUCCESS The usual.
10075 * KERN_INVALID_ADDRESS There was a hole in the region.
10076 */
10077
10078kern_return_t
10079vm_map_msync(
10080 vm_map_t map,
10081 vm_map_address_t address,
10082 vm_map_size_t size,
10083 vm_sync_t sync_flags)
10084{
10085 msync_req_t msr;
10086 msync_req_t new_msr;
10087 queue_chain_t req_q; /* queue of requests for this msync */
10088 vm_map_entry_t entry;
10089 vm_map_size_t amount_left;
10090 vm_object_offset_t offset;
10091 boolean_t do_sync_req;
10092 boolean_t modifiable;
10093 boolean_t had_hole = FALSE;
10094
10095 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
10096 (sync_flags & VM_SYNC_SYNCHRONOUS))
10097 return(KERN_INVALID_ARGUMENT);
10098
10099 /*
10100	 * align address and size on page boundaries
10101	 */
10102 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
10103 address = vm_map_trunc_page(address);
10104
10105 if (map == VM_MAP_NULL)
10106 return(KERN_INVALID_TASK);
10107
10108 if (size == 0)
10109 return(KERN_SUCCESS);
10110
10111 queue_init(&req_q);
10112 amount_left = size;
10113
10114 while (amount_left > 0) {
10115 vm_object_size_t flush_size;
10116 vm_object_t object;
10117
10118 vm_map_lock(map);
10119 if (!vm_map_lookup_entry(map,
10120 vm_map_trunc_page(address), &entry)) {
10121
10122 vm_size_t skip;
10123
10124 /*
10125 * hole in the address map.
10126 */
10127 had_hole = TRUE;
10128
10129 /*
10130 * Check for empty map.
10131 */
10132 if (entry == vm_map_to_entry(map) &&
10133 entry->vme_next == entry) {
10134 vm_map_unlock(map);
10135 break;
10136 }
10137 /*
10138 * Check that we don't wrap and that
10139 * we have at least one real map entry.
10140 */
10141 if ((map->hdr.nentries == 0) ||
10142 (entry->vme_next->vme_start < address)) {
10143 vm_map_unlock(map);
10144 break;
10145 }
10146 /*
10147 * Move up to the next entry if needed
10148 */
10149 skip = (entry->vme_next->vme_start - address);
10150 if (skip >= amount_left)
10151 amount_left = 0;
10152 else
10153 amount_left -= skip;
10154 address = entry->vme_next->vme_start;
10155 vm_map_unlock(map);
10156 continue;
10157 }
10158
10159		offset = address - entry->vme_start;
10160
10161 /*
10162 * do we have more to flush than is contained in this
10163 * entry ?
10164 */
10165 if (amount_left + entry->vme_start + offset > entry->vme_end) {
10166 flush_size = entry->vme_end -
10167 (entry->vme_start + offset);
10168 } else {
10169 flush_size = amount_left;
10170 }
10171 amount_left -= flush_size;
10172 address += flush_size;
10173
10174 if (entry->is_sub_map == TRUE) {
10175 vm_map_t local_map;
10176 vm_map_offset_t local_offset;
10177
10178 local_map = entry->object.sub_map;
10179 local_offset = entry->offset;
10180 vm_map_unlock(map);
10181 if (vm_map_msync(
10182 local_map,
10183 local_offset,
10184 flush_size,
10185 sync_flags) == KERN_INVALID_ADDRESS) {
10186 had_hole = TRUE;
10187 }
10188 continue;
10189 }
10190 object = entry->object.vm_object;
10191
10192 /*
10193 * We can't sync this object if the object has not been
10194 * created yet
10195 */
10196 if (object == VM_OBJECT_NULL) {
10197 vm_map_unlock(map);
10198 continue;
10199 }
10200 offset += entry->offset;
10201 modifiable = (entry->protection & VM_PROT_WRITE)
10202 != VM_PROT_NONE;
10203
10204		vm_object_lock(object);
10205
10206 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
10207 boolean_t kill_pages = 0;
10208
10209 if (sync_flags & VM_SYNC_KILLPAGES) {
10210 if (object->ref_count == 1 && !entry->needs_copy && !object->shadow)
10211 kill_pages = 1;
10212 else
10213 kill_pages = -1;
10214 }
10215 if (kill_pages != -1)
10216 vm_object_deactivate_pages(object, offset,
10217 (vm_object_size_t)flush_size, kill_pages);
10218 vm_object_unlock(object);
10219 vm_map_unlock(map);
10220 continue;
10221		}
10222 /*
10223 * We can't sync this object if there isn't a pager.
10224 * Don't bother to sync internal objects, since there can't
10225 * be any "permanent" storage for these objects anyway.
10226 */
10227 if ((object->pager == MEMORY_OBJECT_NULL) ||
10228 (object->internal) || (object->private)) {
10229 vm_object_unlock(object);
10230 vm_map_unlock(map);
10231 continue;
10232 }
10233 /*
10234 * keep reference on the object until syncing is done
10235 */
10236 assert(object->ref_count > 0);
10237 object->ref_count++;
10238 vm_object_res_reference(object);
10239 vm_object_unlock(object);
10240
10241		vm_map_unlock(map);
10242
10243 do_sync_req = vm_object_sync(object,
10244 offset,
10245 flush_size,
10246 sync_flags & VM_SYNC_INVALIDATE,
10247 (modifiable &&
10248 (sync_flags & VM_SYNC_SYNCHRONOUS ||
10249 sync_flags & VM_SYNC_ASYNCHRONOUS)),
10250 sync_flags & VM_SYNC_SYNCHRONOUS);
10251 /*
10252 * only send a m_o_s if we returned pages or if the entry
10253 * is writable (ie dirty pages may have already been sent back)
10254 */
10255 if (!do_sync_req && !modifiable) {
10256 vm_object_deallocate(object);
10257 continue;
10258		}
10259		msync_req_alloc(new_msr);
10260
10261 vm_object_lock(object);
10262 offset += object->paging_offset;
10263
10264 new_msr->offset = offset;
10265 new_msr->length = flush_size;
10266 new_msr->object = object;
10267 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
10268re_iterate:
10269 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
10270 /*
10271 * need to check for overlapping entry, if found, wait
10272 * on overlapping msr to be done, then reiterate
10273 */
10274 msr_lock(msr);
10275 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
10276 ((offset >= msr->offset &&
10277 offset < (msr->offset + msr->length)) ||
10278 (msr->offset >= offset &&
10279 msr->offset < (offset + flush_size))))
10280 {
10281 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
10282 msr_unlock(msr);
10283 vm_object_unlock(object);
10284 thread_block(THREAD_CONTINUE_NULL);
10285 vm_object_lock(object);
10286 goto re_iterate;
10287 }
10288 msr_unlock(msr);
10289 }/* queue_iterate */
10290
10291 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
10292 vm_object_unlock(object);
10293
10294 queue_enter(&req_q, new_msr, msync_req_t, req_q);
10295
10296 (void) memory_object_synchronize(
10297 object->pager,
10298 offset,
10299 flush_size,
10300 sync_flags & ~VM_SYNC_CONTIGUOUS);
10301 }/* while */
10302
10303 /*
10304	 * wait for memory_object_synchronize_completed messages from pager(s)
10305 */
10306
10307 while (!queue_empty(&req_q)) {
10308 msr = (msync_req_t)queue_first(&req_q);
10309 msr_lock(msr);
10310 while(msr->flag != VM_MSYNC_DONE) {
10311 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
10312 msr_unlock(msr);
10313 thread_block(THREAD_CONTINUE_NULL);
10314 msr_lock(msr);
10315 }/* while */
10316 queue_remove(&req_q, msr, msync_req_t, req_q);
10317 msr_unlock(msr);
10318 vm_object_deallocate(msr->object);
10319 msync_req_free(msr);
10320	}/* while */
10321
10322 /* for proper msync() behaviour */
10323 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
10324 return(KERN_INVALID_ADDRESS);
10325
10326 return(KERN_SUCCESS);
10327}/* vm_msync */
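
/*
 * Illustrative usage sketches (not part of the original source): the two
 * most common flag combinations described in the block comment above.  A
 * synchronous flush pushes dirty pages to the pager before returning, and
 * VM_SYNC_CONTIGUOUS turns a hole in the range into KERN_INVALID_ADDRESS.
 */
#if 0	/* example only */
static kern_return_t
example_flush_range_sync(vm_map_t map, vm_map_address_t addr, vm_map_size_t len)
{
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}

static kern_return_t
example_discard_range(vm_map_t map, vm_map_address_t addr, vm_map_size_t len)
{
	/* discard clean pages; dirty or precious pages go back to the pager */
	return vm_map_msync(map, addr, len,
			    VM_SYNC_INVALIDATE | VM_SYNC_ASYNCHRONOUS);
}
#endif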
10328
10329/* Takes existing source and destination sub-maps and clones the contents of */
10330/* the source map */
10331kern_return_t
10332vm_region_clone(
10333 ipc_port_t src_region,
10334 ipc_port_t dst_region)
10335{
10336 vm_named_entry_t src_object;
10337 vm_named_entry_t dst_object;
10338 vm_map_t src_map;
10339 vm_map_t dst_map;
10340 vm_map_offset_t addr;
10341 vm_map_offset_t max_off;
10342 vm_map_entry_t entry;
10343 vm_map_entry_t new_entry;
10344 vm_map_entry_t insert_point;
10345
10346 src_object = (vm_named_entry_t)src_region->ip_kobject;
10347 dst_object = (vm_named_entry_t)dst_region->ip_kobject;
10348 if((!src_object->is_sub_map) || (!dst_object->is_sub_map)) {
10349 return KERN_INVALID_ARGUMENT;
10350 }
10351 src_map = (vm_map_t)src_object->backing.map;
10352 dst_map = (vm_map_t)dst_object->backing.map;
10353 /* destination map is assumed to be unavailable to any other */
10354 /* activity. i.e. it is new */
10355 vm_map_lock(src_map);
10356 if((src_map->min_offset != dst_map->min_offset)
10357 || (src_map->max_offset != dst_map->max_offset)) {
10358 vm_map_unlock(src_map);
10359 return KERN_INVALID_ARGUMENT;
10360 }
10361 addr = src_map->min_offset;
10362 vm_map_lookup_entry(dst_map, addr, &entry);
10363 if(entry == vm_map_to_entry(dst_map)) {
10364 entry = entry->vme_next;
10365 }
10366 if(entry == vm_map_to_entry(dst_map)) {
10367 max_off = src_map->max_offset;
10368 } else {
10369 max_off = entry->vme_start;
10370 }
10371 vm_map_lookup_entry(src_map, addr, &entry);
10372 if(entry == vm_map_to_entry(src_map)) {
10373 entry = entry->vme_next;
10374 }
10375 vm_map_lookup_entry(dst_map, addr, &insert_point);
10376 while((entry != vm_map_to_entry(src_map)) &&
10377 (entry->vme_end <= max_off)) {
10378 addr = entry->vme_start;
10379 new_entry = vm_map_entry_create(dst_map);
10380 vm_map_entry_copy(new_entry, entry);
10381 vm_map_entry_link(dst_map, insert_point, new_entry);
10382 insert_point = new_entry;
10383 if (entry->object.vm_object != VM_OBJECT_NULL) {
10384 if (new_entry->is_sub_map) {
10385 vm_map_reference(new_entry->object.sub_map);
10386 } else {
10387 vm_object_reference(
10388 new_entry->object.vm_object);
10389 }
10390 }
10391 dst_map->size += new_entry->vme_end - new_entry->vme_start;
10392 entry = entry->vme_next;
10393 }
10394 vm_map_unlock(src_map);
10395 return KERN_SUCCESS;
10396}
10397
10398/*
10399 * Routine: convert_port_entry_to_map
10400 * Purpose:
10401 * Convert from a port specifying an entry or a task
10402 * to a map. Doesn't consume the port ref; produces a map ref,
10403 * which may be null. Unlike convert_port_to_map, the
10404 *		port may be backed by a task or by a named entry.
10405 * Conditions:
10406 * Nothing locked.
10407 */
10408
10409
10410vm_map_t
10411convert_port_entry_to_map(
10412 ipc_port_t port)
10413{
10414 vm_map_t map;
10415 vm_named_entry_t named_entry;
10416
10417 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
10418 while(TRUE) {
10419 ip_lock(port);
10420 if(ip_active(port) && (ip_kotype(port)
10421 == IKOT_NAMED_ENTRY)) {
10422 named_entry =
10423 (vm_named_entry_t)port->ip_kobject;
10424 if (!(mutex_try(&(named_entry)->Lock))) {
10425 ip_unlock(port);
10426 mutex_pause();
10427 continue;
10428 }
10429 named_entry->ref_count++;
10430 mutex_unlock(&(named_entry)->Lock);
10431 ip_unlock(port);
10432 if ((named_entry->is_sub_map) &&
10433 (named_entry->protection
10434 & VM_PROT_WRITE)) {
10435 map = named_entry->backing.map;
10436 } else {
10437 mach_destroy_memory_entry(port);
10438 return VM_MAP_NULL;
10439 }
10440 vm_map_reference_swap(map);
10441 mach_destroy_memory_entry(port);
10442 break;
10443 }
10444 else
10445 return VM_MAP_NULL;
10446 }
10447	}
10448 else
10449 map = convert_port_to_map(port);
10450
10451 return map;
10452}
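
/*
 * Illustrative usage sketch (not part of the original source): resolve a
 * port that may name either a task or a writable sub-map named entry, use
 * the resulting map, and drop the map reference this routine produced.
 */
#if 0	/* example only */
static kern_return_t
example_read_via_port(
	ipc_port_t		port,
	vm_map_address_t	addr,
	vm_size_t		size,
	void			*buf)
{
	vm_map_t	map;
	kern_return_t	kr;

	map = convert_port_entry_to_map(port);
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	kr = vm_map_read_user(map, addr, buf, size);

	vm_map_deallocate(map);		/* release the ref produced above */
	return kr;
}
#endif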
10453
10454/*
10455 * Routine: convert_port_entry_to_object
10456 * Purpose:
10457 * Convert from a port specifying a named entry to an
10458 *		object. Doesn't consume the port ref; produces an object ref,
10459 * which may be null.
10460 * Conditions:
10461 * Nothing locked.
10462 */
10463
10464
10465vm_object_t
10466convert_port_entry_to_object(
10467 ipc_port_t port)
10468{
10469 vm_object_t object;
10470 vm_named_entry_t named_entry;
10471
10472 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
10473 while(TRUE) {
10474 ip_lock(port);
10475 if(ip_active(port) && (ip_kotype(port)
10476 == IKOT_NAMED_ENTRY)) {
10477 named_entry =
10478 (vm_named_entry_t)port->ip_kobject;
10479 if (!(mutex_try(&(named_entry)->Lock))) {
10480 ip_unlock(port);
10481 mutex_pause();
10482 continue;
10483 }
10484 named_entry->ref_count++;
10485 mutex_unlock(&(named_entry)->Lock);
10486 ip_unlock(port);
10487 if ((!named_entry->is_sub_map) &&
10488 (!named_entry->is_pager) &&
10489 (named_entry->protection
10490 & VM_PROT_WRITE)) {
10491 object = named_entry->backing.object;
10492 } else {
10493 mach_destroy_memory_entry(port);
10494 return (vm_object_t)NULL;
10495 }
10496 vm_object_reference(named_entry->backing.object);
10497 mach_destroy_memory_entry(port);
10498 break;
10499 }
10500 else
10501 return (vm_object_t)NULL;
10502		}
10503 } else {
10504 return (vm_object_t)NULL;
10505	}
10506
10507 return object;
10508}
10509
10510/*
10511 * Export routines to other components for the things we access locally through
10512 * macros.
10513 */
10514#undef current_map
10515vm_map_t
10516current_map(void)
10517{
10518	return (current_map_fast());
10519}
10520
10521/*
10522 * vm_map_reference:
10523 *
10524 * Most code internal to the osfmk will go through a
10525 * macro defining this. This is always here for the
10526 * use of other kernel components.
10527 */
10528#undef vm_map_reference
10529void
10530vm_map_reference(
10531 register vm_map_t map)
10532{
10533 if (map == VM_MAP_NULL)
10534 return;
10535
10536 mutex_lock(&map->s_lock);
10537#if TASK_SWAPPER
10538 assert(map->res_count > 0);
10539 assert(map->ref_count >= map->res_count);
10540 map->res_count++;
10541#endif
10542 map->ref_count++;
10543 mutex_unlock(&map->s_lock);
10544}
10545
10546/*
10547 * vm_map_deallocate:
10548 *
10549 * Removes a reference from the specified map,
10550 * destroying it if no references remain.
10551 * The map should not be locked.
10552 */
10553void
10554vm_map_deallocate(
10555 register vm_map_t map)
10556{
10557 unsigned int ref;
10558
10559 if (map == VM_MAP_NULL)
10560 return;
10561
10562 mutex_lock(&map->s_lock);
10563 ref = --map->ref_count;
10564 if (ref > 0) {
10565 vm_map_res_deallocate(map);
10566 mutex_unlock(&map->s_lock);
10567 return;
10568 }
10569 assert(map->ref_count == 0);
10570 mutex_unlock(&map->s_lock);
10571
10572#if TASK_SWAPPER
10573 /*
10574 * The map residence count isn't decremented here because
10575 * the vm_map_delete below will traverse the entire map,
10576 * deleting entries, and the residence counts on objects
10577 * and sharing maps will go away then.
10578 */
10579#endif
10580
10581 vm_map_destroy(map);
10582}
10583
10584
10585/* LP64todo - this whole mechanism is temporary. It should be redone when
10586 * the pmap layer can handle 64-bit address spaces. Until then, we trump
10587 * up a map entry for the 64-bit commpage above the map's max_offset.
10588 */
10589extern vm_map_t com_region_map64; /* the submap for 64-bit commpage */
10590extern vm_map_t com_region_map32; /* the submap for 32-bit commpage */
10591
10592
10593static void
10594vm_map_commpage(
10595 vm_map_t user_map,
10596 vm_map_t com_region_map, /* com_region_map32 or com_region_map64 */
10597 vm_map_offset_t base_address,
10598 vm_map_size_t size)
10599{
10600 vm_map_entry_t entry;
10601 vm_object_t object;
10602
10603	vm_map_lock(user_map);
10604
10605 /* The commpage is necessarily the last entry in the map.
10606 * See if one is already there (not sure if this can happen???)
10607 */
10608 entry = vm_map_last_entry(user_map);
10609 if (entry != vm_map_to_entry(user_map)) {
10610 if (entry->vme_end >= base_address) {
10611 vm_map_unlock(user_map);
10612 return;
10613 }
10614 }
10615
10616	entry = vm_map_first_entry(com_region_map);
10617 object = entry->object.vm_object;
10618 vm_object_reference(object);
10619
10620 /* We bypass vm_map_enter() because we are adding the entry past the
10621 * map's max_offset.
10622 */
10623 entry = vm_map_entry_insert(
10624 user_map,
10625 vm_map_last_entry(user_map), /* insert after last entry */
10626 base_address,
10627 base_address + size,
10628 object,
10629 0, /* offset */
10630 FALSE, /* needs_copy */
10631 FALSE, /* is_shared */
10632 FALSE, /* in_transition */
10633 VM_PROT_READ|VM_PROT_EXECUTE,
10634 VM_PROT_READ|VM_PROT_EXECUTE,
10635 VM_BEHAVIOR_DEFAULT,
10636 VM_INHERIT_NONE,
10637 1 ); /* wired_count */
10638
10639 vm_map_unlock(user_map);
10640}
10641
10642#ifdef __i386__
10643void
10644vm_map_commpage32(
10645 vm_map_t map)
10646{
10647 vm_map_commpage(map,
10648 com_region_map32,
10649 (vm_map_offset_t) (unsigned) _COMM_PAGE32_BASE_ADDRESS,
10650 (vm_map_size_t) (unsigned) _COMM_PAGE32_AREA_USED);
10651}
10652#endif /* __i386__ */
10653
10654
10655
10656void
10657vm_map_commpage64(
10658 vm_map_t map)
10659{
10660
10661 vm_map_commpage(map,
10662 com_region_map64,
10663 (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS,
10664 (vm_map_size_t) _COMM_PAGE64_AREA_USED);
10665}
10666
10667void
10668vm_map_remove_commpage(
10669 vm_map_t map )
10670{
10671 vm_map_entry_t entry;
10672
10673 while( 1 ) {
10674 vm_map_lock(map);
10675
10676 entry = vm_map_last_entry(map);
10677
10678		if ((entry == vm_map_to_entry(map)) ||
10679		    (entry->vme_start < map->max_offset))
10680 break;
10681
10682 /* clearing the wired count isn't strictly correct */
10683 entry->wired_count = 0;
10684 vm_map_entry_delete(map,entry);
10685 }
10686
10687 vm_map_unlock(map);
10688}
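
/*
 * Illustrative sketch only (the real call sites live in the task and
 * commpage setup code, not in this file): how the temporary commpage
 * mechanism above is meant to be used for a 64-bit address space, and how
 * it is torn down again.
 */
#if 0	/* example only */
static void
example_setup_64bit_map(vm_map_t map)
{
	vm_map_set_64bit(map);		/* raise max_offset to MACH_VM_MAX_ADDRESS */
	vm_map_commpage64(map);		/* map the shared 64-bit commpage above it */
}

static void
example_teardown_map(vm_map_t map)
{
	vm_map_remove_commpage(map);	/* delete the entries above max_offset */
}
#endif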
10689
10690void
10691vm_map_disable_NX(vm_map_t map)
10692{
10693 if (map == NULL)
10694 return;
10695 if (map->pmap == NULL)
10696 return;
10697
10698 pmap_disable_NX(map->pmap);
10699}
10700
10701/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
10702 * more descriptive.
10703 */
10704void
10705vm_map_set_32bit(vm_map_t map)
10706{
10707 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
10708}
10709
10710
10711void
10712vm_map_set_64bit(vm_map_t map)
10713{
10714 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
10715}
10716
10717vm_map_offset_t
10718vm_compute_max_offset(unsigned is64)
10719{
10720 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
10721}
10722
10723boolean_t
10724vm_map_has_4GB_pagezero(vm_map_t map)
10725{
10726 /*
10727 * XXX FBDP
10728 * We should lock the VM map (for read) here but we can get away
10729 * with it for now because there can't really be any race condition:
10730 * the VM map's min_offset is changed only when the VM map is created
10731 * and when the zero page is established (when the binary gets loaded),
10732 * and this routine gets called only when the task terminates and the
10733 * VM map is being torn down, and when a new map is created via
10734 * load_machfile()/execve().
10735 */
10736 return (map->min_offset >= 0x100000000ULL);
10737}
10738
10739void
10740vm_map_set_4GB_pagezero(vm_map_t map)
10741{
10742 pmap_set_4GB_pagezero(map->pmap);
10743}
10744
10745void
10746vm_map_clear_4GB_pagezero(vm_map_t map)
10747{
10748 pmap_clear_4GB_pagezero(map->pmap);
10749}
10750
10751/*
10752 * Raise a VM map's minimum offset.
10753 * To strictly enforce "page zero" reservation.
10754 */
10755kern_return_t
10756vm_map_raise_min_offset(
10757 vm_map_t map,
10758 vm_map_offset_t new_min_offset)
10759{
10760 vm_map_entry_t first_entry;
10761
10762 new_min_offset = vm_map_round_page(new_min_offset);
10763
10764 vm_map_lock(map);
10765
10766 if (new_min_offset < map->min_offset) {
10767 /*
10768 * Can't move min_offset backwards, as that would expose
10769 * a part of the address space that was previously, and for
10770 * possibly good reasons, inaccessible.
10771 */
10772 vm_map_unlock(map);
10773 return KERN_INVALID_ADDRESS;
10774 }
10775
10776 first_entry = vm_map_first_entry(map);
10777 if (first_entry != vm_map_to_entry(map) &&
10778 first_entry->vme_start < new_min_offset) {
10779 /*
10780 * Some memory was already allocated below the new
10781		 * minimum offset.  It's too late to change it now...
10782 */
10783 vm_map_unlock(map);
10784 return KERN_NO_SPACE;
10785 }
10786
10787 map->min_offset = new_min_offset;
10788
10789 vm_map_unlock(map);
10790
10791 return KERN_SUCCESS;
10792}
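
/*
 * Illustrative usage sketch (not part of the original source): reserve a
 * 4GB "page zero" on a freshly created map so that nothing can be mapped
 * below 4GB.  Fails with KERN_NO_SPACE if memory is already allocated
 * down there.
 */
#if 0	/* example only */
static kern_return_t
example_reserve_4GB_pagezero(vm_map_t map)
{
	return vm_map_raise_min_offset(map, (vm_map_offset_t) 0x100000000ULL);
}
#endif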