1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * @OSF_COPYRIGHT@
25 */
26 /*
27 * Mach Operating System
28 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
29 * All Rights Reserved.
30 *
31 * Permission to use, copy, modify and distribute this software and its
32 * documentation is hereby granted, provided that both the copyright
33 * notice and this permission notice appear in all copies of the
34 * software, derivative works or modified versions, and any portions
35 * thereof, and that both notices appear in supporting documentation.
36 *
37 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
38 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
39 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 *
41 * Carnegie Mellon requests users of this software to return to
42 *
43 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
44 * School of Computer Science
45 * Carnegie Mellon University
46 * Pittsburgh PA 15213-3890
47 *
48 * any improvements or extensions that they make and grant Carnegie Mellon
49 * the rights to redistribute these changes.
50 */
51 /*
52 */
53 /*
54 * File: vm/vm_object.c
55 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 *
57 * Virtual memory object module.
58 */
59
60 #include <mach_pagemap.h>
61 #include <task_swapper.h>
62
63 #include <mach/mach_types.h>
64 #include <mach/memory_object.h>
65 #include <mach/memory_object_default.h>
66 #include <mach/memory_object_control_server.h>
67 #include <mach/vm_param.h>
68
69 #include <ipc/ipc_types.h>
70 #include <ipc/ipc_port.h>
71
72 #include <kern/kern_types.h>
73 #include <kern/assert.h>
74 #include <kern/lock.h>
75 #include <kern/queue.h>
76 #include <kern/xpr.h>
77 #include <kern/zalloc.h>
78 #include <kern/host.h>
79 #include <kern/host_statistics.h>
80 #include <kern/processor.h>
81 #include <kern/misc_protos.h>
82
83 #include <vm/memory_object.h>
84 #include <vm/vm_fault.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_pageout.h>
89 #include <vm/vm_protos.h>
90
91 /*
92 * Virtual memory objects maintain the actual data
93 * associated with allocated virtual memory. A given
94 * page of memory exists within exactly one object.
95 *
96 * An object is only deallocated when all "references"
97 * are given up.
98 *
99 * Associated with each object is a list of all resident
100 * memory pages belonging to that object; this list is
101 * maintained by the "vm_page" module, but locked by the object's
102 * lock.
103 *
104 * Each object also records the memory object reference
105 * that is used by the kernel to request and write
106 * back data (the memory object, field "pager"), etc...
107 *
108 * Virtual memory objects are allocated to provide
109 * zero-filled memory (vm_allocate) or map a user-defined
110 * memory object into a virtual address space (vm_map).
111 *
112 * Virtual memory objects that refer to a user-defined
113 * memory object are called "permanent", because all changes
114 * made in virtual memory are reflected back to the
115 * memory manager, which may then store it permanently.
116 * Other virtual memory objects are called "temporary",
117 * meaning that changes need be written back only when
118 * necessary to reclaim pages, and that storage associated
119 * with the object can be discarded once it is no longer
120 * mapped.
121 *
122 * A permanent memory object may be mapped into more
123 * than one virtual address space. Moreover, two threads
124 * may attempt to make the first mapping of a memory
125 * object concurrently. Only one thread is allowed to
126 * complete this mapping; all others wait for the
127 * "pager_initialized" field is asserted, indicating
128 * that the first thread has initialized all of the
129 * necessary fields in the virtual memory object structure.
130 *
131 * The kernel relies on a *default memory manager* to
132 * provide backing storage for the zero-filled virtual
133 * memory objects. The pager memory objects associated
134 * with these temporary virtual memory objects are only
135 * requested from the default memory manager when it
136 * becomes necessary. Virtual memory objects
137 * that depend on the default memory manager are called
138 * "internal". The "pager_created" field is provided to
139 * indicate whether these ports have ever been allocated.
140 *
141 * The kernel may also create virtual memory objects to
142 * hold changed pages after a copy-on-write operation.
143 * In this case, the virtual memory object (and its
144 * backing storage -- its memory object) only contain
145 * those pages that have been changed. The "shadow"
146 * field refers to the virtual memory object that contains
147 * the remainder of the contents. The "shadow_offset"
148 * field indicates where in the "shadow" these contents begin.
149 * The "copy" field refers to a virtual memory object
150 * to which changed pages must be copied before changing
151 * this object, in order to implement another form
152 * of copy-on-write optimization.
153 *
154 * The virtual memory object structure also records
155 * the attributes associated with its memory object.
156 * The "pager_ready", "can_persist" and "copy_strategy"
157 * fields represent those attributes. The "cached_list"
158 * field is used in the implementation of the persistence
159 * attribute.
160 *
161 * ZZZ Continue this comment.
162 */
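/*
 * Illustrative sketch: how the "shadow" and "shadow_offset" fields
 * described above combine when looking for a resident page.  Locking
 * and fault handling are omitted, and the helper name is hypothetical.
 *
 *	static vm_page_t
 *	shadow_chain_lookup(vm_object_t object, vm_object_offset_t offset)
 *	{
 *		vm_page_t m;
 *
 *		while ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL &&
 *		       object->shadow != VM_OBJECT_NULL) {
 *			offset += object->shadow_offset;
 *			object = object->shadow;
 *		}
 *		return m;		(VM_PAGE_NULL if not resident anywhere)
 *	}
 */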
163
164 /* Forward declarations for internal functions. */
165 static kern_return_t vm_object_terminate(
166 vm_object_t object);
167
168 extern void vm_object_remove(
169 vm_object_t object);
170
171 static vm_object_t vm_object_cache_trim(
172 boolean_t called_from_vm_object_deallocate);
173
174 static void vm_object_deactivate_all_pages(
175 vm_object_t object);
176
177 static kern_return_t vm_object_copy_call(
178 vm_object_t src_object,
179 vm_object_offset_t src_offset,
180 vm_object_size_t size,
181 vm_object_t *_result_object);
182
183 static void vm_object_do_collapse(
184 vm_object_t object,
185 vm_object_t backing_object);
186
187 static void vm_object_do_bypass(
188 vm_object_t object,
189 vm_object_t backing_object);
190
191 static void vm_object_release_pager(
192 memory_object_t pager);
193
194 static zone_t vm_object_zone; /* vm backing store zone */
195
196 /*
197 * All wired-down kernel memory belongs to a single virtual
198 * memory object (kernel_object) to avoid wasting data structures.
199 */
200 static struct vm_object kernel_object_store;
201 __private_extern__ vm_object_t kernel_object = &kernel_object_store;
202
203 /*
204 * The submap object is used as a placeholder for vm_map_submap
205 * operations. The object is declared in vm_map.c because it
206 * is exported by the vm_map module. The storage is declared
207 * here because it must be initialized here.
208 */
209 static struct vm_object vm_submap_object_store;
210
211 /*
212 * Virtual memory objects are initialized from
213 * a template (see vm_object_allocate).
214 *
215 * When adding a new field to the virtual memory
216 * object structure, be sure to add initialization
217 * (see _vm_object_allocate()).
218 */
219 static struct vm_object vm_object_template;
220
221 /*
222 * Virtual memory objects that are not referenced by
223 * any address maps, but that are allowed to persist
224 * (an attribute specified by the associated memory manager),
225 * are kept in a queue (vm_object_cached_list).
226 *
227 * When an object from this queue is referenced again,
228 * for example to make another address space mapping,
229 * it must be removed from the queue. That is, the
230 * queue contains *only* objects with zero references.
231 *
232 * The kernel may choose to terminate objects from this
233 * queue in order to reclaim storage. The current policy
234 * is to permit a fixed maximum number of unreferenced
235 * objects (vm_object_cached_max).
236 *
237 * A mutex (accessed by routines
238 * vm_object_cache_{lock,lock_try,unlock}) governs the
239 * object cache. It must be held when objects are
240 * added to or removed from the cache (in vm_object_terminate).
241 * The routines that acquire a reference to a virtual
242 * memory object based on one of the memory object ports
243 * must also lock the cache.
244 *
245 * Ideally, the object cache should be more isolated
246 * from the reference mechanism, so that the lock need
247 * not be held to make simple references.
248 */
249 static queue_head_t vm_object_cached_list;
250 static int vm_object_cached_count=0;
251 static int vm_object_cached_high; /* highest # cached objects */
252 static int vm_object_cached_max = 512; /* may be patched*/
253
254 static decl_mutex_data(,vm_object_cached_lock_data)
255
256 #define vm_object_cache_lock() \
257 mutex_lock(&vm_object_cached_lock_data)
258 #define vm_object_cache_lock_try() \
259 mutex_try(&vm_object_cached_lock_data)
260 #define vm_object_cache_unlock() \
261 mutex_unlock(&vm_object_cached_lock_data)
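/*
 * The cache lock cannot be held while waiting for an object lock
 * without risking deadlock, so the deallocation path below uses a
 * try-lock/back-off idiom.  A minimal sketch, mirroring the loop in
 * vm_object_deallocate():
 *
 *	for (;;) {
 *		vm_object_cache_lock();
 *		if (vm_object_lock_try(object))
 *			break;			(both locks are now held)
 *		vm_object_cache_unlock();
 *		mutex_pause();			(back off, then retry)
 *	}
 */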
262
263 #define VM_OBJECT_HASH_COUNT 1024
264 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
265 static struct zone *vm_object_hash_zone;
266
267 struct vm_object_hash_entry {
268 queue_chain_t hash_link; /* hash chain link */
269 memory_object_t pager; /* pager we represent */
270 vm_object_t object; /* corresponding object */
271 boolean_t waiting; /* someone waiting for
272 * termination */
273 };
274
275 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
276 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
277
278 #define VM_OBJECT_HASH_SHIFT 8
279 #define vm_object_hash(pager) \
280 ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
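/*
 * Worked example (illustrative; the pointer value is made up): with
 * VM_OBJECT_HASH_SHIFT 8 and VM_OBJECT_HASH_COUNT 1024, a pager whose
 * pointer value is 0x12345680 hashes to
 *
 *	(0x12345680 >> 8) % 1024  ==  0x123456 % 1024  ==  86
 *
 * i.e. bucket 86 of vm_object_hashtable.  The shift discards the
 * low-order pointer bits, which are largely determined by allocation
 * alignment.
 */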
281
282 void vm_object_hash_entry_free(
283 vm_object_hash_entry_t entry);
284
285 /*
286 * vm_object_hash_lookup looks up a pager in the hashtable
287 * and returns the corresponding entry, with optional removal.
288 */
289
290 static vm_object_hash_entry_t
291 vm_object_hash_lookup(
292 memory_object_t pager,
293 boolean_t remove_entry)
294 {
295 register queue_t bucket;
296 register vm_object_hash_entry_t entry;
297
298 bucket = &vm_object_hashtable[vm_object_hash(pager)];
299
300 entry = (vm_object_hash_entry_t)queue_first(bucket);
301 while (!queue_end(bucket, (queue_entry_t)entry)) {
302 if (entry->pager == pager && !remove_entry)
303 return(entry);
304 else if (entry->pager == pager) {
305 queue_remove(bucket, entry,
306 vm_object_hash_entry_t, hash_link);
307 return(entry);
308 }
309
310 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
311 }
312
313 return(VM_OBJECT_HASH_ENTRY_NULL);
314 }
315
316 /*
317 * vm_object_hash_insert enters the specified
318 * pager / cache object association in the hashtable.
319 */
320
321 static void
322 vm_object_hash_insert(
323 vm_object_hash_entry_t entry)
324 {
325 register queue_t bucket;
326
327 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
328
329 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
330 }
331
332 static vm_object_hash_entry_t
333 vm_object_hash_entry_alloc(
334 memory_object_t pager)
335 {
336 vm_object_hash_entry_t entry;
337
338 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
339 entry->pager = pager;
340 entry->object = VM_OBJECT_NULL;
341 entry->waiting = FALSE;
342
343 return(entry);
344 }
345
346 void
347 vm_object_hash_entry_free(
348 vm_object_hash_entry_t entry)
349 {
350 zfree(vm_object_hash_zone, entry);
351 }
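/*
 * A minimal sketch of the hash-entry lifecycle, assuming (as
 * vm_object_pager_wakeup() below does) that the cache lock guards the
 * hash buckets; "pager" and "object" are hypothetical locals.
 *
 *	vm_object_hash_entry_t entry;
 *
 *	entry = vm_object_hash_entry_alloc(pager);	(may block in zalloc)
 *	vm_object_cache_lock();
 *	entry->object = object;
 *	vm_object_hash_insert(entry);
 *	vm_object_cache_unlock();
 *
 *	... later, when the pager association is torn down ...
 *
 *	vm_object_cache_lock();
 *	entry = vm_object_hash_lookup(pager, TRUE);	(TRUE unlinks it)
 *	vm_object_cache_unlock();
 *	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
 *		vm_object_hash_entry_free(entry);
 */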
352
353 /*
354 * vm_object_allocate:
355 *
356 * Returns a new object with the given size.
357 */
358
359 __private_extern__ void
360 _vm_object_allocate(
361 vm_object_size_t size,
362 vm_object_t object)
363 {
364 XPR(XPR_VM_OBJECT,
365 "vm_object_allocate, object 0x%X size 0x%X\n",
366 (integer_t)object, size, 0,0,0);
367
368 *object = vm_object_template;
369 queue_init(&object->memq);
370 queue_init(&object->msr_q);
371 #ifdef UPL_DEBUG
372 queue_init(&object->uplq);
373 #endif /* UPL_DEBUG */
374 vm_object_lock_init(object);
375 object->size = size;
376 }
377
378 __private_extern__ vm_object_t
379 vm_object_allocate(
380 vm_object_size_t size)
381 {
382 register vm_object_t object;
383
384 object = (vm_object_t) zalloc(vm_object_zone);
385
386 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
387
388 if (object != VM_OBJECT_NULL)
389 _vm_object_allocate(size, object);
390
391 return object;
392 }
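/*
 * A minimal usage sketch; the four-page size is arbitrary.  The single
 * reference returned here is dropped with vm_object_deallocate() below.
 *
 *	vm_object_t object;
 *
 *	object = vm_object_allocate((vm_object_size_t)(4 * PAGE_SIZE));
 *	if (object != VM_OBJECT_NULL) {
 *		... map the object or insert pages into it ...
 *		vm_object_deallocate(object);
 *	}
 */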
393
394 /*
395 * vm_object_bootstrap:
396 *
397 * Initialize the VM objects module.
398 */
399 __private_extern__ void
400 vm_object_bootstrap(void)
401 {
402 register int i;
403
404 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
405 round_page_32(512*1024),
406 round_page_32(12*1024),
407 "vm objects");
408
409 queue_init(&vm_object_cached_list);
410 mutex_init(&vm_object_cached_lock_data, 0);
411
412 vm_object_hash_zone =
413 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
414 round_page_32(512*1024),
415 round_page_32(12*1024),
416 "vm object hash entries");
417
418 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
419 queue_init(&vm_object_hashtable[i]);
420
421 /*
422 * Fill in a template object, for quick initialization
423 */
424
425 /* memq; Lock; init after allocation */
426 vm_object_template.size = 0;
427 vm_object_template.memq_hint = VM_PAGE_NULL;
428 vm_object_template.ref_count = 1;
429 #if TASK_SWAPPER
430 vm_object_template.res_count = 1;
431 #endif /* TASK_SWAPPER */
432 vm_object_template.resident_page_count = 0;
433 vm_object_template.copy = VM_OBJECT_NULL;
434 vm_object_template.shadow = VM_OBJECT_NULL;
435 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
436 vm_object_template.cow_hint = ~(vm_offset_t)0;
437 vm_object_template.true_share = FALSE;
438
439 vm_object_template.pager = MEMORY_OBJECT_NULL;
440 vm_object_template.paging_offset = 0;
441 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
442 /* msr_q; init after allocation */
443
444 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
445 vm_object_template.absent_count = 0;
446 vm_object_template.paging_in_progress = 0;
447
448 /* Begin bitfields */
449 vm_object_template.all_wanted = 0; /* all bits FALSE */
450 vm_object_template.pager_created = FALSE;
451 vm_object_template.pager_initialized = FALSE;
452 vm_object_template.pager_ready = FALSE;
453 vm_object_template.pager_trusted = FALSE;
454 vm_object_template.can_persist = FALSE;
455 vm_object_template.internal = TRUE;
456 vm_object_template.temporary = TRUE;
457 vm_object_template.private = FALSE;
458 vm_object_template.pageout = FALSE;
459 vm_object_template.alive = TRUE;
460 vm_object_template.purgable = VM_OBJECT_NONPURGABLE;
461 vm_object_template.silent_overwrite = FALSE;
462 vm_object_template.advisory_pageout = FALSE;
463 vm_object_template.shadowed = FALSE;
464 vm_object_template.terminating = FALSE;
465 vm_object_template.shadow_severed = FALSE;
466 vm_object_template.phys_contiguous = FALSE;
467 vm_object_template.nophyscache = FALSE;
468 /* End bitfields */
469
470 /* cache bitfields */
471 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
472
473 /* cached_list; init after allocation */
474 vm_object_template.last_alloc = (vm_object_offset_t) 0;
475 vm_object_template.cluster_size = 0;
476 #if MACH_PAGEMAP
477 vm_object_template.existence_map = VM_EXTERNAL_NULL;
478 #endif /* MACH_PAGEMAP */
479 #if MACH_ASSERT
480 vm_object_template.paging_object = VM_OBJECT_NULL;
481 #endif /* MACH_ASSERT */
482
483 /*
484 * Initialize the "kernel object"
485 */
486
487 kernel_object = &kernel_object_store;
488
489 /*
490 * Note that in the following size specifications, we need to add 1 because
491 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
492 */
493
494 #ifdef ppc
495 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
496 kernel_object);
497 #else
498 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
499 kernel_object);
500 #endif
501 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
502
503 /*
504 * Initialize the "submap object". Make it as large as the
505 * kernel object so that no limit is imposed on submap sizes.
506 */
507
508 vm_submap_object = &vm_submap_object_store;
509 #ifdef ppc
510 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
511 vm_submap_object);
512 #else
513 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
514 vm_submap_object);
515 #endif
516 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
517
518 /*
519 * Create an "extra" reference to this object so that we never
520 * try to deallocate it; zfree doesn't like to be called with
521 * non-zone memory.
522 */
523 vm_object_reference(vm_submap_object);
524
525 #if MACH_PAGEMAP
526 vm_external_module_initialize();
527 #endif /* MACH_PAGEMAP */
528 }
529
530 __private_extern__ void
531 vm_object_init(void)
532 {
533 /*
534 * Finish initializing the kernel object.
535 */
536 }
537
538 /* remove the typedef below when emergency work-around is taken out */
539 typedef struct vnode_pager {
540 memory_object_t pager;
541 memory_object_t pager_handle; /* pager */
542 memory_object_control_t control_handle; /* memory object's control handle */
543 void *vnode_handle; /* vnode handle */
544 } *vnode_pager_t;
545
546 #define MIGHT_NOT_CACHE_SHADOWS 1
547 #if MIGHT_NOT_CACHE_SHADOWS
548 static int cache_shadows = TRUE;
549 #endif /* MIGHT_NOT_CACHE_SHADOWS */
550
551 /*
552 * vm_object_deallocate:
553 *
554 * Release a reference to the specified object,
555 * gained either through a vm_object_allocate
556 * or a vm_object_reference call. When all references
557 * are gone, storage associated with this object
558 * may be relinquished.
559 *
560 * No object may be locked.
561 */
562 __private_extern__ void
563 vm_object_deallocate(
564 register vm_object_t object)
565 {
566 boolean_t retry_cache_trim = FALSE;
567 vm_object_t shadow = VM_OBJECT_NULL;
568
569 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
570 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
571
572
573 while (object != VM_OBJECT_NULL) {
574
575 /*
576 * The cache holds a reference (uncounted) to
577 * the object; we must lock it before removing
578 * the object.
579 */
580 for (;;) {
581 vm_object_cache_lock();
582
583 /*
584 * if we try to take a regular lock here
585 * we risk deadlocking against someone
586 * holding a lock on this object while
587 * trying to vm_object_deallocate a different
588 * object
589 */
590 if (vm_object_lock_try(object))
591 break;
592 vm_object_cache_unlock();
593 mutex_pause(); /* wait a bit */
594 }
595 assert(object->ref_count > 0);
596
597 /*
598 * If the object has a named reference, and only
599 * that reference would remain, inform the pager
600 * about the last "mapping" reference going away.
601 */
602 if ((object->ref_count == 2) && (object->named)) {
603 memory_object_t pager = object->pager;
604
605 /* Notify the Pager that there are no */
606 /* more mappers for this object */
607
608 if (pager != MEMORY_OBJECT_NULL) {
609 vm_object_unlock(object);
610 vm_object_cache_unlock();
611
612 memory_object_unmap(pager);
613
614 for (;;) {
615 vm_object_cache_lock();
616
617 /*
618 * if we try to take a regular lock here
619 * we risk deadlocking against someone
620 * holding a lock on this object while
621 * trying to vm_object_deallocate a different
622 * object
623 */
624 if (vm_object_lock_try(object))
625 break;
626 vm_object_cache_unlock();
627 mutex_pause(); /* wait a bit */
628 }
629 assert(object->ref_count > 0);
630 }
631 }
632
633 /*
634 * Lose the reference. If other references
635 * remain, then we are done, unless we need
636 * to retry a cache trim.
637 * If it is the last reference, then keep it
638 * until any pending initialization is completed.
639 */
640
641 /* if the object is terminating, it cannot go into */
642 /* the cache and we obviously should not call */
643 /* terminate again. */
644
645 if ((object->ref_count > 1) || object->terminating) {
646 object->ref_count--;
647 vm_object_res_deallocate(object);
648 vm_object_cache_unlock();
649
650 if (object->ref_count == 1 &&
651 object->shadow != VM_OBJECT_NULL) {
652 /*
653 * We don't use this VM object anymore. We
654 * would like to collapse it into its parent(s),
655 * but we don't have any pointers back to these
656 * parent object(s).
657 * But we can try and collapse this object with
658 * its own shadows, in case these are useless
659 * too...
660 */
661 vm_object_collapse(object, 0);
662 }
663
664 vm_object_unlock(object);
665 if (retry_cache_trim &&
666 ((object = vm_object_cache_trim(TRUE)) !=
667 VM_OBJECT_NULL)) {
668 continue;
669 }
670 return;
671 }
672
673 /*
674 * We have to wait for initialization
675 * before destroying or caching the object.
676 */
677
678 if (object->pager_created && ! object->pager_initialized) {
679 assert(! object->can_persist);
680 vm_object_assert_wait(object,
681 VM_OBJECT_EVENT_INITIALIZED,
682 THREAD_UNINT);
683 vm_object_unlock(object);
684 vm_object_cache_unlock();
685 thread_block(THREAD_CONTINUE_NULL);
686 continue;
687 }
688
689 /*
690 * If this object can persist, then enter it in
691 * the cache. Otherwise, terminate it.
692 *
693 * NOTE: Only permanent objects are cached, and
694 * permanent objects cannot have shadows. This
695 * affects the residence counting logic in a minor
696 * way (can do it in-line, mostly).
697 */
698
699 if ((object->can_persist) && (object->alive)) {
700 /*
701 * Now it is safe to decrement reference count,
702 * and to return if reference count is > 0.
703 */
704 if (--object->ref_count > 0) {
705 vm_object_res_deallocate(object);
706 vm_object_unlock(object);
707 vm_object_cache_unlock();
708 if (retry_cache_trim &&
709 ((object = vm_object_cache_trim(TRUE)) !=
710 VM_OBJECT_NULL)) {
711 continue;
712 }
713 return;
714 }
715
716 #if MIGHT_NOT_CACHE_SHADOWS
717 /*
718 * Remove shadow now if we don't
719 * want to cache shadows.
720 */
721 if (! cache_shadows) {
722 shadow = object->shadow;
723 object->shadow = VM_OBJECT_NULL;
724 }
725 #endif /* MIGHT_NOT_CACHE_SHADOWS */
726
727 /*
728 * Enter the object onto the queue of
729 * cached objects, and deactivate
730 * all of its pages.
731 */
732 assert(object->shadow == VM_OBJECT_NULL);
733 VM_OBJ_RES_DECR(object);
734 XPR(XPR_VM_OBJECT,
735 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
736 (integer_t)object,
737 (integer_t)vm_object_cached_list.next,
738 (integer_t)vm_object_cached_list.prev,0,0);
739
740 vm_object_cached_count++;
741 if (vm_object_cached_count > vm_object_cached_high)
742 vm_object_cached_high = vm_object_cached_count;
743 queue_enter(&vm_object_cached_list, object,
744 vm_object_t, cached_list);
745 vm_object_cache_unlock();
746 vm_object_deactivate_all_pages(object);
747 vm_object_unlock(object);
748
749 #if MIGHT_NOT_CACHE_SHADOWS
750 /*
751 * If we have a shadow that we need
752 * to deallocate, do so now, remembering
753 * to trim the cache later.
754 */
755 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
756 object = shadow;
757 retry_cache_trim = TRUE;
758 continue;
759 }
760 #endif /* MIGHT_NOT_CACHE_SHADOWS */
761
762 /*
763 * Trim the cache. If the cache trim
764 * returns with a shadow for us to deallocate,
765 * then remember to retry the cache trim
766 * when we are done deallocating the shadow.
767 * Otherwise, we are done.
768 */
769
770 object = vm_object_cache_trim(TRUE);
771 if (object == VM_OBJECT_NULL) {
772 return;
773 }
774 retry_cache_trim = TRUE;
775
776 } else {
777 /*
778 * This object is not cachable; terminate it.
779 */
780 XPR(XPR_VM_OBJECT,
781 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
782 (integer_t)object, object->resident_page_count,
783 object->paging_in_progress,
784 (void *)current_thread(),object->ref_count);
785
786 VM_OBJ_RES_DECR(object); /* XXX ? */
787 /*
788 * Terminate this object. If it had a shadow,
789 * then deallocate it; otherwise, if we need
790 * to retry a cache trim, do so now; otherwise,
791 * we are done. "pageout" objects have a shadow,
792 * but maintain a "paging reference" rather than
793 * a normal reference.
794 */
795 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
796 if(vm_object_terminate(object) != KERN_SUCCESS) {
797 return;
798 }
799 if (shadow != VM_OBJECT_NULL) {
800 object = shadow;
801 continue;
802 }
803 if (retry_cache_trim &&
804 ((object = vm_object_cache_trim(TRUE)) !=
805 VM_OBJECT_NULL)) {
806 continue;
807 }
808 return;
809 }
810 }
811 assert(! retry_cache_trim);
812 }
813
814 /*
815 * Check to see whether we really need to trim
816 * down the cache. If so, remove an object from
817 * the cache, terminate it, and repeat.
818 *
819 * Called with, and returns with, cache lock unlocked.
820 */
821 vm_object_t
822 vm_object_cache_trim(
823 boolean_t called_from_vm_object_deallocate)
824 {
825 register vm_object_t object = VM_OBJECT_NULL;
826 vm_object_t shadow;
827
828 for (;;) {
829
830 /*
831 * If we no longer need to trim the cache,
832 * then we are done.
833 */
834
835 vm_object_cache_lock();
836 if (vm_object_cached_count <= vm_object_cached_max) {
837 vm_object_cache_unlock();
838 return VM_OBJECT_NULL;
839 }
840
841 /*
842 * We must trim down the cache, so remove
843 * the first object in the cache.
844 */
845 XPR(XPR_VM_OBJECT,
846 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
847 (integer_t)vm_object_cached_list.next,
848 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
849
850 object = (vm_object_t) queue_first(&vm_object_cached_list);
851 if(object == (vm_object_t) &vm_object_cached_list) {
852 /* something's wrong with the calling parameter or */
853 /* the value of vm_object_cached_count, just fix */
854 /* and return */
855 if(vm_object_cached_max < 0)
856 vm_object_cached_max = 0;
857 vm_object_cached_count = 0;
858 vm_object_cache_unlock();
859 return VM_OBJECT_NULL;
860 }
861 vm_object_lock(object);
862 queue_remove(&vm_object_cached_list, object, vm_object_t,
863 cached_list);
864 vm_object_cached_count--;
865
866 /*
867 * Since this object is in the cache, we know
868 * that it is initialized and has no references.
869 * Take a reference to avoid recursive deallocations.
870 */
871
872 assert(object->pager_initialized);
873 assert(object->ref_count == 0);
874 object->ref_count++;
875
876 /*
877 * Terminate the object.
878 * If the object had a shadow, we let vm_object_deallocate
879 * deallocate it. "pageout" objects have a shadow, but
880 * maintain a "paging reference" rather than a normal
881 * reference.
882 * (We are careful here to limit recursion.)
883 */
884 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
885 if(vm_object_terminate(object) != KERN_SUCCESS)
886 continue;
887 if (shadow != VM_OBJECT_NULL) {
888 if (called_from_vm_object_deallocate) {
889 return shadow;
890 } else {
891 vm_object_deallocate(shadow);
892 }
893 }
894 }
895 }
896
897 boolean_t vm_object_terminate_remove_all = FALSE;
898
899 /*
900 * Routine: vm_object_terminate
901 * Purpose:
902 * Free all resources associated with a vm_object.
903 * In/out conditions:
904 * Upon entry, the object must be locked,
905 * and the object must have exactly one reference.
906 *
907 * The shadow object reference is left alone.
908 *
909 * The object must be unlocked if it is found that pages
910 * must be flushed to a backing object. If someone
911 * manages to map the object while it is being flushed,
912 * the object is returned unlocked and unchanged. Otherwise,
913 * upon exit, the cache will be unlocked, and the
914 * object will cease to exist.
915 */
916 static kern_return_t
917 vm_object_terminate(
918 register vm_object_t object)
919 {
920 memory_object_t pager;
921 register vm_page_t p;
922 vm_object_t shadow_object;
923
924 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
925 (integer_t)object, object->ref_count, 0, 0, 0);
926
927 if (!object->pageout && (!object->temporary || object->can_persist)
928 && (object->pager != NULL || object->shadow_severed)) {
929 vm_object_cache_unlock();
930 while (!queue_empty(&object->memq)) {
931 /*
932 * Clear pager_trusted bit so that the pages get yanked
933 * out of the object instead of cleaned in place. This
934 * prevents a deadlock in XMM and makes more sense anyway.
935 */
936 object->pager_trusted = FALSE;
937
938 p = (vm_page_t) queue_first(&object->memq);
939
940 VM_PAGE_CHECK(p);
941
942 if (p->busy || p->cleaning) {
943 if(p->cleaning || p->absent) {
944 vm_object_paging_wait(object, THREAD_UNINT);
945 continue;
946 } else {
947 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
948 }
949 }
950
951 vm_page_lock_queues();
952 p->busy = TRUE;
953 VM_PAGE_QUEUES_REMOVE(p);
954 vm_page_unlock_queues();
955
956 if (p->absent || p->private) {
957
958 /*
959 * For private pages, VM_PAGE_FREE just
960 * leaves the page structure around for
961 * its owner to clean up. For absent
962 * pages, the structure is returned to
963 * the appropriate pool.
964 */
965
966 goto free_page;
967 }
968
969 if (p->fictitious)
970 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
971
972 if (!p->dirty)
973 p->dirty = pmap_is_modified(p->phys_page);
974
975 if ((p->dirty || p->precious) && !p->error && object->alive) {
976 vm_pageout_cluster(p); /* flush page */
977 vm_object_paging_wait(object, THREAD_UNINT);
978 XPR(XPR_VM_OBJECT,
979 "vm_object_terminate restart, object 0x%X ref %d\n",
980 (integer_t)object, object->ref_count, 0, 0, 0);
981 } else {
982 free_page:
983 VM_PAGE_FREE(p);
984 }
985 }
986 vm_object_unlock(object);
987 vm_object_cache_lock();
988 vm_object_lock(object);
989 }
990
991 /*
992 * Make sure the object isn't already being terminated
993 */
994 if(object->terminating) {
995 object->ref_count -= 1;
996 assert(object->ref_count > 0);
997 vm_object_cache_unlock();
998 vm_object_unlock(object);
999 return KERN_FAILURE;
1000 }
1001
1002 /*
1003 * Did somebody get a reference to the object while we were
1004 * cleaning it?
1005 */
1006 if(object->ref_count != 1) {
1007 object->ref_count -= 1;
1008 assert(object->ref_count > 0);
1009 vm_object_res_deallocate(object);
1010 vm_object_cache_unlock();
1011 vm_object_unlock(object);
1012 return KERN_FAILURE;
1013 }
1014
1015 /*
1016 * Make sure no one can look us up now.
1017 */
1018
1019 object->terminating = TRUE;
1020 object->alive = FALSE;
1021 vm_object_remove(object);
1022
1023 /*
1024 * Detach the object from its shadow if we are the shadow's
1025 * copy. The reference we hold on the shadow must be dropped
1026 * by our caller.
1027 */
1028 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1029 !(object->pageout)) {
1030 vm_object_lock(shadow_object);
1031 if (shadow_object->copy == object)
1032 shadow_object->copy = VM_OBJECT_NULL;
1033 vm_object_unlock(shadow_object);
1034 }
1035
1036 /*
1037 * The pageout daemon might be playing with our pages.
1038 * Now that the object is dead, it won't touch any more
1039 * pages, but some pages might already be on their way out.
1040 * Hence, we wait until the active paging activities have ceased
1041 * before we break the association with the pager itself.
1042 */
1043 while (object->paging_in_progress != 0) {
1044 vm_object_cache_unlock();
1045 vm_object_wait(object,
1046 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1047 THREAD_UNINT);
1048 vm_object_cache_lock();
1049 vm_object_lock(object);
1050 }
1051
1052 pager = object->pager;
1053 object->pager = MEMORY_OBJECT_NULL;
1054
1055 if (pager != MEMORY_OBJECT_NULL)
1056 memory_object_control_disable(object->pager_control);
1057 vm_object_cache_unlock();
1058
1059 object->ref_count--;
1060 #if TASK_SWAPPER
1061 assert(object->res_count == 0);
1062 #endif /* TASK_SWAPPER */
1063
1064 assert (object->ref_count == 0);
1065
1066 /*
1067 * Clean or free the pages, as appropriate.
1068 * It is possible for us to find busy/absent pages,
1069 * if some faults on this object were aborted.
1070 */
1071 if (object->pageout) {
1072 assert(shadow_object != VM_OBJECT_NULL);
1073 assert(shadow_object == object->shadow);
1074
1075 vm_pageout_object_terminate(object);
1076
1077 } else if ((object->temporary && !object->can_persist) ||
1078 (pager == MEMORY_OBJECT_NULL)) {
1079 while (!queue_empty(&object->memq)) {
1080 p = (vm_page_t) queue_first(&object->memq);
1081
1082 VM_PAGE_CHECK(p);
1083 VM_PAGE_FREE(p);
1084 }
1085 } else if (!queue_empty(&object->memq)) {
1086 panic("vm_object_terminate: queue just emptied isn't");
1087 }
1088
1089 assert(object->paging_in_progress == 0);
1090 assert(object->ref_count == 0);
1091
1092 /*
1093 * If the pager has not already been released by
1094 * vm_object_destroy, we need to terminate it and
1095 * release our reference to it here.
1096 */
1097 if (pager != MEMORY_OBJECT_NULL) {
1098 vm_object_unlock(object);
1099 vm_object_release_pager(pager);
1100 vm_object_lock(object);
1101 }
1102
1103 /* kick off anyone waiting on terminating */
1104 object->terminating = FALSE;
1105 vm_object_paging_begin(object);
1106 vm_object_paging_end(object);
1107 vm_object_unlock(object);
1108
1109 #if MACH_PAGEMAP
1110 vm_external_destroy(object->existence_map, object->size);
1111 #endif /* MACH_PAGEMAP */
1112
1113 /*
1114 * Free the space for the object.
1115 */
1116 zfree(vm_object_zone, object);
1117 return KERN_SUCCESS;
1118 }
1119
1120 /*
1121 * Routine: vm_object_pager_wakeup
1122 * Purpose: Wake up anyone waiting for termination of a pager.
1123 */
1124
1125 static void
1126 vm_object_pager_wakeup(
1127 memory_object_t pager)
1128 {
1129 vm_object_hash_entry_t entry;
1130 boolean_t waiting = FALSE;
1131
1132 /*
1133 * If anyone was waiting for the memory_object_terminate
1134 * to be queued, wake them up now.
1135 */
1136 vm_object_cache_lock();
1137 entry = vm_object_hash_lookup(pager, TRUE);
1138 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1139 waiting = entry->waiting;
1140 vm_object_cache_unlock();
1141 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1142 if (waiting)
1143 thread_wakeup((event_t) pager);
1144 vm_object_hash_entry_free(entry);
1145 }
1146 }
1147
1148 /*
1149 * Routine: vm_object_release_pager
1150 * Purpose: Terminate the pager and, upon completion,
1151 * release our last reference to it.
1152 * This is just like memory_object_terminate, except
1153 * that we wake up anyone blocked in vm_object_enter
1154 * waiting for the termination message to be queued
1155 * before calling memory_object_init.
1156 */
1157 static void
1158 vm_object_release_pager(
1159 memory_object_t pager)
1160 {
1161
1162 /*
1163 * Terminate the pager.
1164 */
1165
1166 (void) memory_object_terminate(pager);
1167
1168 /*
1169 * Wakeup anyone waiting for this terminate
1170 */
1171 vm_object_pager_wakeup(pager);
1172
1173 /*
1174 * Release reference to pager.
1175 */
1176 memory_object_deallocate(pager);
1177 }
1178
1179 /*
1180 * Routine: vm_object_destroy
1181 * Purpose:
1182 * Shut down a VM object, despite the
1183 * presence of address map (or other) references
1184 * to the vm_object.
1185 */
1186 kern_return_t
1187 vm_object_destroy(
1188 vm_object_t object,
1189 __unused kern_return_t reason)
1190 {
1191 memory_object_t old_pager;
1192
1193 if (object == VM_OBJECT_NULL)
1194 return(KERN_SUCCESS);
1195
1196 /*
1197 * Remove the pager association immediately.
1198 *
1199 * This will prevent the memory manager from further
1200 * meddling. [If it wanted to flush data or make
1201 * other changes, it should have done so before performing
1202 * the destroy call.]
1203 */
1204
1205 vm_object_cache_lock();
1206 vm_object_lock(object);
1207 object->can_persist = FALSE;
1208 object->named = FALSE;
1209 object->alive = FALSE;
1210
1211 /*
1212 * Rip out the pager from the vm_object now...
1213 */
1214
1215 vm_object_remove(object);
1216 old_pager = object->pager;
1217 object->pager = MEMORY_OBJECT_NULL;
1218 if (old_pager != MEMORY_OBJECT_NULL)
1219 memory_object_control_disable(object->pager_control);
1220 vm_object_cache_unlock();
1221
1222 /*
1223 * Wait for the existing paging activity (that got
1224 * through before we nulled out the pager) to subside.
1225 */
1226
1227 vm_object_paging_wait(object, THREAD_UNINT);
1228 vm_object_unlock(object);
1229
1230 /*
1231 * Terminate the object now.
1232 */
1233 if (old_pager != MEMORY_OBJECT_NULL) {
1234 vm_object_release_pager(old_pager);
1235
1236 /*
1237 * JMM - Release the caller's reference. This assumes the
1238 * caller had a reference to release, which is a big (but
1239 * currently valid) assumption if this is driven from the
1240 * vnode pager (it is holding a named reference when making
1241 * this call).
1242 */
1243 vm_object_deallocate(object);
1244
1245 }
1246 return(KERN_SUCCESS);
1247 }
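/*
 * A minimal sketch of a caller holding a reference (e.g. a named
 * reference from the vnode pager, per the comment above) forcing the
 * object down; the reason code shown is arbitrary since the argument
 * is currently __unused.
 *
 *	(void) vm_object_destroy(object, KERN_FAILURE);
 */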
1248
1249 /*
1250 * vm_object_deactivate_all_pages
1251 *
1252 * Deactivate all pages in the specified object. (Keep its pages
1253 * in memory even though it is no longer referenced.)
1254 *
1255 * The object must be locked.
1256 */
1257 static void
1258 vm_object_deactivate_all_pages(
1259 register vm_object_t object)
1260 {
1261 register vm_page_t p;
1262
1263 queue_iterate(&object->memq, p, vm_page_t, listq) {
1264 vm_page_lock_queues();
1265 if (!p->busy)
1266 vm_page_deactivate(p);
1267 vm_page_unlock_queues();
1268 }
1269 }
1270
1271 __private_extern__ void
1272 vm_object_deactivate_pages(
1273 vm_object_t object,
1274 vm_object_offset_t offset,
1275 vm_object_size_t size,
1276 boolean_t kill_page)
1277 {
1278 vm_object_t orig_object;
1279 int pages_moved = 0;
1280 int pages_found = 0;
1281
1282 /*
1283 * entered with object lock held, acquire a paging reference to
1284 * prevent the memory_object and control ports from
1285 * being destroyed.
1286 */
1287 orig_object = object;
1288
1289 for (;;) {
1290 register vm_page_t m;
1291 vm_object_offset_t toffset;
1292 vm_object_size_t tsize;
1293
1294 vm_object_paging_begin(object);
1295 vm_page_lock_queues();
1296
1297 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1298
1299 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1300
1301 pages_found++;
1302
1303 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1304
1305 assert(!m->laundry);
1306
1307 m->reference = FALSE;
1308 pmap_clear_reference(m->phys_page);
1309
1310 if ((kill_page) && (object->internal)) {
1311 m->precious = FALSE;
1312 m->dirty = FALSE;
1313 pmap_clear_modify(m->phys_page);
1314 vm_external_state_clr(object->existence_map, offset);
1315 }
1316 VM_PAGE_QUEUES_REMOVE(m);
1317
1318 assert(!m->laundry);
1319 assert(m->object != kernel_object);
1320 assert(m->pageq.next == NULL &&
1321 m->pageq.prev == NULL);
1322 if(m->zero_fill) {
1323 queue_enter_first(
1324 &vm_page_queue_zf,
1325 m, vm_page_t, pageq);
1326 } else {
1327 queue_enter_first(
1328 &vm_page_queue_inactive,
1329 m, vm_page_t, pageq);
1330 }
1331
1332 m->inactive = TRUE;
1333 if (!m->fictitious)
1334 vm_page_inactive_count++;
1335
1336 pages_moved++;
1337 }
1338 }
1339 }
1340 vm_page_unlock_queues();
1341 vm_object_paging_end(object);
1342
1343 if (object->shadow) {
1344 vm_object_t tmp_object;
1345
1346 kill_page = 0;
1347
1348 offset += object->shadow_offset;
1349
1350 tmp_object = object->shadow;
1351 vm_object_lock(tmp_object);
1352
1353 if (object != orig_object)
1354 vm_object_unlock(object);
1355 object = tmp_object;
1356 } else
1357 break;
1358 }
1359 if (object != orig_object)
1360 vm_object_unlock(object);
1361 }
1362
1363 /*
1364 * Routine: vm_object_pmap_protect
1365 *
1366 * Purpose:
1367 * Reduces the permission for all physical
1368 * pages in the specified object range.
1369 *
1370 * If removing write permission only, it is
1371 * sufficient to protect only the pages in
1372 * the top-level object; only those pages may
1373 * have write permission.
1374 *
1375 * If removing all access, we must follow the
1376 * shadow chain from the top-level object to
1377 * remove access to all pages in shadowed objects.
1378 *
1379 * The object must *not* be locked. The object must
1380 * be temporary/internal.
1381 *
1382 * If pmap is not NULL, this routine assumes that
1383 * the only mappings for the pages are in that
1384 * pmap.
1385 */
1386
1387 __private_extern__ void
1388 vm_object_pmap_protect(
1389 register vm_object_t object,
1390 register vm_object_offset_t offset,
1391 vm_object_size_t size,
1392 pmap_t pmap,
1393 vm_map_offset_t pmap_start,
1394 vm_prot_t prot)
1395 {
1396 if (object == VM_OBJECT_NULL)
1397 return;
1398 size = vm_object_round_page(size);
1399 offset = vm_object_trunc_page(offset);
1400
1401 vm_object_lock(object);
1402
1403 assert(object->internal);
1404
1405 while (TRUE) {
1406 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
1407 vm_object_unlock(object);
1408 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1409 return;
1410 }
1411
1412 /* If the range is large relative to the resident page count, */
1413 /* iterate over the resident pages; otherwise an inverse page */
1414 /* look-up per offset will be faster. */
1415 if (ptoa_64(object->resident_page_count / 4) < size) {
1416 vm_page_t p;
1417 vm_object_offset_t end;
1418
1419 end = offset + size;
1420
1421 if (pmap != PMAP_NULL) {
1422 queue_iterate(&object->memq, p, vm_page_t, listq) {
1423 if (!p->fictitious &&
1424 (offset <= p->offset) && (p->offset < end)) {
1425 vm_map_offset_t start;
1426
1427 start = pmap_start + p->offset - offset;
1428 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
1429 }
1430 }
1431 } else {
1432 queue_iterate(&object->memq, p, vm_page_t, listq) {
1433 if (!p->fictitious &&
1434 (offset <= p->offset) && (p->offset < end)) {
1435
1436 pmap_page_protect(p->phys_page,
1437 prot & ~p->page_lock);
1438 }
1439 }
1440 }
1441 } else {
1442 vm_page_t p;
1443 vm_object_offset_t end;
1444 vm_object_offset_t target_off;
1445
1446 end = offset + size;
1447
1448 if (pmap != PMAP_NULL) {
1449 for(target_off = offset;
1450 target_off < end;
1451 target_off += PAGE_SIZE) {
1452 p = vm_page_lookup(object, target_off);
1453 if (p != VM_PAGE_NULL) {
1454 vm_offset_t start;
1455 start = pmap_start +
1456 (vm_offset_t)(p->offset - offset);
1457 pmap_protect(pmap, start,
1458 start + PAGE_SIZE, prot);
1459 }
1460 }
1461 } else {
1462 for(target_off = offset;
1463 target_off < end; target_off += PAGE_SIZE) {
1464 p = vm_page_lookup(object, target_off);
1465 if (p != VM_PAGE_NULL) {
1466 pmap_page_protect(p->phys_page,
1467 prot & ~p->page_lock);
1468 }
1469 }
1470 }
1471 }
1472
1473 if (prot == VM_PROT_NONE) {
1474 /*
1475 * Must follow shadow chain to remove access
1476 * to pages in shadowed objects.
1477 */
1478 register vm_object_t next_object;
1479
1480 next_object = object->shadow;
1481 if (next_object != VM_OBJECT_NULL) {
1482 offset += object->shadow_offset;
1483 vm_object_lock(next_object);
1484 vm_object_unlock(object);
1485 object = next_object;
1486 }
1487 else {
1488 /*
1489 * End of chain - we are done.
1490 */
1491 break;
1492 }
1493 }
1494 else {
1495 /*
1496 * Pages in shadowed objects may never have
1497 * write permission - we may stop here.
1498 */
1499 break;
1500 }
1501 }
1502
1503 vm_object_unlock(object);
1504 }
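/*
 * A minimal sketch of a vm_map-level caller revoking only write access
 * on a mapped range; every "entry_*" name and "map_pmap" is
 * hypothetical.  Because only write permission is removed, the comment
 * above guarantees that just the top-level object is visited.
 *
 *	vm_object_pmap_protect(entry_object,
 *			       entry_offset,
 *			       (vm_object_size_t)(entry_end - entry_start),
 *			       map_pmap,
 *			       entry_start,
 *			       entry_prot & ~VM_PROT_WRITE);
 */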
1505
1506 /*
1507 * Routine: vm_object_copy_slowly
1508 *
1509 * Description:
1510 * Copy the specified range of the source
1511 * virtual memory object without using
1512 * protection-based optimizations (such
1513 * as copy-on-write). The pages in the
1514 * region are actually copied.
1515 *
1516 * In/out conditions:
1517 * The caller must hold a reference and a lock
1518 * for the source virtual memory object. The source
1519 * object will be returned *unlocked*.
1520 *
1521 * Results:
1522 * If the copy is completed successfully, KERN_SUCCESS is
1523 * returned. If the caller asserted the interruptible
1524 * argument, and an interruption occurred while waiting
1525 * for a user-generated event, MACH_SEND_INTERRUPTED is
1526 * returned. Other values may be returned to indicate
1527 * hard errors during the copy operation.
1528 *
1529 * A new virtual memory object is returned in a
1530 * parameter (_result_object). The contents of this
1531 * new object, starting at a zero offset, are a copy
1532 * of the source memory region. In the event of
1533 * an error, this parameter will contain the value
1534 * VM_OBJECT_NULL.
1535 */
1536 __private_extern__ kern_return_t
1537 vm_object_copy_slowly(
1538 register vm_object_t src_object,
1539 vm_object_offset_t src_offset,
1540 vm_object_size_t size,
1541 boolean_t interruptible,
1542 vm_object_t *_result_object) /* OUT */
1543 {
1544 vm_object_t new_object;
1545 vm_object_offset_t new_offset;
1546
1547 vm_object_offset_t src_lo_offset = src_offset;
1548 vm_object_offset_t src_hi_offset = src_offset + size;
1549
1550 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1551 src_object, src_offset, size, 0, 0);
1552
1553 if (size == 0) {
1554 vm_object_unlock(src_object);
1555 *_result_object = VM_OBJECT_NULL;
1556 return(KERN_INVALID_ARGUMENT);
1557 }
1558
1559 /*
1560 * Prevent destruction of the source object while we copy.
1561 */
1562
1563 assert(src_object->ref_count > 0);
1564 src_object->ref_count++;
1565 VM_OBJ_RES_INCR(src_object);
1566 vm_object_unlock(src_object);
1567
1568 /*
1569 * Create a new object to hold the copied pages.
1570 * A few notes:
1571 * We fill the new object starting at offset 0,
1572 * regardless of the input offset.
1573 * We don't bother to lock the new object within
1574 * this routine, since we have the only reference.
1575 */
1576
1577 new_object = vm_object_allocate(size);
1578 new_offset = 0;
1579 vm_object_lock(new_object);
1580
1581 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1582
1583 for ( ;
1584 size != 0 ;
1585 src_offset += PAGE_SIZE_64,
1586 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1587 ) {
1588 vm_page_t new_page;
1589 vm_fault_return_t result;
1590
1591 while ((new_page = vm_page_alloc(new_object, new_offset))
1592 == VM_PAGE_NULL) {
1593 if (!vm_page_wait(interruptible)) {
1594 vm_object_unlock(new_object);
1595 vm_object_deallocate(new_object);
1596 vm_object_deallocate(src_object);
1597 *_result_object = VM_OBJECT_NULL;
1598 return(MACH_SEND_INTERRUPTED);
1599 }
1600 }
1601
1602 do {
1603 vm_prot_t prot = VM_PROT_READ;
1604 vm_page_t _result_page;
1605 vm_page_t top_page;
1606 register
1607 vm_page_t result_page;
1608 kern_return_t error_code;
1609
1610 vm_object_lock(src_object);
1611 vm_object_paging_begin(src_object);
1612
1613 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1614 result = vm_fault_page(src_object, src_offset,
1615 VM_PROT_READ, FALSE, interruptible,
1616 src_lo_offset, src_hi_offset,
1617 VM_BEHAVIOR_SEQUENTIAL,
1618 &prot, &_result_page, &top_page,
1619 (int *)0,
1620 &error_code, FALSE, FALSE, NULL, 0);
1621
1622 switch(result) {
1623 case VM_FAULT_SUCCESS:
1624 result_page = _result_page;
1625
1626 /*
1627 * We don't need to hold the object
1628 * lock -- the busy page will be enough.
1629 * [We don't care about picking up any
1630 * new modifications.]
1631 *
1632 * Copy the page to the new object.
1633 *
1634 * POLICY DECISION:
1635 * If result_page is clean,
1636 * we could steal it instead
1637 * of copying.
1638 */
1639
1640 vm_object_unlock(result_page->object);
1641 vm_page_copy(result_page, new_page);
1642
1643 /*
1644 * Let go of both pages (make them
1645 * not busy, perform wakeup, activate).
1646 */
1647
1648 new_page->busy = FALSE;
1649 new_page->dirty = TRUE;
1650 vm_object_lock(result_page->object);
1651 PAGE_WAKEUP_DONE(result_page);
1652
1653 vm_page_lock_queues();
1654 if (!result_page->active &&
1655 !result_page->inactive)
1656 vm_page_activate(result_page);
1657 vm_page_activate(new_page);
1658 vm_page_unlock_queues();
1659
1660 /*
1661 * Release paging references and
1662 * top-level placeholder page, if any.
1663 */
1664
1665 vm_fault_cleanup(result_page->object,
1666 top_page);
1667
1668 break;
1669
1670 case VM_FAULT_RETRY:
1671 break;
1672
1673 case VM_FAULT_FICTITIOUS_SHORTAGE:
1674 vm_page_more_fictitious();
1675 break;
1676
1677 case VM_FAULT_MEMORY_SHORTAGE:
1678 if (vm_page_wait(interruptible))
1679 break;
1680 /* fall thru */
1681
1682 case VM_FAULT_INTERRUPTED:
1683 vm_page_free(new_page);
1684 vm_object_unlock(new_object);
1685 vm_object_deallocate(new_object);
1686 vm_object_deallocate(src_object);
1687 *_result_object = VM_OBJECT_NULL;
1688 return(MACH_SEND_INTERRUPTED);
1689
1690 case VM_FAULT_MEMORY_ERROR:
1691 /*
1692 * A policy choice:
1693 * (a) ignore pages that we can't
1694 * copy
1695 * (b) return the null object if
1696 * any page fails [chosen]
1697 */
1698
1699 vm_page_lock_queues();
1700 vm_page_free(new_page);
1701 vm_page_unlock_queues();
1702 vm_object_unlock(new_object);
1703 vm_object_deallocate(new_object);
1704 vm_object_deallocate(src_object);
1705 *_result_object = VM_OBJECT_NULL;
1706 return(error_code ? error_code:
1707 KERN_MEMORY_ERROR);
1708 }
1709 } while (result != VM_FAULT_SUCCESS);
1710 }
1711
1712 /*
1713 * Lose the extra reference, and return our object.
1714 */
1715
1716 vm_object_unlock(new_object);
1717 vm_object_deallocate(src_object);
1718 *_result_object = new_object;
1719 return(KERN_SUCCESS);
1720 }
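/*
 * A minimal sketch of the calling convention from the header comment:
 * the caller holds a reference and the lock on the source object, and
 * the routine returns with the source unlocked.  "src_object" and
 * "len" are hypothetical.
 *
 *	vm_object_t copy_object;
 *	kern_return_t kr;
 *
 *	vm_object_lock(src_object);
 *	kr = vm_object_copy_slowly(src_object, 0, len,
 *				   FALSE,		(not interruptible)
 *				   &copy_object);
 *	if (kr != KERN_SUCCESS)
 *		... copy_object is VM_OBJECT_NULL here ...
 */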
1721
1722 /*
1723 * Routine: vm_object_copy_quickly
1724 *
1725 * Purpose:
1726 * Copy the specified range of the source virtual
1727 * memory object, if it can be done without waiting
1728 * for user-generated events.
1729 *
1730 * Results:
1731 * If the copy is successful, the copy is returned in
1732 * the arguments; otherwise, the arguments are not
1733 * affected.
1734 *
1735 * In/out conditions:
1736 * The object should be unlocked on entry and exit.
1737 */
1738
1739 /*ARGSUSED*/
1740 __private_extern__ boolean_t
1741 vm_object_copy_quickly(
1742 vm_object_t *_object, /* INOUT */
1743 __unused vm_object_offset_t offset, /* IN */
1744 __unused vm_object_size_t size, /* IN */
1745 boolean_t *_src_needs_copy, /* OUT */
1746 boolean_t *_dst_needs_copy) /* OUT */
1747 {
1748 vm_object_t object = *_object;
1749 memory_object_copy_strategy_t copy_strategy;
1750
1751 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1752 *_object, offset, size, 0, 0);
1753 if (object == VM_OBJECT_NULL) {
1754 *_src_needs_copy = FALSE;
1755 *_dst_needs_copy = FALSE;
1756 return(TRUE);
1757 }
1758
1759 vm_object_lock(object);
1760
1761 copy_strategy = object->copy_strategy;
1762
1763 switch (copy_strategy) {
1764 case MEMORY_OBJECT_COPY_SYMMETRIC:
1765
1766 /*
1767 * Symmetric copy strategy.
1768 * Make another reference to the object.
1769 * Leave object/offset unchanged.
1770 */
1771
1772 assert(object->ref_count > 0);
1773 object->ref_count++;
1774 vm_object_res_reference(object);
1775 object->shadowed = TRUE;
1776 vm_object_unlock(object);
1777
1778 /*
1779 * Both source and destination must make
1780 * shadows, and the source must be made
1781 * read-only if not already.
1782 */
1783
1784 *_src_needs_copy = TRUE;
1785 *_dst_needs_copy = TRUE;
1786
1787 break;
1788
1789 case MEMORY_OBJECT_COPY_DELAY:
1790 vm_object_unlock(object);
1791 return(FALSE);
1792
1793 default:
1794 vm_object_unlock(object);
1795 return(FALSE);
1796 }
1797 return(TRUE);
1798 }
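/*
 * A minimal sketch of how a caller reacts to the flags returned above:
 * for the symmetric strategy both flags come back TRUE and the caller
 * must arrange copy-on-write on both sides; a FALSE return means a
 * slower copy path must be used instead.
 *
 *	boolean_t src_needs_copy, dst_needs_copy;
 *
 *	if (vm_object_copy_quickly(&object, offset, size,
 *				   &src_needs_copy, &dst_needs_copy)) {
 *		if (src_needs_copy)
 *			... make the source mappings copy-on-write ...
 *		if (dst_needs_copy)
 *			... make the new mapping copy-on-write ...
 *	} else {
 *		... fall back to copy_call/copy_delayed/copy_slowly ...
 *	}
 */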
1799
1800 static int copy_call_count = 0;
1801 static int copy_call_sleep_count = 0;
1802 static int copy_call_restart_count = 0;
1803
1804 /*
1805 * Routine: vm_object_copy_call [internal]
1806 *
1807 * Description:
1808 * Copy the source object (src_object), using the
1809 * user-managed copy algorithm.
1810 *
1811 * In/out conditions:
1812 * The source object must be locked on entry. It
1813 * will be *unlocked* on exit.
1814 *
1815 * Results:
1816 * If the copy is successful, KERN_SUCCESS is returned.
1817 * A new object that represents the copied virtual
1818 * memory is returned in a parameter (*_result_object).
1819 * If the return value indicates an error, this parameter
1820 * is not valid.
1821 */
1822 static kern_return_t
1823 vm_object_copy_call(
1824 vm_object_t src_object,
1825 vm_object_offset_t src_offset,
1826 vm_object_size_t size,
1827 vm_object_t *_result_object) /* OUT */
1828 {
1829 kern_return_t kr;
1830 vm_object_t copy;
1831 boolean_t check_ready = FALSE;
1832
1833 /*
1834 * If a copy is already in progress, wait and retry.
1835 *
1836 * XXX
1837 * Consider making this call interruptible, as Mike
1838 * intended it to be.
1839 *
1840 * XXXO
1841 * Need a counter or version or something to allow
1842 * us to use the copy that the currently requesting
1843 * thread is obtaining -- is it worth adding to the
1844 * vm object structure? Depends on how common this case is.
1845 */
1846 copy_call_count++;
1847 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1848 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1849 THREAD_UNINT);
1850 copy_call_restart_count++;
1851 }
1852
1853 /*
1854 * Indicate (for the benefit of memory_object_create_copy)
1855 * that we want a copy for src_object. (Note that we cannot
1856 * do a real assert_wait before calling memory_object_copy,
1857 * so we simply set the flag.)
1858 */
1859
1860 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1861 vm_object_unlock(src_object);
1862
1863 /*
1864 * Ask the memory manager to give us a memory object
1865 * which represents a copy of the src object.
1866 * The memory manager may give us a memory object
1867 * which we already have, or it may give us a
1868 * new memory object. This memory object will arrive
1869 * via memory_object_create_copy.
1870 */
1871
1872 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1873 if (kr != KERN_SUCCESS) {
1874 return kr;
1875 }
1876
1877 /*
1878 * Wait for the copy to arrive.
1879 */
1880 vm_object_lock(src_object);
1881 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1882 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1883 THREAD_UNINT);
1884 copy_call_sleep_count++;
1885 }
1886 Retry:
1887 assert(src_object->copy != VM_OBJECT_NULL);
1888 copy = src_object->copy;
1889 if (!vm_object_lock_try(copy)) {
1890 vm_object_unlock(src_object);
1891 mutex_pause(); /* wait a bit */
1892 vm_object_lock(src_object);
1893 goto Retry;
1894 }
1895 if (copy->size < src_offset+size)
1896 copy->size = src_offset+size;
1897
1898 if (!copy->pager_ready)
1899 check_ready = TRUE;
1900
1901 /*
1902 * Return the copy.
1903 */
1904 *_result_object = copy;
1905 vm_object_unlock(copy);
1906 vm_object_unlock(src_object);
1907
1908 /* Wait for the copy to be ready. */
1909 if (check_ready == TRUE) {
1910 vm_object_lock(copy);
1911 while (!copy->pager_ready) {
1912 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1913 }
1914 vm_object_unlock(copy);
1915 }
1916
1917 return KERN_SUCCESS;
1918 }
1919
1920 static int copy_delayed_lock_collisions = 0;
1921 static int copy_delayed_max_collisions = 0;
1922 static int copy_delayed_lock_contention = 0;
1923 static int copy_delayed_protect_iterate = 0;
1924
1925 /*
1926 * Routine: vm_object_copy_delayed [internal]
1927 *
1928 * Description:
1929 * Copy the specified virtual memory object, using
1930 * the asymmetric copy-on-write algorithm.
1931 *
1932 * In/out conditions:
1933 * The src_object must be locked on entry. It will be unlocked
1934 * on exit - so the caller must also hold a reference to it.
1935 *
1936 * This routine will not block waiting for user-generated
1937 * events. It is not interruptible.
1938 */
1939 __private_extern__ vm_object_t
1940 vm_object_copy_delayed(
1941 vm_object_t src_object,
1942 vm_object_offset_t src_offset,
1943 vm_object_size_t size)
1944 {
1945 vm_object_t new_copy = VM_OBJECT_NULL;
1946 vm_object_t old_copy;
1947 vm_page_t p;
1948 vm_object_size_t copy_size = src_offset + size;
1949
1950 int collisions = 0;
1951 /*
1952 * The user-level memory manager wants to see all of the changes
1953 * to this object, but it has promised not to make any changes on
1954 * its own.
1955 *
1956 * Perform an asymmetric copy-on-write, as follows:
1957 * Create a new object, called a "copy object" to hold
1958 * pages modified by the new mapping (i.e., the copy,
1959 * not the original mapping).
1960 * Record the original object as the backing object for
1961 * the copy object. If the original mapping does not
1962 * change a page, it may be used read-only by the copy.
1963 * Record the copy object in the original object.
1964 * When the original mapping causes a page to be modified,
1965 * it must be copied to a new page that is "pushed" to
1966 * the copy object.
1967 * Mark the new mapping (the copy object) copy-on-write.
1968 * This makes the copy object itself read-only, allowing
1969 * it to be reused if the original mapping makes no
1970 * changes, and simplifying the synchronization required
1971 * in the "push" operation described above.
1972 *
1973 * The copy-on-write is said to be asymmetric because the original
1974 * object is *not* marked copy-on-write. A copied page is pushed
1975 * to the copy object, regardless of which party attempted to modify
1976 * the page.
1977 *
1978 * Repeated asymmetric copy operations may be done. If the
1979 * original object has not been changed since the last copy, its
1980 * copy object can be reused. Otherwise, a new copy object can be
1981 * inserted between the original object and its previous copy
1982 * object. Since any copy object is read-only, this cannot
1983 * affect the contents of the previous copy object.
1984 *
1985 * Note that a copy object is higher in the object tree than the
1986 * original object; therefore, use of the copy object recorded in
1987 * the original object must be done carefully, to avoid deadlock.
1988 */
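/*
 * A minimal sketch of the pointer surgery performed below, shown on a
 * hypothetical, simplified structure (only the shadow/copy links are
 * real vm_object fields; everything else is illustration):
 *
 *	struct obj { struct obj *shadow; struct obj *copy; };
 *
 *	static void
 *	insert_copy(struct obj *src, struct obj *old_copy,
 *		    struct obj *new_copy)
 *	{
 *		if (old_copy != NULL)
 *			old_copy->shadow = new_copy;
 *		new_copy->shadow = src;
 *		src->copy = new_copy;
 *	}
 *
 * The real code interleaves these assignments with reference counts,
 * residence counts and the wired-page checks.
 */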
1989
1990 Retry:
1991
1992 /*
1993 * Wait for paging in progress.
1994 */
1995 if (!src_object->true_share)
1996 vm_object_paging_wait(src_object, THREAD_UNINT);
1997
1998 /*
1999 * See whether we can reuse the result of a previous
2000 * copy operation.
2001 */
2002
2003 old_copy = src_object->copy;
2004 if (old_copy != VM_OBJECT_NULL) {
2005 /*
2006 * Try to get the locks (out of order)
2007 */
2008 if (!vm_object_lock_try(old_copy)) {
2009 vm_object_unlock(src_object);
2010 mutex_pause();
2011
2012 /* Heisenberg Rules */
2013 copy_delayed_lock_collisions++;
2014 if (collisions++ == 0)
2015 copy_delayed_lock_contention++;
2016
2017 if (collisions > copy_delayed_max_collisions)
2018 copy_delayed_max_collisions = collisions;
2019
2020 vm_object_lock(src_object);
2021 goto Retry;
2022 }
2023
2024 /*
2025 * Determine whether the old copy object has
2026 * been modified.
2027 */
2028
2029 if (old_copy->resident_page_count == 0 &&
2030 !old_copy->pager_created) {
2031 /*
2032 * It has not been modified.
2033 *
2034 * Return another reference to
2035 * the existing copy-object if
2036 * we can safely grow it (if
2037 * needed).
2038 */
2039
2040 if (old_copy->size < copy_size) {
2041 /*
2042 * We can't perform a delayed copy if any of the
2043 * pages in the extended range are wired (because
2044 * we can't safely take write permission away from
2045 * wired pages). If the pages aren't wired, then
2046 * go ahead and protect them.
2047 */
2048 copy_delayed_protect_iterate++;
2049 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2050 if (!p->fictitious &&
2051 p->offset >= old_copy->size &&
2052 p->offset < copy_size) {
2053 if (p->wire_count > 0) {
2054 vm_object_unlock(old_copy);
2055 vm_object_unlock(src_object);
2056
2057 if (new_copy != VM_OBJECT_NULL) {
2058 vm_object_unlock(new_copy);
2059 vm_object_deallocate(new_copy);
2060 }
2061
2062 return VM_OBJECT_NULL;
2063 } else {
2064 pmap_page_protect(p->phys_page,
2065 (VM_PROT_ALL & ~VM_PROT_WRITE &
2066 ~p->page_lock));
2067 }
2068 }
2069 }
2070 old_copy->size = copy_size;
2071 }
2072
2073 vm_object_reference_locked(old_copy);
2074 vm_object_unlock(old_copy);
2075 vm_object_unlock(src_object);
2076
2077 if (new_copy != VM_OBJECT_NULL) {
2078 vm_object_unlock(new_copy);
2079 vm_object_deallocate(new_copy);
2080 }
2081
2082 return(old_copy);
2083 }
2084
2085 /*
2086 * Adjust the size argument so that the newly-created
2087 * copy object will be large enough to back either the
2088 * old copy object or the new mapping.
2089 */
2090 if (old_copy->size > copy_size)
2091 copy_size = old_copy->size;
2092
2093 if (new_copy == VM_OBJECT_NULL) {
2094 vm_object_unlock(old_copy);
2095 vm_object_unlock(src_object);
2096 new_copy = vm_object_allocate(copy_size);
2097 vm_object_lock(src_object);
2098 vm_object_lock(new_copy);
2099 goto Retry;
2100 }
2101 new_copy->size = copy_size;
2102
2103 /*
2104 * The copy-object is always made large enough to
2105 * completely shadow the original object, since
2106 * it may have several users who want to shadow
2107 * the original object at different points.
2108 */
2109
2110 assert((old_copy->shadow == src_object) &&
2111 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2112
2113 } else if (new_copy == VM_OBJECT_NULL) {
2114 vm_object_unlock(src_object);
2115 new_copy = vm_object_allocate(copy_size);
2116 vm_object_lock(src_object);
2117 vm_object_lock(new_copy);
2118 goto Retry;
2119 }
2120
2121 /*
2122 * We now have the src object locked, and the new copy object
2123 * allocated and locked (and potentially the old copy locked).
2124 * Before we go any further, make sure we can still perform
2125 * a delayed copy, as the situation may have changed.
2126 *
2127 * Specifically, we can't perform a delayed copy if any of the
2128 * pages in the range are wired (because we can't safely take
2129 * write permission away from wired pages). If the pages aren't
2130 * wired, then go ahead and protect them.
2131 */
2132 copy_delayed_protect_iterate++;
2133 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2134 if (!p->fictitious && p->offset < copy_size) {
2135 if (p->wire_count > 0) {
2136 if (old_copy)
2137 vm_object_unlock(old_copy);
2138 vm_object_unlock(src_object);
2139 vm_object_unlock(new_copy);
2140 vm_object_deallocate(new_copy);
2141 return VM_OBJECT_NULL;
2142 } else {
2143 pmap_page_protect(p->phys_page,
2144 (VM_PROT_ALL & ~VM_PROT_WRITE &
2145 ~p->page_lock));
2146 }
2147 }
2148 }
2149
2150 if (old_copy != VM_OBJECT_NULL) {
2151 /*
2152 * Make the old copy-object shadow the new one.
2153 * It will receive no more pages from the original
2154 * object.
2155 */
2156
2157 src_object->ref_count--; /* remove ref. from old_copy */
2158 assert(src_object->ref_count > 0);
2159 old_copy->shadow = new_copy;
2160 assert(new_copy->ref_count > 0);
2161 new_copy->ref_count++; /* for old_copy->shadow ref. */
2162
2163 #if TASK_SWAPPER
2164 if (old_copy->res_count) {
2165 VM_OBJ_RES_INCR(new_copy);
2166 VM_OBJ_RES_DECR(src_object);
2167 }
2168 #endif
2169
2170 vm_object_unlock(old_copy); /* done with old_copy */
2171 }
2172
2173 /*
2174 * Point the new copy at the existing object.
2175 */
2176 new_copy->shadow = src_object;
2177 new_copy->shadow_offset = 0;
2178 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2179 assert(src_object->ref_count > 0);
2180 src_object->ref_count++;
2181 VM_OBJ_RES_INCR(src_object);
2182 src_object->copy = new_copy;
2183 vm_object_unlock(src_object);
2184 vm_object_unlock(new_copy);
2185
2186 XPR(XPR_VM_OBJECT,
2187 "vm_object_copy_delayed: used copy object %X for source %X\n",
2188 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2189
2190 return(new_copy);
2191 }
2192
2193 /*
2194 * Routine: vm_object_copy_strategically
2195 *
2196 * Purpose:
2197 * Perform a copy according to the source object's
2198 * declared strategy. This operation may block,
2199 * and may be interrupted.
2200 */
2201 __private_extern__ kern_return_t
2202 vm_object_copy_strategically(
2203 register vm_object_t src_object,
2204 vm_object_offset_t src_offset,
2205 vm_object_size_t size,
2206 vm_object_t *dst_object, /* OUT */
2207 vm_object_offset_t *dst_offset, /* OUT */
2208 boolean_t *dst_needs_copy) /* OUT */
2209 {
2210 boolean_t result;
2211 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2212 memory_object_copy_strategy_t copy_strategy;
2213
2214 assert(src_object != VM_OBJECT_NULL);
2215
2216 vm_object_lock(src_object);
2217
2218 /*
2219 * The copy strategy is only valid if the memory manager
2220 * is "ready". Internal objects are always ready.
2221 */
2222
2223 while (!src_object->internal && !src_object->pager_ready) {
2224 wait_result_t wait_result;
2225
2226 wait_result = vm_object_sleep( src_object,
2227 VM_OBJECT_EVENT_PAGER_READY,
2228 interruptible);
2229 if (wait_result != THREAD_AWAKENED) {
2230 vm_object_unlock(src_object);
2231 *dst_object = VM_OBJECT_NULL;
2232 *dst_offset = 0;
2233 *dst_needs_copy = FALSE;
2234 return(MACH_SEND_INTERRUPTED);
2235 }
2236 }
2237
2238 copy_strategy = src_object->copy_strategy;
2239
2240 /*
2241 * Use the appropriate copy strategy.
2242 */
2243
2244 switch (copy_strategy) {
2245 case MEMORY_OBJECT_COPY_DELAY:
2246 *dst_object = vm_object_copy_delayed(src_object,
2247 src_offset, size);
2248 if (*dst_object != VM_OBJECT_NULL) {
2249 *dst_offset = src_offset;
2250 *dst_needs_copy = TRUE;
2251 result = KERN_SUCCESS;
2252 break;
2253 }
2254 vm_object_lock(src_object);
2255 /* fall thru when delayed copy not allowed */
2256
2257 case MEMORY_OBJECT_COPY_NONE:
2258 result = vm_object_copy_slowly(src_object, src_offset, size,
2259 interruptible, dst_object);
2260 if (result == KERN_SUCCESS) {
2261 *dst_offset = 0;
2262 *dst_needs_copy = FALSE;
2263 }
2264 break;
2265
2266 case MEMORY_OBJECT_COPY_CALL:
2267 result = vm_object_copy_call(src_object, src_offset, size,
2268 dst_object);
2269 if (result == KERN_SUCCESS) {
2270 *dst_offset = src_offset;
2271 *dst_needs_copy = TRUE;
2272 }
2273 break;
2274
2275 case MEMORY_OBJECT_COPY_SYMMETRIC:
2276 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2277 vm_object_unlock(src_object);
2278 result = KERN_MEMORY_RESTART_COPY;
2279 break;
2280
2281 default:
2282 panic("copy_strategically: bad strategy");
2283 result = KERN_INVALID_ARGUMENT;
2284 }
2285 return(result);
2286 }
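/*
 * Caller-side sketch (variable names assumed) showing how the OUT
 * parameters are typically consumed:
 *
 *	kr = vm_object_copy_strategically(src, off, len,
 *					  &copy_obj, &copy_off, &needs_copy);
 *	if (kr == KERN_MEMORY_RESTART_COPY) {
 *		(symmetric strategy: the caller must perform the copy itself)
 *	} else if (kr == KERN_SUCCESS && needs_copy) {
 *		(delayed or call copy: mark the new mapping copy-on-write)
 *	}
 */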
2287
2288 /*
2289 * vm_object_shadow:
2290 *
2291 * Create a new object which is backed by the
2292 * specified existing object range. The source
2293 * object reference is deallocated.
2294 *
2295 * The new object and offset into that object
2296 * are returned in the source parameters.
2297 */
2298 boolean_t vm_object_shadow_check = FALSE;
2299
2300 __private_extern__ boolean_t
2301 vm_object_shadow(
2302 vm_object_t *object, /* IN/OUT */
2303 vm_object_offset_t *offset, /* IN/OUT */
2304 vm_object_size_t length)
2305 {
2306 register vm_object_t source;
2307 register vm_object_t result;
2308
2309 source = *object;
2310 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2311
2312 /*
2313 * Determine if we really need a shadow.
2314 */
2315
2316 if (vm_object_shadow_check && source->ref_count == 1 &&
2317 (source->shadow == VM_OBJECT_NULL ||
2318 source->shadow->copy == VM_OBJECT_NULL))
2319 {
2320 source->shadowed = FALSE;
2321 return FALSE;
2322 }
2323
2324 /*
2325 * Allocate a new object with the given length
2326 */
2327
2328 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2329 panic("vm_object_shadow: no object for shadowing");
2330
2331 /*
2332 * The new object shadows the source object, adding
2333 * a reference to it. Our caller changes his reference
2334 * to point to the new object, removing a reference to
2335 * the source object. Net result: no change of reference
2336 * count.
2337 */
2338 result->shadow = source;
2339
2340 /*
2341 * Store the offset into the source object,
2342 * and fix up the offset into the new object.
2343 */
2344
2345 result->shadow_offset = *offset;
2346
2347 /*
2348 * Return the new object and offset
2349 */
2350
2351 *offset = 0;
2352 *object = result;
2353 return TRUE;
2354 }
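/*
 * Calling convention, as a sketch (entry_object/entry_offset/entry_size
 * are hypothetical caller variables): the object/offset pair is updated
 * in place and the original reference is consumed by the new shadow.
 *
 *	vm_object_t		obj = entry_object;
 *	vm_object_offset_t	off = entry_offset;
 *
 *	if (vm_object_shadow(&obj, &off, entry_size)) {
 *	}
 *
 * On a TRUE return, obj names the freshly allocated shadow and off is
 * 0; on FALSE, no shadow was needed and obj/off are unchanged.
 */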
2355
2356 /*
2357 * The relationship between vm_object structures and
2358 * the memory_object requires careful synchronization.
2359 *
2360 * All associations are created by memory_object_create_named
2361 * for external pagers and vm_object_pager_create for internal
2362 * objects as follows:
2363 *
2364 * pager: the memory_object itself, supplied by
2365 * the user requesting a mapping (or the kernel,
2366 * when initializing internal objects); the
2367 * kernel simulates holding send rights by keeping
2368 * a port reference;
2369 *
2370 * pager_request:
2371 * the memory object control port,
2372 * created by the kernel; the kernel holds
2373 * receive (and ownership) rights to this
2374 * port, but no other references.
2375 *
2376 * When initialization is complete, the "initialized" field
2377 * is asserted. Other mappings using a particular memory object,
2378 * and any references to the vm_object gained through the
2379 * port association must wait for this initialization to occur.
2380 *
2381 * In order to allow the memory manager to set attributes before
2382 * requests (notably virtual copy operations, but also data or
2383 * unlock requests) are made, a "ready" attribute is made available.
2384 * Only the memory manager may affect the value of this attribute.
2385 * Its value does not affect critical kernel functions, such as
2386 * internal object initialization or destruction. [Furthermore,
2387 * memory objects created by the kernel are assumed to be ready
2388 * immediately; the default memory manager need not explicitly
2389 * set the "ready" attribute.]
2390 *
2391 * [Both the "initialized" and "ready" attribute wait conditions
2392 * use the "pager" field as the wait event.]
2393 *
2394 * The port associations can be broken down by any of the
2395 * following routines:
2396 * vm_object_terminate:
2397 * No references to the vm_object remain, and
2398 * the object cannot (or will not) be cached.
2399 * This is the normal case, and is done even
2400 * though one of the other cases has already been
2401 * done.
2402 * memory_object_destroy:
2403 * The memory manager has requested that the
2404 * kernel relinquish references to the memory
2405 * object. [The memory manager may not want to
2406 * destroy the memory object, but may wish to
2407 * refuse or tear down existing memory mappings.]
2408 *
2409 * Each routine that breaks an association must break all of
2410 * them at once. At some later time, that routine must clear
2411 * the pager field and release the memory object references.
2412 * [Furthermore, each routine must cope with the simultaneous
2413 * or previous operations of the others.]
2414 *
2415 * In addition to the lock on the object, the vm_object_cache_lock
2416 * governs the associations. References gained through the
2417 * association require use of the cache lock.
2418 *
2419 * Because the pager field may be cleared spontaneously, it
2420 * cannot be used to determine whether a memory object has
2421 * ever been associated with a particular vm_object. [This
2422 * knowledge is important to the shadow object mechanism.]
2423 * For this reason, an additional "created" attribute is
2424 * provided.
2425 *
2426 * During various paging operations, the pager reference found in the
2427 * vm_object must be valid. To prevent this from being released,
2428 * (other than being removed, i.e., made null), routines may use
2429 * the vm_object_paging_begin/end routines [actually, macros].
2430 * The implementation uses the "paging_in_progress" and "wanted" fields.
2431 * [Operations that alter the validity of the pager values include the
2432 * termination routines and vm_object_collapse.]
2433 */
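/*
 * Concretely, the "initialized" handshake described above is just a
 * sleep/wakeup pair on the object (this is the pattern used by
 * vm_object_enter below):
 *
 *	vm_object_lock(object);
 *	while (!object->pager_initialized)
 *		vm_object_sleep(object,
 *				VM_OBJECT_EVENT_INITIALIZED,
 *				THREAD_UNINT);
 *	vm_object_unlock(object);
 *
 * with the initializing thread setting pager_initialized and calling
 * vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED).  The "ready"
 * attribute uses the same mechanism with VM_OBJECT_EVENT_PAGER_READY.
 */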
2434
2435 #if 0
2436 static void vm_object_abort_activity(
2437 vm_object_t object);
2438
2439 /*
2440 * Routine: vm_object_abort_activity [internal use only]
2441 * Purpose:
2442 * Abort paging requests pending on this object.
2443 * In/out conditions:
2444 * The object is locked on entry and exit.
2445 */
2446 static void
2447 vm_object_abort_activity(
2448 vm_object_t object)
2449 {
2450 register
2451 vm_page_t p;
2452 vm_page_t next;
2453
2454 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
2455 (integer_t)object, 0, 0, 0, 0);
2456
2457 /*
2458 * Abort all activity that would be waiting
2459 * for a result on this memory object.
2460 *
2461 * We could also choose to destroy all pages
2462 * that we have in memory for this object, but
2463 * we don't.
2464 */
2465
2466 p = (vm_page_t) queue_first(&object->memq);
2467 while (!queue_end(&object->memq, (queue_entry_t) p)) {
2468 next = (vm_page_t) queue_next(&p->listq);
2469
2470 /*
2471 * If it's being paged in, destroy it.
2472 * If an unlock has been requested, start it again.
2473 */
2474
2475 if (p->busy && p->absent) {
2476 VM_PAGE_FREE(p);
2477 }
2478 else {
2479 if (p->unlock_request != VM_PROT_NONE)
2480 p->unlock_request = VM_PROT_NONE;
2481 PAGE_WAKEUP(p);
2482 }
2483
2484 p = next;
2485 }
2486
2487 /*
2488 * Wake up threads waiting for the memory object to
2489 * become ready.
2490 */
2491
2492 object->pager_ready = TRUE;
2493 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2494 }
2495
2496 /*
2497 * Routine: vm_object_pager_dead
2498 *
2499 * Purpose:
2500 * A port is being destroyed, and the IPC kobject code
2501 * can't tell if it represents a pager port or not.
2502 * So this function is called each time it sees a port
2503 * die.
2504 * THIS IS HORRIBLY INEFFICIENT. We should only call
2505 * this routine if we had requested a notification on
2506 * the port.
2507 */
2508
2509 __private_extern__ void
2510 vm_object_pager_dead(
2511 ipc_port_t pager)
2512 {
2513 vm_object_t object;
2514 vm_object_hash_entry_t entry;
2515
2516 /*
2517 * Perform essentially the same operations as in vm_object_lookup,
2518 * except that this time we look up based on the memory_object
2519 * port, not the control port.
2520 */
2521 vm_object_cache_lock();
2522 entry = vm_object_hash_lookup(pager, FALSE);
2523 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2524 entry->object == VM_OBJECT_NULL) {
2525 vm_object_cache_unlock();
2526 return;
2527 }
2528
2529 object = entry->object;
2530 entry->object = VM_OBJECT_NULL;
2531
2532 vm_object_lock(object);
2533 if (object->ref_count == 0) {
2534 XPR(XPR_VM_OBJECT_CACHE,
2535 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2536 (integer_t)object,
2537 (integer_t)vm_object_cached_list.next,
2538 (integer_t)vm_object_cached_list.prev, 0,0);
2539
2540 queue_remove(&vm_object_cached_list, object,
2541 vm_object_t, cached_list);
2542 vm_object_cached_count--;
2543 }
2544 object->ref_count++;
2545 vm_object_res_reference(object);
2546
2547 object->can_persist = FALSE;
2548
2549 assert(object->pager == pager);
2550
2551 /*
2552 * Remove the pager association.
2553 *
2554 * Note that the memory_object itself is dead, so
2555 * we don't bother with it.
2556 */
2557
2558 object->pager = MEMORY_OBJECT_NULL;
2559
2560 vm_object_unlock(object);
2561 vm_object_cache_unlock();
2562
2563 vm_object_pager_wakeup(pager);
2564
2565 /*
2566 * Release the pager reference. Note that there's no
2567 * point in trying the memory_object_terminate call
2568 * because the memory_object itself is dead. Also
2569 * release the memory_object_control reference, since
2570 * the pager didn't do that either.
2571 */
2572
2573 memory_object_deallocate(pager);
2574 memory_object_control_deallocate(object->pager_request);
2575
2576
2577 /*
2578 * Restart pending page requests
2579 */
2580 vm_object_lock(object);
2581 vm_object_abort_activity(object);
2582 vm_object_unlock(object);
2583
2584 /*
2585 * Lose the object reference.
2586 */
2587
2588 vm_object_deallocate(object);
2589 }
2590 #endif
2591
2592 /*
2593 * Routine: vm_object_enter
2594 * Purpose:
2595 * Find a VM object corresponding to the given
2596 * pager; if no such object exists, create one,
2597 * and initialize the pager.
2598 */
2599 vm_object_t
2600 vm_object_enter(
2601 memory_object_t pager,
2602 vm_object_size_t size,
2603 boolean_t internal,
2604 boolean_t init,
2605 boolean_t named)
2606 {
2607 register vm_object_t object;
2608 vm_object_t new_object;
2609 boolean_t must_init;
2610 vm_object_hash_entry_t entry, new_entry;
2611
2612 if (pager == MEMORY_OBJECT_NULL)
2613 return(vm_object_allocate(size));
2614
2615 new_object = VM_OBJECT_NULL;
2616 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2617 must_init = init;
2618
2619 /*
2620 * Look for an object associated with this port.
2621 */
2622
2623 vm_object_cache_lock();
2624 do {
2625 entry = vm_object_hash_lookup(pager, FALSE);
2626
2627 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2628 if (new_object == VM_OBJECT_NULL) {
2629 /*
2630 * We must unlock to create a new object;
2631 * if we do so, we must try the lookup again.
2632 */
2633 vm_object_cache_unlock();
2634 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2635 new_entry = vm_object_hash_entry_alloc(pager);
2636 new_object = vm_object_allocate(size);
2637 vm_object_cache_lock();
2638 } else {
2639 /*
2640 * Lookup failed twice, and we have something
2641 * to insert; set the object.
2642 */
2643 vm_object_hash_insert(new_entry);
2644 entry = new_entry;
2645 entry->object = new_object;
2646 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2647 new_object = VM_OBJECT_NULL;
2648 must_init = TRUE;
2649 }
2650 } else if (entry->object == VM_OBJECT_NULL) {
2651 /*
2652 * If a previous object is being terminated,
2653 * we must wait for the termination message
2654 * to be queued (and lookup the entry again).
2655 */
2656 entry->waiting = TRUE;
2657 entry = VM_OBJECT_HASH_ENTRY_NULL;
2658 assert_wait((event_t) pager, THREAD_UNINT);
2659 vm_object_cache_unlock();
2660 thread_block(THREAD_CONTINUE_NULL);
2661 vm_object_cache_lock();
2662 }
2663 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
2664
2665 object = entry->object;
2666 assert(object != VM_OBJECT_NULL);
2667
2668 if (!must_init) {
2669 vm_object_lock(object);
2670 assert(!internal || object->internal);
2671 if (named) {
2672 assert(!object->named);
2673 object->named = TRUE;
2674 }
2675 if (object->ref_count == 0) {
2676 XPR(XPR_VM_OBJECT_CACHE,
2677 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2678 (integer_t)object,
2679 (integer_t)vm_object_cached_list.next,
2680 (integer_t)vm_object_cached_list.prev, 0,0);
2681 queue_remove(&vm_object_cached_list, object,
2682 vm_object_t, cached_list);
2683 vm_object_cached_count--;
2684 }
2685 object->ref_count++;
2686 vm_object_res_reference(object);
2687 vm_object_unlock(object);
2688
2689 VM_STAT(hits++);
2690 }
2691 assert(object->ref_count > 0);
2692
2693 VM_STAT(lookups++);
2694
2695 vm_object_cache_unlock();
2696
2697 XPR(XPR_VM_OBJECT,
2698 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2699 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2700
2701 /*
2702 * If we raced to create a vm_object but lost, let's
2703 * throw away ours.
2704 */
2705
2706 if (new_object != VM_OBJECT_NULL)
2707 vm_object_deallocate(new_object);
2708
2709 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2710 vm_object_hash_entry_free(new_entry);
2711
2712 if (must_init) {
2713 memory_object_control_t control;
2714
2715 /*
2716 * Allocate request port.
2717 */
2718
2719 control = memory_object_control_allocate(object);
2720 assert (control != MEMORY_OBJECT_CONTROL_NULL);
2721
2722 vm_object_lock(object);
2723 assert(object != kernel_object);
2724
2725 /*
2726 * Copy the reference we were given.
2727 */
2728
2729 memory_object_reference(pager);
2730 object->pager_created = TRUE;
2731 object->pager = pager;
2732 object->internal = internal;
2733 object->pager_trusted = internal;
2734 if (!internal) {
2735 /* copy strategy invalid until set by memory manager */
2736 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2737 }
2738 object->pager_control = control;
2739 object->pager_ready = FALSE;
2740
2741 vm_object_unlock(object);
2742
2743 /*
2744 * Let the pager know we're using it.
2745 */
2746
2747 (void) memory_object_init(pager,
2748 object->pager_control,
2749 PAGE_SIZE);
2750
2751 vm_object_lock(object);
2752 if (named)
2753 object->named = TRUE;
2754 if (internal) {
2755 object->pager_ready = TRUE;
2756 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2757 }
2758
2759 object->pager_initialized = TRUE;
2760 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2761 } else {
2762 vm_object_lock(object);
2763 }
2764
2765 /*
2766 * [At this point, the object must be locked]
2767 */
2768
2769 /*
2770 * Wait for the work above to be done by the first
2771 * thread to map this object.
2772 */
2773
2774 while (!object->pager_initialized) {
2775 vm_object_sleep(object,
2776 VM_OBJECT_EVENT_INITIALIZED,
2777 THREAD_UNINT);
2778 }
2779 vm_object_unlock(object);
2780
2781 XPR(XPR_VM_OBJECT,
2782 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2783 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2784 return(object);
2785 }
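/*
 * The lookup loop above follows the usual optimistic-allocation shape;
 * stripped of the pager-specific details it is (pseudo-code, names
 * hypothetical):
 *
 *	lock(cache);
 *	for (;;) {
 *		entry = hash_lookup(pager);
 *		if (entry != NULL)
 *			break;			(someone beat us to it)
 *		if (spare == NULL) {
 *			unlock(cache);		(cannot allocate under the lock)
 *			spare = allocate();
 *			lock(cache);
 *			continue;		(re-check after dropping the lock)
 *		}
 *		hash_insert(pager, spare);	(we won the race)
 *		entry = spare;
 *		spare = NULL;
 *		break;
 *	}
 *	unlock(cache);
 *	if (spare != NULL)
 *		free(spare);			(we lost the race)
 */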
2786
2787 /*
2788 * Routine: vm_object_pager_create
2789 * Purpose:
2790 * Create a memory object for an internal object.
2791 * In/out conditions:
2792 * The object is locked on entry and exit;
2793 * it may be unlocked within this call.
2794 * Limitations:
2795 * Only one thread may be performing a
2796 * vm_object_pager_create on an object at
2797 * a time. Presumably, only the pageout
2798 * daemon will be using this routine.
2799 */
2800
2801 void
2802 vm_object_pager_create(
2803 register vm_object_t object)
2804 {
2805 memory_object_t pager;
2806 vm_object_hash_entry_t entry;
2807 #if MACH_PAGEMAP
2808 vm_object_size_t size;
2809 vm_external_map_t map;
2810 #endif /* MACH_PAGEMAP */
2811
2812 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2813 (integer_t)object, 0,0,0,0);
2814
2815 assert(object != kernel_object);
2816
2817 if (memory_manager_default_check() != KERN_SUCCESS)
2818 return;
2819
2820 /*
2821 * Prevent collapse or termination by holding a paging reference
2822 */
2823
2824 vm_object_paging_begin(object);
2825 if (object->pager_created) {
2826 /*
2827 * Someone else got to it first...
2828 * wait for them to finish initializing the ports
2829 */
2830 while (!object->pager_initialized) {
2831 vm_object_sleep(object,
2832 VM_OBJECT_EVENT_INITIALIZED,
2833 THREAD_UNINT);
2834 }
2835 vm_object_paging_end(object);
2836 return;
2837 }
2838
2839 /*
2840 * Indicate that a memory object has been assigned
2841 * before dropping the lock, to prevent a race.
2842 */
2843
2844 object->pager_created = TRUE;
2845 object->paging_offset = 0;
2846
2847 #if MACH_PAGEMAP
2848 size = object->size;
2849 #endif /* MACH_PAGEMAP */
2850 vm_object_unlock(object);
2851
2852 #if MACH_PAGEMAP
2853 map = vm_external_create(size);
2854 vm_object_lock(object);
2855 assert(object->size == size);
2856 object->existence_map = map;
2857 vm_object_unlock(object);
2858 #endif /* MACH_PAGEMAP */
2859
2860 /*
2861 * Create the [internal] pager, and associate it with this object.
2862 *
2863 * We make the association here so that vm_object_enter()
2864 * can look up the object to complete initializing it. No
2865 * user will ever map this object.
2866 */
2867 {
2868 memory_object_default_t dmm;
2869 vm_size_t cluster_size;
2870
2871 /* acquire a reference for the default memory manager */
2872 dmm = memory_manager_default_reference(&cluster_size);
2873 assert(cluster_size >= PAGE_SIZE);
2874
2875 object->cluster_size = cluster_size; /* XXX ??? */
2876 assert(object->temporary);
2877
2878 /* create our new memory object */
2879 (void) memory_object_create(dmm, object->size, &pager);
2880
2881 memory_object_default_deallocate(dmm);
2882 }
2883
2884 entry = vm_object_hash_entry_alloc(pager);
2885
2886 vm_object_cache_lock();
2887 vm_object_hash_insert(entry);
2888
2889 entry->object = object;
2890 vm_object_cache_unlock();
2891
2892 /*
2893 * A reference was returned by
2894 * memory_object_create(), and it is
2895 * copied by vm_object_enter().
2896 */
2897
2898 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
2899 panic("vm_object_pager_create: mismatch");
2900
2901 /*
2902 * Drop the reference we were passed.
2903 */
2904 memory_object_deallocate(pager);
2905
2906 vm_object_lock(object);
2907
2908 /*
2909 * Release the paging reference
2910 */
2911 vm_object_paging_end(object);
2912 }
2913
2914 /*
2915 * Routine: vm_object_remove
2916 * Purpose:
2917 * Eliminate the pager/object association
2918 * for this pager.
2919 * Conditions:
2920 * The object cache must be locked.
2921 */
2922 __private_extern__ void
2923 vm_object_remove(
2924 vm_object_t object)
2925 {
2926 memory_object_t pager;
2927
2928 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
2929 vm_object_hash_entry_t entry;
2930
2931 entry = vm_object_hash_lookup(pager, FALSE);
2932 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
2933 entry->object = VM_OBJECT_NULL;
2934 }
2935
2936 }
2937
2938 /*
2939 * Global variables for vm_object_collapse():
2940 *
2941 * Counts for normal collapses and bypasses.
2942 * Debugging variables, to watch or disable collapse.
2943 */
2944 static long object_collapses = 0;
2945 static long object_bypasses = 0;
2946
2947 static boolean_t vm_object_collapse_allowed = TRUE;
2948 static boolean_t vm_object_bypass_allowed = TRUE;
2949
2950 static int vm_external_discarded;
2951 static int vm_external_collapsed;
2952
2953 unsigned long vm_object_collapse_encrypted = 0;
2954
2955 /*
2956 * Routine: vm_object_do_collapse
2957 * Purpose:
2958 * Collapse an object with the object backing it.
2959 * Pages in the backing object are moved into the
2960 * parent, and the backing object is deallocated.
2961 * Conditions:
2962 * Both objects and the cache are locked; the page
2963 * queues are unlocked.
2964 *
2965 */
2966 static void
2967 vm_object_do_collapse(
2968 vm_object_t object,
2969 vm_object_t backing_object)
2970 {
2971 vm_page_t p, pp;
2972 vm_object_offset_t new_offset, backing_offset;
2973 vm_object_size_t size;
2974
2975 backing_offset = object->shadow_offset;
2976 size = object->size;
2977
2978 /*
2979 * Move all in-memory pages from backing_object
2980 * to the parent. Pages that have been paged out
2981 * will be overwritten by any of the parent's
2982 * pages that shadow them.
2983 */
2984
2985 while (!queue_empty(&backing_object->memq)) {
2986
2987 p = (vm_page_t) queue_first(&backing_object->memq);
2988
2989 new_offset = (p->offset - backing_offset);
2990
2991 assert(!p->busy || p->absent);
2992
2993 /*
2994 * If the parent has a page here, or if
2995 * this page falls outside the parent,
2996 * dispose of it.
2997 *
2998 * Otherwise, move it as planned.
2999 */
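/*
 * Worked example (values assumed): with backing_offset 0x4000 and a
 * parent size of 0x8000, a backing page at offset 0x6000 maps to
 * new_offset 0x2000 and is a candidate to move; backing pages below
 * 0x4000, or at or above 0xC000, fall outside the parent's window and
 * are freed.
 */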
3000
3001 if (p->offset < backing_offset || new_offset >= size) {
3002 VM_PAGE_FREE(p);
3003 } else {
3004 /*
3005 * ENCRYPTED SWAP:
3006 * The encryption key includes the "pager" and the
3007 * "paging_offset". These might not be the same in
3008 * the new object, so we can't just move an encrypted
3009 * page from one object to the other. We can't just
3010 * decrypt the page here either, because that would drop
3011 * the object lock.
3012 * The caller should check for encrypted pages before
3013 * attempting to collapse.
3014 */
3015 ASSERT_PAGE_DECRYPTED(p);
3016
3017 pp = vm_page_lookup(object, new_offset);
3018 if (pp == VM_PAGE_NULL) {
3019
3020 /*
3021 * Parent now has no page.
3022 * Move the backing object's page up.
3023 */
3024
3025 vm_page_rename(p, object, new_offset);
3026 #if MACH_PAGEMAP
3027 } else if (pp->absent) {
3028
3029 /*
3030 * Parent has an absent page...
3031 * it's not being paged in, so
3032 * it must really be missing from
3033 * the parent.
3034 *
3035 * Throw out the absent page...
3036 * any faults looking for that
3037 * page will restart with the new
3038 * one.
3039 */
3040
3041 VM_PAGE_FREE(pp);
3042 vm_page_rename(p, object, new_offset);
3043 #endif /* MACH_PAGEMAP */
3044 } else {
3045 assert(! pp->absent);
3046
3047 /*
3048 * Parent object has a real page.
3049 * Throw away the backing object's
3050 * page.
3051 */
3052 VM_PAGE_FREE(p);
3053 }
3054 }
3055 }
3056
3057 #if !MACH_PAGEMAP
3058 assert((!object->pager_created && object->pager == MEMORY_OBJECT_NULL)
3059 || (!backing_object->pager_created
3060 && backing_object->pager == MEMORY_OBJECT_NULL));
3061 #else
3062 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
3063 #endif /* !MACH_PAGEMAP */
3064
3065 if (backing_object->pager != MEMORY_OBJECT_NULL) {
3066 vm_object_hash_entry_t entry;
3067
3068 /*
3069 * Move the pager from backing_object to object.
3070 *
3071 * XXX We're only using part of the paging space
3072 * for keeps now... we ought to discard the
3073 * unused portion.
3074 */
3075
3076 assert(!object->paging_in_progress);
3077 object->pager = backing_object->pager;
3078 entry = vm_object_hash_lookup(object->pager, FALSE);
3079 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3080 entry->object = object;
3081 object->pager_created = backing_object->pager_created;
3082 object->pager_control = backing_object->pager_control;
3083 object->pager_ready = backing_object->pager_ready;
3084 object->pager_initialized = backing_object->pager_initialized;
3085 object->cluster_size = backing_object->cluster_size;
3086 object->paging_offset =
3087 backing_object->paging_offset + backing_offset;
3088 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
3089 memory_object_control_collapse(object->pager_control,
3090 object);
3091 }
3092 }
3093
3094 vm_object_cache_unlock();
3095
3096 #if MACH_PAGEMAP
3097 /*
3098 * If the shadow offset is 0, then use the existence map from
3099 * the backing object if there is one. If the shadow offset is
3100 * not zero, toss it.
3101 *
3102 * XXX - If the shadow offset is not 0 then a bit copy is needed
3103 * if the map is to be salvaged. For now, we just toss the
3104 * old map, giving the collapsed object no map. This means that
3105 * the pager is invoked for zero fill pages. If analysis shows
3106 * that this happens frequently and is a performance hit, then
3107 * this code should be fixed to salvage the map.
3108 */
3109 assert(object->existence_map == VM_EXTERNAL_NULL);
3110 if (backing_offset || (size != backing_object->size)) {
3111 vm_external_discarded++;
3112 vm_external_destroy(backing_object->existence_map,
3113 backing_object->size);
3114 }
3115 else {
3116 vm_external_collapsed++;
3117 object->existence_map = backing_object->existence_map;
3118 }
3119 backing_object->existence_map = VM_EXTERNAL_NULL;
3120 #endif /* MACH_PAGEMAP */
3121
3122 /*
3123 * Object now shadows whatever backing_object did.
3124 * Note that the reference to backing_object->shadow
3125 * moves from within backing_object to within object.
3126 */
3127
3128 assert(!object->phys_contiguous);
3129 assert(!backing_object->phys_contiguous);
3130 object->shadow = backing_object->shadow;
3131 if (object->shadow) {
3132 object->shadow_offset += backing_object->shadow_offset;
3133 } else {
3134 /* no shadow, therefore no shadow offset... */
3135 object->shadow_offset = 0;
3136 }
3137 assert((object->shadow == VM_OBJECT_NULL) ||
3138 (object->shadow->copy != backing_object));
3139
3140 /*
3141 * Discard backing_object.
3142 *
3143 * Since the backing object has no pages, no
3144 * pager left, and no object references within it,
3145 * all that is necessary is to dispose of it.
3146 */
3147
3148 assert((backing_object->ref_count == 1) &&
3149 (backing_object->resident_page_count == 0) &&
3150 (backing_object->paging_in_progress == 0));
3151
3152 backing_object->alive = FALSE;
3153 vm_object_unlock(backing_object);
3154
3155 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3156 (integer_t)backing_object, 0,0,0,0);
3157
3158 zfree(vm_object_zone, backing_object);
3159
3160 object_collapses++;
3161 }
3162
3163 static void
3164 vm_object_do_bypass(
3165 vm_object_t object,
3166 vm_object_t backing_object)
3167 {
3168 /*
3169 * Make the parent shadow the next object
3170 * in the chain.
3171 */
3172
3173 #if TASK_SWAPPER
3174 /*
3175 * Do object reference in-line to
3176 * conditionally increment shadow's
3177 * residence count. If object is not
3178 * resident, leave residence count
3179 * on shadow alone.
3180 */
3181 if (backing_object->shadow != VM_OBJECT_NULL) {
3182 vm_object_lock(backing_object->shadow);
3183 backing_object->shadow->ref_count++;
3184 if (object->res_count != 0)
3185 vm_object_res_reference(backing_object->shadow);
3186 vm_object_unlock(backing_object->shadow);
3187 }
3188 #else /* TASK_SWAPPER */
3189 vm_object_reference(backing_object->shadow);
3190 #endif /* TASK_SWAPPER */
3191
3192 assert(!object->phys_contiguous);
3193 assert(!backing_object->phys_contiguous);
3194 object->shadow = backing_object->shadow;
3195 if (object->shadow) {
3196 object->shadow_offset += backing_object->shadow_offset;
3197 } else {
3198 /* no shadow, therefore no shadow offset... */
3199 object->shadow_offset = 0;
3200 }
3201
3202 /*
3203 * Backing object might have had a copy pointer
3204 * to us. If it did, clear it.
3205 */
3206 if (backing_object->copy == object) {
3207 backing_object->copy = VM_OBJECT_NULL;
3208 }
3209
3210 /*
3211 * Drop the reference count on backing_object.
3212 #if TASK_SWAPPER
3213 * Since its ref_count was at least 2, it
3214 * will not vanish; so we don't need to call
3215 * vm_object_deallocate.
3216 * [FBDP: that doesn't seem to be true any more]
3217 *
3218 * The res_count on the backing object is
3219 * conditionally decremented. It's possible
3220 * (via vm_pageout_scan) to get here with
3221 * a "swapped" object, which has a 0 res_count,
3222 * in which case, the backing object res_count
3223 * is already down by one.
3224 #else
3225 * Don't call vm_object_deallocate unless
3226 * ref_count drops to zero.
3227 *
3228 * The ref_count can drop to zero here if the
3229 * backing object could be bypassed but not
3230 * collapsed, such as when the backing object
3231 * is temporary and cachable.
3232 #endif
3233 */
3234 if (backing_object->ref_count > 1) {
3235 backing_object->ref_count--;
3236 #if TASK_SWAPPER
3237 if (object->res_count != 0)
3238 vm_object_res_deallocate(backing_object);
3239 assert(backing_object->ref_count > 0);
3240 #endif /* TASK_SWAPPER */
3241 vm_object_unlock(backing_object);
3242 } else {
3243
3244 /*
3245 * Drop locks so that we can deallocate
3246 * the backing object.
3247 */
3248
3249 #if TASK_SWAPPER
3250 if (object->res_count == 0) {
3251 /* XXX get a reference for the deallocate below */
3252 vm_object_res_reference(backing_object);
3253 }
3254 #endif /* TASK_SWAPPER */
3255 vm_object_unlock(object);
3256 vm_object_unlock(backing_object);
3257 vm_object_deallocate(backing_object);
3258
3259 /*
3260 * Relock object. We don't have to reverify
3261 * its state since vm_object_collapse will
3262 * do that for us as it starts at the
3263 * top of its loop.
3264 */
3265
3266 vm_object_lock(object);
3267 }
3268
3269 object_bypasses++;
3270 }
3271
3272
3273 /*
3274 * vm_object_collapse:
3275 *
3276 * Perform an object collapse or an object bypass if appropriate.
3277 * The real work of collapsing and bypassing is performed in
3278 * the routines vm_object_do_collapse and vm_object_do_bypass.
3279 *
3280 * Requires that the object be locked and the page queues be unlocked.
3281 *
3282 */
3283 static unsigned long vm_object_collapse_calls = 0;
3284 static unsigned long vm_object_collapse_objects = 0;
3285 static unsigned long vm_object_collapse_do_collapse = 0;
3286 static unsigned long vm_object_collapse_do_bypass = 0;
3287 __private_extern__ void
3288 vm_object_collapse(
3289 register vm_object_t object,
3290 register vm_object_offset_t hint_offset)
3291 {
3292 register vm_object_t backing_object;
3293 register unsigned int rcount;
3294 register unsigned int size;
3295 vm_object_offset_t collapse_min_offset;
3296 vm_object_offset_t collapse_max_offset;
3297 vm_page_t page;
3298 vm_object_t original_object;
3299
3300 vm_object_collapse_calls++;
3301
3302 if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {
3303 return;
3304 }
3305
3306 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3307 (integer_t)object, 0,0,0,0);
3308
3309 if (object == VM_OBJECT_NULL)
3310 return;
3311
3312 original_object = object;
3313
3314 while (TRUE) {
3315 vm_object_collapse_objects++;
3316 /*
3317 * Verify that the conditions are right for either
3318 * collapse or bypass:
3319 */
3320
3321 /*
3322 * There is a backing object, and
3323 */
3324
3325 backing_object = object->shadow;
3326 if (backing_object == VM_OBJECT_NULL) {
3327 if (object != original_object) {
3328 vm_object_unlock(object);
3329 }
3330 return;
3331 }
3332
3333 /*
3334 * No pages in the object are currently
3335 * being paged out, and
3336 */
3337 if (object->paging_in_progress != 0 ||
3338 object->absent_count != 0) {
3339 /* try and collapse the rest of the shadow chain */
3340 vm_object_lock(backing_object);
3341 if (object != original_object) {
3342 vm_object_unlock(object);
3343 }
3344 object = backing_object;
3345 continue;
3346 }
3347
3348 vm_object_lock(backing_object);
3349
3350 /*
3351 * ...
3352 * The backing object is not read_only,
3353 * and no pages in the backing object are
3354 * currently being paged out.
3355 * The backing object is internal.
3356 *
3357 */
3358
3359 if (!backing_object->internal ||
3360 backing_object->paging_in_progress != 0) {
3361 /* try and collapse the rest of the shadow chain */
3362 if (object != original_object) {
3363 vm_object_unlock(object);
3364 }
3365 object = backing_object;
3366 continue;
3367 }
3368
3369 /*
3370 * The backing object can't be a copy-object:
3371 * the shadow_offset for the copy-object must stay
3372 * as 0. Furthermore (for the 'we have all the
3373 * pages' case), if we bypass backing_object and
3374 * just shadow the next object in the chain, old
3375 * pages from that object would then have to be copied
3376 * BOTH into the (former) backing_object and into the
3377 * parent object.
3378 */
3379 if (backing_object->shadow != VM_OBJECT_NULL &&
3380 backing_object->shadow->copy == backing_object) {
3381 /* try and collapse the rest of the shadow chain */
3382 if (object != original_object) {
3383 vm_object_unlock(object);
3384 }
3385 object = backing_object;
3386 continue;
3387 }
3388
3389 /*
3390 * We can now try to either collapse the backing
3391 * object (if the parent is the only reference to
3392 * it) or (perhaps) remove the parent's reference
3393 * to it.
3394 *
3395 * If there is exactly one reference to the backing
3396 * object, we may be able to collapse it into the
3397 * parent.
3398 *
3399 * If MACH_PAGEMAP is defined:
3400 * The parent must not have a pager created for it,
3401 * since collapsing a backing_object dumps new pages
3402 * into the parent that its pager doesn't know about
3403 * (and the collapse code can't merge the existence
3404 * maps).
3405 * Otherwise:
3406 * As long as one of the objects is still not known
3407 * to the pager, we can collapse them.
3408 */
3409 if (backing_object->ref_count == 1 &&
3410 (!object->pager_created
3411 #if !MACH_PAGEMAP
3412 || !backing_object->pager_created
3413 #endif /*!MACH_PAGEMAP */
3414 ) && vm_object_collapse_allowed) {
3415
3416 XPR(XPR_VM_OBJECT,
3417 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
3418 (integer_t)backing_object, (integer_t)object,
3419 (integer_t)backing_object->pager,
3420 (integer_t)backing_object->pager_control, 0);
3421
3422 /*
3423 * We need the cache lock for collapsing,
3424 * but we must not deadlock.
3425 */
3426
3427 if (! vm_object_cache_lock_try()) {
3428 if (object != original_object) {
3429 vm_object_unlock(object);
3430 }
3431 vm_object_unlock(backing_object);
3432 return;
3433 }
3434
3435 /*
3436 * ENCRYPTED SWAP
3437 * We can't collapse the object if it contains
3438 * any encrypted page, because the encryption key
3439 * includes the <object,offset> info. We can't
3440 * drop the object lock in vm_object_do_collapse()
3441 * so we can't decrypt the page there either.
3442 */
3443 if (vm_pages_encrypted) {
3444 collapse_min_offset = object->shadow_offset;
3445 collapse_max_offset =
3446 object->shadow_offset + object->size;
3447 queue_iterate(&backing_object->memq,
3448 page, vm_page_t, listq) {
3449 if (page->encrypted &&
3450 (page->offset >=
3451 collapse_min_offset) &&
3452 (page->offset <
3453 collapse_max_offset)) {
3454 /*
3455 * We found an encrypted page
3456 * in the backing object,
3457 * within the range covered
3458 * by the parent object: we can
3459 * not collapse them.
3460 */
3461 vm_object_collapse_encrypted++;
3462 vm_object_cache_unlock();
3463 goto try_bypass;
3464 }
3465 }
3466 }
3467
3468 /*
3469 * Collapse the object with its backing
3470 * object, and try again with the object's
3471 * new backing object.
3472 */
3473
3474 vm_object_do_collapse(object, backing_object);
3475 vm_object_collapse_do_collapse++;
3476 continue;
3477 }
3478
3479 try_bypass:
3480 /*
3481 * Collapsing the backing object was not possible
3482 * or permitted, so let's try bypassing it.
3483 */
3484
3485 if (! vm_object_bypass_allowed) {
3486 /* try and collapse the rest of the shadow chain */
3487 if (object != original_object) {
3488 vm_object_unlock(object);
3489 }
3490 object = backing_object;
3491 continue;
3492 }
3493
3494
3495 /*
3496 * If the object doesn't have all its pages present,
3497 * we have to make sure no pages in the backing object
3498 * "show through" before bypassing it.
3499 */
3500 size = atop(object->size);
3501 rcount = object->resident_page_count;
3502 if (rcount != size) {
3503 vm_object_offset_t offset;
3504 vm_object_offset_t backing_offset;
3505 unsigned int backing_rcount;
3506 unsigned int lookups = 0;
3507
3508 /*
3509 * If the backing object has a pager but no pagemap,
3510 * then we cannot bypass it, because we don't know
3511 * what pages it has.
3512 */
3513 if (backing_object->pager_created
3514 #if MACH_PAGEMAP
3515 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3516 #endif /* MACH_PAGEMAP */
3517 ) {
3518 /* try and collapse the rest of the shadow chain */
3519 if (object != original_object) {
3520 vm_object_unlock(object);
3521 }
3522 object = backing_object;
3523 continue;
3524 }
3525
3526 /*
3527 * If the object has a pager but no pagemap,
3528 * then we cannot bypass it, because we don't know
3529 * what pages it has.
3530 */
3531 if (object->pager_created
3532 #if MACH_PAGEMAP
3533 && (object->existence_map == VM_EXTERNAL_NULL)
3534 #endif /* MACH_PAGEMAP */
3535 ) {
3536 /* try and collapse the rest of the shadow chain */
3537 if (object != original_object) {
3538 vm_object_unlock(object);
3539 }
3540 object = backing_object;
3541 continue;
3542 }
3543
3544 /*
3545 * If all of the pages in the backing object are
3546 * shadowed by the parent object, the parent
3547 * object no longer has to shadow the backing
3548 * object; it can shadow the next one in the
3549 * chain.
3550 *
3551 * If the backing object has existence info,
3552 * we must examine its existence info
3553 * as well.
3554 *
3555 */
3556
3557 backing_offset = object->shadow_offset;
3558 backing_rcount = backing_object->resident_page_count;
3559
3560 #define EXISTS_IN_OBJECT(obj, off, rc) \
3561 (vm_external_state_get((obj)->existence_map, \
3562 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
3563 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
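/*
 * Reading the macro left to right: a page "exists" in an object if the
 * existence map says so, or, failing that, if there are still resident
 * pages unaccounted for ((rc) != 0) and vm_page_lookup() actually finds
 * one.  The ++lookups term feeds the delay(1) throttle in the loops
 * below, and (rc)-- only executes on a successful lookup, so rc counts
 * down the resident pages that remain to be found.
 */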
3564
3565 /*
3566 * Check the hint location first
3567 * (since it is often the quickest way out of here).
3568 */
3569 if (object->cow_hint != ~(vm_offset_t)0)
3570 hint_offset = (vm_object_offset_t)object->cow_hint;
3571 else
3572 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
3573 (hint_offset - 8 * PAGE_SIZE_64) : 0;
3574
3575 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
3576 backing_offset, backing_rcount) &&
3577 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
3578 /* dependency right at the hint */
3579 object->cow_hint = (vm_offset_t)hint_offset;
3580 /* try and collapse the rest of the shadow chain */
3581 if (object != original_object) {
3582 vm_object_unlock(object);
3583 }
3584 object = backing_object;
3585 continue;
3586 }
3587
3588 /*
3589 * If the object's window onto the backing_object
3590 * is large compared to the number of resident
3591 * pages in the backing object, it makes sense to
3592 * walk the backing_object's resident pages first.
3593 *
3594 * NOTE: Pages may be in both the existence map and
3595 * resident. So, we can't permanently decrement
3596 * the rcount here because the second loop may
3597 * find the same pages in the backing object's
3598 * existence map that we found here and we would
3599 * double-decrement the rcount. We also may or
3600 * may not have found the
3601 */
3602 if (backing_rcount && size >
3603 ((backing_object->existence_map) ?
3604 backing_rcount : (backing_rcount >> 1))) {
3605 unsigned int rc = rcount;
3606 vm_page_t p;
3607
3608 backing_rcount = backing_object->resident_page_count;
3609 p = (vm_page_t)queue_first(&backing_object->memq);
3610 do {
3611 /* Until we get more than one lookup lock */
3612 if (lookups > 256) {
3613 lookups = 0;
3614 delay(1);
3615 }
3616
3617 offset = (p->offset - backing_offset);
3618 if (offset < object->size &&
3619 offset != hint_offset &&
3620 !EXISTS_IN_OBJECT(object, offset, rc)) {
3621 /* found a dependency */
3622 object->cow_hint = (vm_offset_t)offset;
3623 break;
3624 }
3625 p = (vm_page_t) queue_next(&p->listq);
3626
3627 } while (--backing_rcount);
3628 if (backing_rcount != 0 ) {
3629 /* try and collapse the rest of the shadow chain */
3630 if (object != original_object) {
3631 vm_object_unlock(object);
3632 }
3633 object = backing_object;
3634 continue;
3635 }
3636 }
3637
3638 /*
3639 * Walk through the offsets looking for pages in the
3640 * backing object that show through to the object.
3641 */
3642 if (backing_rcount || backing_object->existence_map) {
3643 offset = hint_offset;
3644
3645 while((offset =
3646 (offset + PAGE_SIZE_64 < object->size) ?
3647 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
3648
3649 /* Until we get more than one lookup lock */
3650 if (lookups > 256) {
3651 lookups = 0;
3652 delay(1);
3653 }
3654
3655 if (EXISTS_IN_OBJECT(backing_object, offset +
3656 backing_offset, backing_rcount) &&
3657 !EXISTS_IN_OBJECT(object, offset, rcount)) {
3658 /* found a dependency */
3659 object->cow_hint = (vm_offset_t)offset;
3660 break;
3661 }
3662 }
3663 if (offset != hint_offset) {
3664 /* try and collapse the rest of the shadow chain */
3665 if (object != original_object) {
3666 vm_object_unlock(object);
3667 }
3668 object = backing_object;
3669 continue;
3670 }
3671 }
3672 }
3673
3674 /* reset the offset hint for any objects deeper in the chain */
3675 object->cow_hint = (vm_offset_t)0;
3676
3677 /*
3678 * All interesting pages in the backing object
3679 * already live in the parent or its pager.
3680 * Thus we can bypass the backing object.
3681 */
3682
3683 vm_object_do_bypass(object, backing_object);
3684 vm_object_collapse_do_bypass++;
3685
3686 /*
3687 * Try again with this object's new backing object.
3688 */
3689
3690 continue;
3691 }
3692
3693 if (object != original_object) {
3694 vm_object_unlock(object);
3695 }
3696 }
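/*
 * Informal summary of the loop above, per backing object visited:
 *
 *	if (backing object has a single reference and the pager
 *	    constraints allow it)
 *		vm_object_do_collapse();	absorb its pages and pager
 *	else if (every backing page that could show through is
 *		 already covered by the parent)
 *		vm_object_do_bypass();		shadow the next object instead
 *	else
 *		step down the shadow chain and try again
 */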
3697
3698 /*
3699 * Routine: vm_object_page_remove: [internal]
3700 * Purpose:
3701 * Removes all physical pages in the specified
3702 * object range from the object's list of pages.
3703 *
3704 * In/out conditions:
3705 * The object must be locked.
3706 * The object must not have paging_in_progress, usually
3707 * guaranteed by not having a pager.
3708 */
3709 unsigned int vm_object_page_remove_lookup = 0;
3710 unsigned int vm_object_page_remove_iterate = 0;
3711
3712 __private_extern__ void
3713 vm_object_page_remove(
3714 register vm_object_t object,
3715 register vm_object_offset_t start,
3716 register vm_object_offset_t end)
3717 {
3718 register vm_page_t p, next;
3719
3720 /*
3721 * One and two page removals are most popular.
3722 * The factor of 16 here is somewhat arbitrary.
3723 * It balances vm_object_lookup vs iteration.
3724 */
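/*
 * For example, with 4K pages a 64K range is 16 pages, so the per-page
 * vm_page_lookup() path is taken only when the object has more than
 * 16 * 16 = 256 resident pages; otherwise a single pass over the memq
 * is expected to be cheaper.
 */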
3725
3726 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
3727 vm_object_page_remove_lookup++;
3728
3729 for (; start < end; start += PAGE_SIZE_64) {
3730 p = vm_page_lookup(object, start);
3731 if (p != VM_PAGE_NULL) {
3732 assert(!p->cleaning && !p->pageout);
3733 if (!p->fictitious)
3734 pmap_disconnect(p->phys_page);
3735 VM_PAGE_FREE(p);
3736 }
3737 }
3738 } else {
3739 vm_object_page_remove_iterate++;
3740
3741 p = (vm_page_t) queue_first(&object->memq);
3742 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3743 next = (vm_page_t) queue_next(&p->listq);
3744 if ((start <= p->offset) && (p->offset < end)) {
3745 assert(!p->cleaning && !p->pageout);
3746 if (!p->fictitious)
3747 pmap_disconnect(p->phys_page);
3748 VM_PAGE_FREE(p);
3749 }
3750 p = next;
3751 }
3752 }
3753 }
3754
3755
3756 /*
3757 * Routine: vm_object_coalesce
3758 * Function: Coalesces two objects backing up adjoining
3759 * regions of memory into a single object.
3760 *
3761 * returns TRUE if objects were combined.
3762 *
3763 * NOTE: Only works at the moment if the second object is NULL -
3764 * if it's not, which object do we lock first?
3765 *
3766 * Parameters:
3767 * prev_object First object to coalesce
3768 * prev_offset Offset into prev_object
3769 * next_object Second object to coalesce
3770 * next_offset Offset into next_object
3771 *
3772 * prev_size Size of reference to prev_object
3773 * next_size Size of reference to next_object
3774 *
3775 * Conditions:
3776 * The object(s) must *not* be locked. The map must be locked
3777 * to preserve the reference to the object(s).
3778 */
3779 static int vm_object_coalesce_count = 0;
3780
3781 __private_extern__ boolean_t
3782 vm_object_coalesce(
3783 register vm_object_t prev_object,
3784 vm_object_t next_object,
3785 vm_object_offset_t prev_offset,
3786 __unused vm_object_offset_t next_offset,
3787 vm_object_size_t prev_size,
3788 vm_object_size_t next_size)
3789 {
3790 vm_object_size_t newsize;
3791
3792 #ifdef lint
3793 next_offset++;
3794 #endif /* lint */
3795
3796 if (next_object != VM_OBJECT_NULL) {
3797 return(FALSE);
3798 }
3799
3800 if (prev_object == VM_OBJECT_NULL) {
3801 return(TRUE);
3802 }
3803
3804 XPR(XPR_VM_OBJECT,
3805 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3806 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3807
3808 vm_object_lock(prev_object);
3809
3810 /*
3811 * Try to collapse the object first
3812 */
3813 vm_object_collapse(prev_object, prev_offset);
3814
3815 /*
3816 * Can't coalesce if pages not mapped to
3817 * prev_entry may be in use in any way:
3818 * . more than one reference
3819 * . paged out
3820 * . shadows another object
3821 * . has a copy elsewhere
3822 * . is purgable
3823 * . paging references (pages might be in page-list)
3824 */
3825
3826 if ((prev_object->ref_count > 1) ||
3827 prev_object->pager_created ||
3828 (prev_object->shadow != VM_OBJECT_NULL) ||
3829 (prev_object->copy != VM_OBJECT_NULL) ||
3830 (prev_object->true_share != FALSE) ||
3831 (prev_object->purgable != VM_OBJECT_NONPURGABLE) ||
3832 (prev_object->paging_in_progress != 0)) {
3833 vm_object_unlock(prev_object);
3834 return(FALSE);
3835 }
3836
3837 vm_object_coalesce_count++;
3838
3839 /*
3840 * Remove any pages that may still be in the object from
3841 * a previous deallocation.
3842 */
3843 vm_object_page_remove(prev_object,
3844 prev_offset + prev_size,
3845 prev_offset + prev_size + next_size);
3846
3847 /*
3848 * Extend the object if necessary.
3849 */
3850 newsize = prev_offset + prev_size + next_size;
3851 if (newsize > prev_object->size) {
3852 #if MACH_PAGEMAP
3853 /*
3854 * We cannot extend an object that has existence info,
3855 * since the existence info might then fail to cover
3856 * the entire object.
3857 *
3858 * This assertion must be true because the object
3859 * has no pager, and we only create existence info
3860 * for objects with pagers.
3861 */
3862 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3863 #endif /* MACH_PAGEMAP */
3864 prev_object->size = newsize;
3865 }
3866
3867 vm_object_unlock(prev_object);
3868 return(TRUE);
3869 }
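/*
 * A typical caller-side shape (sketch only; variable names assumed):
 *
 *	if (vm_object_coalesce(prev_object, VM_OBJECT_NULL,
 *			       prev_offset, (vm_object_offset_t) 0,
 *			       prev_size, new_size))
 *		reuse prev_object for the new adjoining region;
 *	else
 *		allocate a separate object for it.
 *
 * On a TRUE return the object has already been extended, if necessary,
 * to cover prev_offset + prev_size + new_size.
 */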
3870
3871 /*
3872 * Attach a set of physical pages to an object, so that they can
3873 * be mapped by mapping the object. Typically used to map IO memory.
3874 *
3875 * The mapping function and its private data are used to obtain the
3876 * physical addresses for each page to be mapped.
3877 */
3878 void
3879 vm_object_page_map(
3880 vm_object_t object,
3881 vm_object_offset_t offset,
3882 vm_object_size_t size,
3883 vm_object_offset_t (*map_fn)(void *map_fn_data,
3884 vm_object_offset_t offset),
3885 void *map_fn_data) /* private to map_fn */
3886 {
3887 int num_pages;
3888 int i;
3889 vm_page_t m;
3890 vm_page_t old_page;
3891 vm_object_offset_t addr;
3892
3893 num_pages = atop_64(size);
3894
3895 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3896
3897 addr = (*map_fn)(map_fn_data, offset);
3898
3899 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3900 vm_page_more_fictitious();
3901
3902 vm_object_lock(object);
3903 if ((old_page = vm_page_lookup(object, offset))
3904 != VM_PAGE_NULL)
3905 {
3906 vm_page_lock_queues();
3907 vm_page_free(old_page);
3908 vm_page_unlock_queues();
3909 }
3910
3911 vm_page_init(m, addr);
3912 /* private normally requires lock_queues but since we */
3913 /* are initializing the page, it's not necessary here */
3914 m->private = TRUE; /* don't free page */
3915 m->wire_count = 1;
3916 vm_page_insert(m, object, offset);
3917
3918 PAGE_WAKEUP_DONE(m);
3919 vm_object_unlock(object);
3920 }
3921 }
3922
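/*
 * Illustrative sketch (not compiled): a minimal map_fn callback for
 * vm_object_page_map(), mapping a physically contiguous I/O range.
 * The io_map_info structure and io_map_fn name are assumptions made
 * for this example only.
 */
#if 0
struct io_map_info {
	vm_object_offset_t	phys_base;	/* physical base of the range */
};

static vm_object_offset_t
io_map_fn(void *map_fn_data, vm_object_offset_t offset)
{
	struct io_map_info	*info = (struct io_map_info *) map_fn_data;

	/* the physical address backing this offset within the object */
	return info->phys_base + offset;
}

	/* ... then, to populate "size" bytes of "object" starting at offset 0: */
	vm_object_page_map(object, 0, size, io_map_fn, (void *) &info);
#endif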
3923 #include <mach_kdb.h>
3924
3925 #if MACH_KDB
3926 #include <ddb/db_output.h>
3927 #include <vm/vm_print.h>
3928
3929 #define printf kdbprintf
3930
3931 extern boolean_t vm_object_cached(
3932 vm_object_t object);
3933
3934 extern void print_bitstring(
3935 char byte);
3936
3937 boolean_t vm_object_print_pages = FALSE;
3938
3939 void
3940 print_bitstring(
3941 char byte)
3942 {
3943 printf("%c%c%c%c%c%c%c%c",
3944 ((byte & (1 << 0)) ? '1' : '0'),
3945 ((byte & (1 << 1)) ? '1' : '0'),
3946 ((byte & (1 << 2)) ? '1' : '0'),
3947 ((byte & (1 << 3)) ? '1' : '0'),
3948 ((byte & (1 << 4)) ? '1' : '0'),
3949 ((byte & (1 << 5)) ? '1' : '0'),
3950 ((byte & (1 << 6)) ? '1' : '0'),
3951 ((byte & (1 << 7)) ? '1' : '0'));
3952 }
3953
3954 boolean_t
3955 vm_object_cached(
3956 register vm_object_t object)
3957 {
3958 register vm_object_t o;
3959
3960 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3961 if (object == o) {
3962 return TRUE;
3963 }
3964 }
3965 return FALSE;
3966 }
3967
3968 #if MACH_PAGEMAP
3969 /*
3970 * vm_external_print: [ debug ]
3971 */
3972 void
3973 vm_external_print(
3974 vm_external_map_t emap,
3975 vm_size_t size)
3976 {
3977 if (emap == VM_EXTERNAL_NULL) {
3978 printf("0 ");
3979 } else {
3980 vm_size_t existence_size = stob(size);
3981 printf("{ size=%d, map=[", existence_size);
3982 if (existence_size > 0) {
3983 print_bitstring(emap[0]);
3984 }
3985 if (existence_size > 1) {
3986 print_bitstring(emap[1]);
3987 }
3988 if (existence_size > 2) {
3989 printf("...");
3990 print_bitstring(emap[existence_size-1]);
3991 }
3992 printf("] }\n");
3993 }
3994 return;
3995 }
3996 #endif /* MACH_PAGEMAP */
3997
3998 int
3999 vm_follow_object(
4000 vm_object_t object)
4001 {
4002 int count = 0;
4003 int orig_db_indent = db_indent;
4004
4005 while (TRUE) {
4006 if (object == VM_OBJECT_NULL) {
4007 db_indent = orig_db_indent;
4008 return count;
4009 }
4010
4011 count += 1;
4012
4013 iprintf("object 0x%x", object);
4014 printf(", shadow=0x%x", object->shadow);
4015 printf(", copy=0x%x", object->copy);
4016 printf(", pager=0x%x", object->pager);
4017 printf(", ref=%d\n", object->ref_count);
4018
4019 db_indent += 2;
4020 object = object->shadow;
4021 }
4022
4023 }
4024
4025 /*
4026 * vm_object_print: [ debug ]
4027 */
4028 void
4029 vm_object_print(
4030 db_addr_t db_addr,
4031 __unused boolean_t have_addr,
4032 __unused int arg_count,
4033 __unused char *modif)
4034 {
4035 vm_object_t object;
4036 register vm_page_t p;
4037 const char *s;
4038
4039 register int count;
4040
4041 object = (vm_object_t) (long) db_addr;
4042 if (object == VM_OBJECT_NULL)
4043 return;
4044
4045 iprintf("object 0x%x\n", object);
4046
4047 db_indent += 2;
4048
4049 iprintf("size=0x%x", object->size);
4050 printf(", cluster=0x%x", object->cluster_size);
4051 printf(", memq_hint=%p", object->memq_hint);
4052 printf(", ref_count=%d\n", object->ref_count);
4053 iprintf("");
4054 #if TASK_SWAPPER
4055 printf("res_count=%d, ", object->res_count);
4056 #endif /* TASK_SWAPPER */
4057 printf("resident_page_count=%d\n", object->resident_page_count);
4058
4059 iprintf("shadow=0x%x", object->shadow);
4060 if (object->shadow) {
4061 register int i = 0;
4062 vm_object_t shadow = object;
4063 while((shadow = shadow->shadow))
4064 i++;
4065 printf(" (depth %d)", i);
4066 }
4067 printf(", copy=0x%x", object->copy);
4068 printf(", shadow_offset=0x%x", object->shadow_offset);
4069 printf(", last_alloc=0x%x\n", object->last_alloc);
4070
4071 iprintf("pager=0x%x", object->pager);
4072 printf(", paging_offset=0x%x", object->paging_offset);
4073 printf(", pager_control=0x%x\n", object->pager_control);
4074
4075 iprintf("copy_strategy=%d[", object->copy_strategy);
4076 switch (object->copy_strategy) {
4077 case MEMORY_OBJECT_COPY_NONE:
4078 printf("copy_none");
4079 break;
4080
4081 case MEMORY_OBJECT_COPY_CALL:
4082 printf("copy_call");
4083 break;
4084
4085 case MEMORY_OBJECT_COPY_DELAY:
4086 printf("copy_delay");
4087 break;
4088
4089 case MEMORY_OBJECT_COPY_SYMMETRIC:
4090 printf("copy_symmetric");
4091 break;
4092
4093 case MEMORY_OBJECT_COPY_INVALID:
4094 printf("copy_invalid");
4095 break;
4096
4097 default:
4098 printf("?");
4099 }
4100 printf("]");
4101 printf(", absent_count=%d\n", object->absent_count);
4102
4103 iprintf("all_wanted=0x%x<", object->all_wanted);
4104 s = "";
4105 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
4106 printf("%sinit", s);
4107 s = ",";
4108 }
4109 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
4110 printf("%sready", s);
4111 s = ",";
4112 }
4113 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
4114 printf("%spaging", s);
4115 s = ",";
4116 }
4117 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
4118 printf("%sabsent", s);
4119 s = ",";
4120 }
4121 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
4122 printf("%slock", s);
4123 s = ",";
4124 }
4125 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
4126 printf("%suncaching", s);
4127 s = ",";
4128 }
4129 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
4130 printf("%scopy_call", s);
4131 s = ",";
4132 }
4133 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
4134 printf("%scaching", s);
4135 s = ",";
4136 }
4137 printf(">");
4138 printf(", paging_in_progress=%d\n", object->paging_in_progress);
4139
4140 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
4141 (object->pager_created ? "" : "!"),
4142 (object->pager_initialized ? "" : "!"),
4143 (object->pager_ready ? "" : "!"),
4144 (object->can_persist ? "" : "!"),
4145 (object->pager_trusted ? "" : "!"),
4146 (object->pageout ? "" : "!"),
4147 (object->internal ? "internal" : "external"),
4148 (object->temporary ? "temporary" : "permanent"));
4149 iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n",
4150 (object->alive ? "" : "!"),
4151 ((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"),
4152 ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"),
4153 ((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"),
4154 (object->shadowed ? "" : "!"),
4155 (vm_object_cached(object) ? "" : "!"),
4156 (object->private ? "" : "!"));
4157 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
4158 (object->advisory_pageout ? "" : "!"),
4159 (object->silent_overwrite ? "" : "!"));
4160
4161 #if MACH_PAGEMAP
4162 iprintf("existence_map=");
4163 vm_external_print(object->existence_map, object->size);
4164 #endif /* MACH_PAGEMAP */
4165 #if MACH_ASSERT
4166 iprintf("paging_object=0x%x\n", object->paging_object);
4167 #endif /* MACH_ASSERT */
4168
4169 if (vm_object_print_pages) {
4170 count = 0;
4171 p = (vm_page_t) queue_first(&object->memq);
4172 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4173 if (count == 0) {
4174 iprintf("memory:=");
4175 } else if (count == 2) {
4176 printf("\n");
4177 iprintf(" ...");
4178 count = 0;
4179 } else {
4180 printf(",");
4181 }
4182 count++;
4183
4184 printf("(off=0x%llX,page=%p)", p->offset, p);
4185 p = (vm_page_t) queue_next(&p->listq);
4186 }
4187 if (count != 0) {
4188 printf("\n");
4189 }
4190 }
4191 db_indent -= 2;
4192 }
4193
4194
4195 /*
4196 * vm_object_find [ debug ]
4197 *
4198 * Find all tasks which reference the given vm_object.
4199 */
4200
4201 boolean_t vm_object_find(vm_object_t object);
4202 boolean_t vm_object_print_verbose = FALSE;
4203
4204 boolean_t
4205 vm_object_find(
4206 vm_object_t object)
4207 {
4208 task_t task;
4209 vm_map_t map;
4210 vm_map_entry_t entry;
4211 processor_set_t pset = &default_pset;
4212 boolean_t found = FALSE;
4213
4214 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
4215 map = task->map;
4216 for (entry = vm_map_first_entry(map);
4217 entry && entry != vm_map_to_entry(map);
4218 entry = entry->vme_next) {
4219
4220 vm_object_t obj;
4221
4222 /*
4223 * For the time being skip submaps,
4224 * only the kernel can have submaps,
4225 * and unless we are interested in
4226 * kernel objects, we can simply skip
4227 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
4228 * for a full solution.
4229 */
4230 if (entry->is_sub_map)
4231 continue;
4232 if (entry)
4233 obj = entry->object.vm_object;
4234 else
4235 continue;
4236
4237 while (obj != VM_OBJECT_NULL) {
4238 if (obj == object) {
4239 if (!found) {
4240 printf("TASK\t\tMAP\t\tENTRY\n");
4241 found = TRUE;
4242 }
4243 printf("0x%x\t0x%x\t0x%x\n",
4244 task, map, entry);
4245 }
4246 obj = obj->shadow;
4247 }
4248 }
4249 }
4250
4251 return(found);
4252 }
4253
4254 #endif /* MACH_KDB */
4255
4256 kern_return_t
4257 vm_object_populate_with_private(
4258 vm_object_t object,
4259 vm_object_offset_t offset,
4260 ppnum_t phys_page,
4261 vm_size_t size)
4262 {
4263 ppnum_t base_page;
4264 vm_object_offset_t base_offset;
4265
4266
4267 if(!object->private)
4268 return KERN_FAILURE;
4269
4270 base_page = phys_page;
4271
4272 vm_object_lock(object);
4273 if(!object->phys_contiguous) {
4274 vm_page_t m;
4275 if((base_offset = trunc_page_64(offset)) != offset) {
4276 vm_object_unlock(object);
4277 return KERN_FAILURE;
4278 }
4279 base_offset += object->paging_offset;
4280 while(size) {
4281 m = vm_page_lookup(object, base_offset);
4282 if(m != VM_PAGE_NULL) {
4283 if(m->fictitious) {
4284 vm_page_lock_queues();
4285 m->fictitious = FALSE;
4286 m->private = TRUE;
4287 m->phys_page = base_page;
4288 if(!m->busy) {
4289 m->busy = TRUE;
4290 }
4291 if(!m->absent) {
4292 m->absent = TRUE;
4293 object->absent_count++;
4294 }
4295 m->list_req_pending = TRUE;
4296 vm_page_unlock_queues();
4297 } else if (m->phys_page != base_page) {
4298 /* pmap call to clear old mapping */
4299 pmap_disconnect(m->phys_page);
4300 m->phys_page = base_page;
4301 }
4302
4303 /*
4304 * ENCRYPTED SWAP:
4305 * We're not pointing to the same
4306 * physical page any longer and the
4307 * contents of the new one are not
4308 * supposed to be encrypted.
4309 * XXX What happens to the original
4310 * physical page? Is it lost?
4311 */
4312 m->encrypted = FALSE;
4313
4314 } else {
4315 while ((m = vm_page_grab_fictitious())
4316 == VM_PAGE_NULL)
4317 vm_page_more_fictitious();
4318 vm_page_lock_queues();
4319 m->fictitious = FALSE;
4320 m->private = TRUE;
4321 m->phys_page = base_page;
4322 m->list_req_pending = TRUE;
4323 m->absent = TRUE;
4324 m->unusual = TRUE;
4325 object->absent_count++;
4326 vm_page_unlock_queues();
4327 vm_page_insert(m, object, base_offset);
4328 }
4329 base_page++; /* Go to the next physical page */
4330 base_offset += PAGE_SIZE;
4331 size -= PAGE_SIZE;
4332 }
4333 } else {
4334 /* NOTE: we should check the original settings here */
4335 /* if we have a size > zero a pmap call should be made */
4336 /* to disable the range */
4337
4338 /* pmap_? */
4339
4340 /* shadows on contiguous memory are not allowed */
4341 /* we therefore can use the offset field */
4342 object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
4343 object->size = size;
4344 }
4345 vm_object_unlock(object);
4346 return KERN_SUCCESS;
4347 }
4348
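/*
 * Illustrative sketch (not compiled): populating a private VM object
 * with a run of device pages so they can be mapped by mapping the
 * object.  The object is assumed to have been created with the
 * "private" attribute; "device_base_page" is an assumption made for
 * this example only.
 */
#if 0
	kern_return_t	kr;

	kr = vm_object_populate_with_private(object,
					     (vm_object_offset_t) 0,
					     device_base_page,	/* ppnum_t */
					     size);
	if (kr != KERN_SUCCESS) {
		/* object was not marked private, or offset not page aligned */
	}
#endif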
4349 /*
4350 * memory_object_free_from_cache:
4351 *
4352 * Walk the vm_object cache list, removing and freeing vm_objects
4353 * which are backed by the pager identified by the caller (pager_id).
4354 * Remove up to "count" objects, if there are that many available
4355 * in the cache.
4356 *
4357 * Walk the list at most once, return the number of vm_objects
4358 * actually freed.
4359 */
4360
4361 __private_extern__ kern_return_t
4362 memory_object_free_from_cache(
4363 __unused host_t host,
4364 int *pager_id,
4365 int *count)
4366 {
4367
4368 int object_released = 0;
4369
4370 register vm_object_t object = VM_OBJECT_NULL;
4371 vm_object_t shadow;
4372
4373 /*
4374 if(host == HOST_NULL)
4375 return(KERN_INVALID_ARGUMENT);
4376 */
4377
4378 try_again:
4379 vm_object_cache_lock();
4380
4381 queue_iterate(&vm_object_cached_list, object,
4382 vm_object_t, cached_list) {
4383 if (object->pager && (pager_id == object->pager->pager)) {
4384 vm_object_lock(object);
4385 queue_remove(&vm_object_cached_list, object,
4386 vm_object_t, cached_list);
4387 vm_object_cached_count--;
4388
4389 /*
4390 * Since this object is in the cache, we know
4391 * that it is initialized and has only a pager's
4392 * (implicit) reference. Take a reference to avoid
4393 * recursive deallocations.
4394 */
4395
4396 assert(object->pager_initialized);
4397 assert(object->ref_count == 0);
4398 object->ref_count++;
4399
4400 /*
4401 * Terminate the object.
4402 * If the object had a shadow, we let
4403 * vm_object_deallocate deallocate it.
4404 * "pageout" objects have a shadow, but
4405 * maintain a "paging reference" rather
4406 * than a normal reference.
4407 * (We are careful here to limit recursion.)
4408 */
4409 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4410 if ((vm_object_terminate(object) == KERN_SUCCESS)
4411 && (shadow != VM_OBJECT_NULL)) {
4412 vm_object_deallocate(shadow);
4413 }
4414
4415 if(object_released++ == *count)
4416 return KERN_SUCCESS;
4417 goto try_again;
4418 }
4419 }
4420 vm_object_cache_unlock();
4421 *count = object_released;
4422 return KERN_SUCCESS;
4423 }
4424
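/*
 * Illustrative sketch (not compiled): asking the cache to release up to
 * "n" cached objects backed by a particular pager.  The pager_id value
 * is an assumption made for this example only; on a complete walk of
 * the cache list, n is updated to the number of objects actually freed.
 */
#if 0
	int	n = 10;

	memory_object_free_from_cache(HOST_NULL, pager_id, &n);
#endif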
4425
4426
4427 kern_return_t
4428 memory_object_create_named(
4429 memory_object_t pager,
4430 memory_object_offset_t size,
4431 memory_object_control_t *control)
4432 {
4433 vm_object_t object;
4434 vm_object_hash_entry_t entry;
4435
4436 *control = MEMORY_OBJECT_CONTROL_NULL;
4437 if (pager == MEMORY_OBJECT_NULL)
4438 return KERN_INVALID_ARGUMENT;
4439
4440 vm_object_cache_lock();
4441 entry = vm_object_hash_lookup(pager, FALSE);
4442 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4443 (entry->object != VM_OBJECT_NULL)) {
4444 if (entry->object->named == TRUE)
4445 panic("memory_object_create_named: caller already holds the right"); }
4446
4447 vm_object_cache_unlock();
4448 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4449 == VM_OBJECT_NULL) {
4450 return(KERN_INVALID_OBJECT);
4451 }
4452
4453 /* wait for object (if any) to be ready */
4454 if (object != VM_OBJECT_NULL) {
4455 vm_object_lock(object);
4456 object->named = TRUE;
4457 while (!object->pager_ready) {
4458 vm_object_sleep(object,
4459 VM_OBJECT_EVENT_PAGER_READY,
4460 THREAD_UNINT);
4461 }
4462 *control = object->pager_control;
4463 vm_object_unlock(object);
4464 }
4465 return (KERN_SUCCESS);
4466 }
4467
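/*
 * Illustrative sketch (not compiled): obtaining a named reference and
 * the associated control port for a pager.  "pager" and "size" are
 * assumptions made for this example only.
 */
#if 0
	memory_object_control_t	control;
	kern_return_t		kr;

	kr = memory_object_create_named(pager, size, &control);
	if (kr == KERN_SUCCESS) {
		/* the underlying VM object now holds a named reference */
	}
#endif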
4468
4469 /*
4470 * Routine: memory_object_recover_named [user interface]
4471 * Purpose:
4472 * Attempt to recover a named reference for a VM object.
4473 * VM will verify that the object has not already started
4474 * down the termination path, and if it has, will optionally
4475 * wait for that to finish.
4476 * Returns:
4477 * KERN_SUCCESS - we recovered a named reference on the object
4478 * KERN_FAILURE - we could not recover a reference (object dead)
4479 * KERN_INVALID_ARGUMENT - bad memory object control
4480 */
4481 kern_return_t
4482 memory_object_recover_named(
4483 memory_object_control_t control,
4484 boolean_t wait_on_terminating)
4485 {
4486 vm_object_t object;
4487
4488 vm_object_cache_lock();
4489 object = memory_object_control_to_vm_object(control);
4490 if (object == VM_OBJECT_NULL) {
4491 vm_object_cache_unlock();
4492 return (KERN_INVALID_ARGUMENT);
4493 }
4494
4495 restart:
4496 vm_object_lock(object);
4497
4498 if (object->terminating && wait_on_terminating) {
4499 vm_object_cache_unlock();
4500 vm_object_wait(object,
4501 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4502 THREAD_UNINT);
4503 vm_object_cache_lock();
4504 goto restart;
4505 }
4506
4507 if (!object->alive) {
4508 vm_object_cache_unlock();
4509 vm_object_unlock(object);
4510 return KERN_FAILURE;
4511 }
4512
4513 if (object->named == TRUE) {
4514 vm_object_cache_unlock();
4515 vm_object_unlock(object);
4516 return KERN_SUCCESS;
4517 }
4518
4519 if((object->ref_count == 0) && (!object->terminating)){
4520 queue_remove(&vm_object_cached_list, object,
4521 vm_object_t, cached_list);
4522 vm_object_cached_count--;
4523 XPR(XPR_VM_OBJECT_CACHE,
4524 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4525 (integer_t)object,
4526 (integer_t)vm_object_cached_list.next,
4527 (integer_t)vm_object_cached_list.prev, 0,0);
4528 }
4529
4530 vm_object_cache_unlock();
4531
4532 object->named = TRUE;
4533 object->ref_count++;
4534 vm_object_res_reference(object);
4535 while (!object->pager_ready) {
4536 vm_object_sleep(object,
4537 VM_OBJECT_EVENT_PAGER_READY,
4538 THREAD_UNINT);
4539 }
4540 vm_object_unlock(object);
4541 return (KERN_SUCCESS);
4542 }
4543
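/*
 * Illustrative sketch (not compiled): recovering a named reference,
 * waiting out a termination that may already be in progress.
 */
#if 0
	if (memory_object_recover_named(control, TRUE) != KERN_SUCCESS) {
		/* the object is dead; a named reference could not be recovered */
	}
#endif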
4544
4545 /*
4546 * vm_object_release_name:
4547 *
4548 * Enforces the name semantic on memory_object reference count decrements.
4549 * This routine should not be called unless the caller holds a name
4550 * reference gained through memory_object_create_named().
4551 *
4552 * If the TERMINATE_IDLE flag is set, the call fails unless the
4553 * reference count is exactly 1, i.e. the object is idle with the name
4554 * being the only remaining reference.
4555 * If the decision is made to proceed, the named flag is cleared and the
4556 * reference count is decremented. If the RESPECT_CACHE flag is set and
4557 * the reference count has gone to zero, the memory_object is checked to
4558 * see whether it is cacheable; otherwise, when the reference count
4559 * reaches zero, the object is simply terminated.
4560 */
4561
4562 __private_extern__ kern_return_t
4563 vm_object_release_name(
4564 vm_object_t object,
4565 int flags)
4566 {
4567 vm_object_t shadow;
4568 boolean_t original_object = TRUE;
4569
4570 while (object != VM_OBJECT_NULL) {
4571
4572 /*
4573 * The cache holds a reference (uncounted) to
4574 * the object. We must lock it before removing
4575 * the object.
4576 *
4577 */
4578
4579 vm_object_cache_lock();
4580 vm_object_lock(object);
4581 assert(object->alive);
4582 if(original_object)
4583 assert(object->named);
4584 assert(object->ref_count > 0);
4585
4586 /*
4587 * We have to wait for initialization before
4588 * destroying or caching the object.
4589 */
4590
4591 if (object->pager_created && !object->pager_initialized) {
4592 assert(!object->can_persist);
4593 vm_object_assert_wait(object,
4594 VM_OBJECT_EVENT_INITIALIZED,
4595 THREAD_UNINT);
4596 vm_object_unlock(object);
4597 vm_object_cache_unlock();
4598 thread_block(THREAD_CONTINUE_NULL);
4599 continue;
4600 }
4601
4602 if (((object->ref_count > 1)
4603 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4604 || (object->terminating)) {
4605 vm_object_unlock(object);
4606 vm_object_cache_unlock();
4607 return KERN_FAILURE;
4608 } else {
4609 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4610 vm_object_unlock(object);
4611 vm_object_cache_unlock();
4612 return KERN_SUCCESS;
4613 }
4614 }
4615
4616 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4617 (object->ref_count == 1)) {
4618 if(original_object)
4619 object->named = FALSE;
4620 vm_object_unlock(object);
4621 vm_object_cache_unlock();
4622 /* let vm_object_deallocate push this thing into */
4623 /* the cache, if that is where it is bound */
4624 vm_object_deallocate(object);
4625 return KERN_SUCCESS;
4626 }
4627 VM_OBJ_RES_DECR(object);
4628 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4629 if(object->ref_count == 1) {
4630 if(vm_object_terminate(object) != KERN_SUCCESS) {
4631 if(original_object) {
4632 return KERN_FAILURE;
4633 } else {
4634 return KERN_SUCCESS;
4635 }
4636 }
4637 if (shadow != VM_OBJECT_NULL) {
4638 original_object = FALSE;
4639 object = shadow;
4640 continue;
4641 }
4642 return KERN_SUCCESS;
4643 } else {
4644 object->ref_count--;
4645 assert(object->ref_count > 0);
4646 if(original_object)
4647 object->named = FALSE;
4648 vm_object_unlock(object);
4649 vm_object_cache_unlock();
4650 return KERN_SUCCESS;
4651 }
4652 }
4653 /*NOTREACHED*/
4654 assert(0);
4655 return KERN_FAILURE;
4656 }
4657
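/*
 * Illustrative sketch (not compiled): dropping the name reference only
 * if the object is otherwise idle, and letting it be cached if it is
 * cacheable.
 */
#if 0
	kern_return_t	kr;

	kr = vm_object_release_name(object,
				    MEMORY_OBJECT_TERMINATE_IDLE |
				    MEMORY_OBJECT_RESPECT_CACHE);
	if (kr == KERN_FAILURE) {
		/* object still has other references, or is terminating */
	}
#endif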
4658
4659 __private_extern__ kern_return_t
4660 vm_object_lock_request(
4661 vm_object_t object,
4662 vm_object_offset_t offset,
4663 vm_object_size_t size,
4664 memory_object_return_t should_return,
4665 int flags,
4666 vm_prot_t prot)
4667 {
4668 __unused boolean_t should_flush;
4669
4670 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
4671
4672 XPR(XPR_MEMORY_OBJECT,
4673 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4674 (integer_t)object, offset, size,
4675 (((should_return&1)<<1)|should_flush), prot);
4676
4677 /*
4678 * Check for bogus arguments.
4679 */
4680 if (object == VM_OBJECT_NULL)
4681 return (KERN_INVALID_ARGUMENT);
4682
4683 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4684 return (KERN_INVALID_ARGUMENT);
4685
4686 size = round_page_64(size);
4687
4688 /*
4689 * Lock the object, and acquire a paging reference to
4690 * prevent the memory_object reference from being released.
4691 */
4692 vm_object_lock(object);
4693 vm_object_paging_begin(object);
4694
4695 (void)vm_object_update(object,
4696 offset, size, NULL, NULL, should_return, flags, prot);
4697
4698 vm_object_paging_end(object);
4699 vm_object_unlock(object);
4700
4701 return (KERN_SUCCESS);
4702 }
4703
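/*
 * Illustrative sketch (not compiled): flushing and returning the dirty
 * pages of a range back to the pager.
 */
#if 0
	vm_object_lock_request(object, offset, size,
			       MEMORY_OBJECT_RETURN_DIRTY,
			       MEMORY_OBJECT_DATA_FLUSH,
			       VM_PROT_NO_CHANGE);
#endif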
4704 /*
4705 * Empty a purgable object by grabbing the physical pages assigned to it and
4706 * putting them on the free queue without writing them to backing store, etc.
4707 * When the pages are next touched they will be demand zero-fill pages. We
4708 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
4709 * skip referenced/dirty pages, pages on the active queue, etc. We're more
4710 * than happy to grab these since this is a purgable object. We mark the
4711 * object as "empty" after reaping its pages.
4712 *
4713 * On entry the object and page queues are locked, and the object must be a
4714 * purgable object with no delayed copies pending.
4715 */
4716 unsigned int
4717 vm_object_purge(vm_object_t object)
4718 {
4719 vm_page_t p, next;
4720 unsigned int num_purged_pages;
4721 vm_page_t local_freeq;
4722 unsigned long local_freed;
4723 int purge_loop_quota;
4724 /* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
4725 #define PURGE_BATCH_FREE_LIMIT 50
4726 /* release page queues lock every PURGE_LOOP_QUOTA iterations */
4727 #define PURGE_LOOP_QUOTA 100
4728
4729 num_purged_pages = 0;
4730 if (object->purgable == VM_OBJECT_NONPURGABLE)
4731 return num_purged_pages;
4732
4733 object->purgable = VM_OBJECT_PURGABLE_EMPTY;
4734
4735 assert(object->copy == VM_OBJECT_NULL);
4736 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4737 purge_loop_quota = PURGE_LOOP_QUOTA;
4738
4739 local_freeq = VM_PAGE_NULL;
4740 local_freed = 0;
4741
4742 /*
4743 * Go through the object's resident pages and try and discard them.
4744 */
4745 next = (vm_page_t)queue_first(&object->memq);
4746 while (!queue_end(&object->memq, (queue_entry_t)next)) {
4747 p = next;
4748 next = (vm_page_t)queue_next(&next->listq);
4749
4750 if (purge_loop_quota-- == 0) {
4751 /*
4752 * Avoid holding the page queues lock for too long.
4753 * Let someone else take it for a while if needed.
4754 * Keep holding the object's lock to guarantee that
4755 * the object's page list doesn't change under us
4756 * while we yield.
4757 */
4758 if (local_freeq != VM_PAGE_NULL) {
4759 /*
4760 * Flush our queue of pages to free.
4761 */
4762 vm_page_free_list(local_freeq);
4763 local_freeq = VM_PAGE_NULL;
4764 local_freed = 0;
4765 }
4766 vm_page_unlock_queues();
4767 mutex_pause();
4768 vm_page_lock_queues();
4769
4770 /* resume with the current page and a new quota */
4771 purge_loop_quota = PURGE_LOOP_QUOTA;
4772 }
4773
4774
4775 if (p->busy || p->cleaning || p->laundry ||
4776 p->list_req_pending) {
4777 /* page is being acted upon, so don't mess with it */
4778 continue;
4779 }
4780 if (p->wire_count) {
4781 /* don't discard a wired page */
4782 continue;
4783 }
4784
4785 if (p->tabled) {
4786 /* clean up the object/offset table */
4787 vm_page_remove(p);
4788 }
4789 if (p->absent) {
4790 /* update the object's count of absent pages */
4791 vm_object_absent_release(object);
4792 }
4793
4794 /* we can discard this page */
4795
4796 /* advertise that this page is in a transition state */
4797 p->busy = TRUE;
4798
4799 if (p->no_isync == TRUE) {
4800 /* the page hasn't been mapped yet */
4801 /* (optimization to delay the i-cache sync) */
4802 } else {
4803 /* unmap the page */
4804 int refmod_state;
4805
4806 refmod_state = pmap_disconnect(p->phys_page);
4807 if (refmod_state & VM_MEM_MODIFIED) {
4808 p->dirty = TRUE;
4809 }
4810 }
4811
4812 if (p->dirty || p->precious) {
4813 /* we saved the cost of cleaning this page ! */
4814 num_purged_pages++;
4815 vm_page_purged_count++;
4816 }
4817
4818 /* remove page from active or inactive queue... */
4819 VM_PAGE_QUEUES_REMOVE(p);
4820
4821 /* ... and put it on our queue of pages to free */
4822 assert(!p->laundry);
4823 assert(p->object != kernel_object);
4824 assert(p->pageq.next == NULL &&
4825 p->pageq.prev == NULL);
4826 p->pageq.next = (queue_entry_t) local_freeq;
4827 local_freeq = p;
4828 if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
4829 /* flush our queue of pages to free */
4830 vm_page_free_list(local_freeq);
4831 local_freeq = VM_PAGE_NULL;
4832 local_freed = 0;
4833 }
4834 }
4835
4836 /* flush our local queue of pages to free one last time */
4837 if (local_freeq != VM_PAGE_NULL) {
4838 vm_page_free_list(local_freeq);
4839 local_freeq = VM_PAGE_NULL;
4840 local_freed = 0;
4841 }
4842
4843 return num_purged_pages;
4844 }
4845
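/*
 * Illustrative sketch (not compiled): the locking protocol expected by
 * vm_object_purge().  On entry and on return the caller holds both the
 * object lock and the page queues lock (the routine may drop and
 * re-take the page queues lock internally).
 */
#if 0
	unsigned int	purged;

	vm_object_lock(object);
	vm_page_lock_queues();
	purged = vm_object_purge(object);
	vm_page_unlock_queues();
	vm_object_unlock(object);
#endif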
4846 /*
4847 * vm_object_purgable_control() allows the caller to control and investigate the
4848 * state of a purgable object. A purgable object is created via a call to
4849 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgable object will
4850 * never be coalesced with any other object -- even other purgable objects --
4851 * and will thus always remain a distinct object. A purgable object has
4852 * special semantics when its reference count is exactly 1. If its reference
4853 * count is greater than 1, then a purgable object will behave like a normal
4854 * object and attempts to use this interface will result in an error return
4855 * of KERN_INVALID_ARGUMENT.
4856 *
4857 * A purgable object may be put into a "volatile" state which will make the
4858 * object's pages eligible to be reclaimed without paging to backing
4859 * store if the system runs low on memory. If the pages in a volatile
4860 * purgable object are reclaimed, the purgable object is said to have been
4861 * "emptied." When a purgable object is emptied the system will reclaim as
4862 * many pages from the object as it can in a convenient manner (pages already
4863 * en route to backing store or busy for other reasons are left as is). When
4864 * a purgable object is made volatile, its pages will generally be reclaimed
4865 * before other pages in the application's working set. This semantic is
4866 * generally used by applications which can recreate the data in the object
4867 * faster than it can be paged in. One such example might be media assets
4868 * which can be reread from a much faster RAID volume.
4869 *
4870 * A purgable object may be designated as "non-volatile" which means it will
4871 * behave like all other objects in the system with pages being written to and
4872 * read from backing store as needed to satisfy system memory needs. If the
4873 * object was emptied before the object was made non-volatile, that fact will
4874 * be returned as the old state of the purgable object (see
4875 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
4876 * were reclaimed as part of emptying the object will be refaulted in as
4877 * zero-fill on demand. It is up to the application to note that an object
4878 * was emptied and recreate the object's contents if necessary. When a
4879 * purgable object is made non-volatile, its pages will generally not be paged
4880 * out to backing store in the immediate future. A purgable object may also
4881 * be manually emptied.
4882 *
4883 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
4884 * volatile purgable object may be queried at any time. This information may
4885 * be used as a control input to let the application know when the system is
4886 * experiencing memory pressure and is reclaiming memory.
4887 *
4888 * The specified address may be any address within the purgable object. If
4889 * the specified address does not represent any object in the target task's
4890 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
4891 * object containing the specified address is not a purgable object, then
4892 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
4893 * returned.
4894 *
4895 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
4896 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
4897 * state is used to set the new state of the purgable object and return its
4898 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgable
4899 * object is returned in the parameter state.
4900 *
4901 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
4902 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
4903 * the non-volatile, volatile and volatile/empty states described above.
4904 * Setting the state of a purgable object to VM_PURGABLE_EMPTY will
4905 * immediately reclaim as many pages in the object as can be conveniently
4906 * collected (some may have already been written to backing store or be
4907 * otherwise busy).
4908 *
4909 * The process of making a purgable object non-volatile and determining its
4910 * previous state is atomic. Thus, if a purgable object is made
4911 * VM_PURGABLE_NONVOLATILE and the old state is returned as
4912 * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are
4913 * completely intact and will remain so until the object is made volatile
4914 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
4915 * was reclaimed while it was in a volatile state and its previous contents
4916 * have been lost.
4917 */
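/*
 * Illustrative sketch (not compiled): the user-level view of the
 * purgable interface described above, assuming the vm_purgable_control()
 * call exported to user space.
 */
#if 0
	vm_address_t	addr = 0;
	int		state;
	kern_return_t	kr;

	/* create a purgable region */
	kr = vm_allocate(mach_task_self(), &addr, size,
			 VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);

	/* make it volatile: its pages may be reclaimed under memory pressure */
	state = VM_PURGABLE_VOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
				 VM_PURGABLE_SET_STATE, &state);

	/* later, before reusing the data, make it non-volatile again and
	 * find out (atomically) whether it was emptied in the meantime */
	state = VM_PURGABLE_NONVOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (state == VM_PURGABLE_EMPTY) {
		/* contents were reclaimed; recreate them before use */
	}
#endif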
4918 /*
4919 * The object must be locked.
4920 */
4921 kern_return_t
4922 vm_object_purgable_control(
4923 vm_object_t object,
4924 vm_purgable_t control,
4925 int *state)
4926 {
4927 int old_state;
4928 vm_page_t p;
4929
4930 if (object == VM_OBJECT_NULL) {
4931 /*
4932 * Object must already be present or it can't be purgable.
4933 */
4934 return KERN_INVALID_ARGUMENT;
4935 }
4936
4937 /*
4938 * Get current state of the purgable object.
4939 */
4940 switch (object->purgable) {
4941 case VM_OBJECT_NONPURGABLE:
4942 return KERN_INVALID_ARGUMENT;
4943
4944 case VM_OBJECT_PURGABLE_NONVOLATILE:
4945 old_state = VM_PURGABLE_NONVOLATILE;
4946 break;
4947
4948 case VM_OBJECT_PURGABLE_VOLATILE:
4949 old_state = VM_PURGABLE_VOLATILE;
4950 break;
4951
4952 case VM_OBJECT_PURGABLE_EMPTY:
4953 old_state = VM_PURGABLE_EMPTY;
4954 break;
4955
4956 default:
4957 old_state = VM_PURGABLE_NONVOLATILE;
4958 panic("Bad state (%d) for purgable object!\n",
4959 object->purgable);
4960 /*NOTREACHED*/
4961 }
4962
4963 /* purgable objects can't have delayed copies - now or in the future */
4964 assert(object->copy == VM_OBJECT_NULL);
4965 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4966
4967 /*
4968 * Execute the desired operation.
4969 */
4970 if (control == VM_PURGABLE_GET_STATE) {
4971 *state = old_state;
4972 return KERN_SUCCESS;
4973 }
4974
4975 switch (*state) {
4976 case VM_PURGABLE_NONVOLATILE:
4977 vm_page_lock_queues();
4978 if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) {
4979 assert(vm_page_purgeable_count >=
4980 object->resident_page_count);
4981 vm_page_purgeable_count -= object->resident_page_count;
4982 }
4983
4984 object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;
4985
4986 /*
4987 * If the object wasn't emptied, then mark all pages of the
4988 * object as referenced in order to give them a complete turn
4989 * of the virtual memory "clock" before becoming candidates
4990 * for paging out (if the system is suffering from memory
4991 * pressure). We don't really need to set the pmap reference
4992 * bits (which would be expensive) since the software copies
4993 * are believed if they're set to true ...
4994 */
4995 if (old_state != VM_PURGABLE_EMPTY) {
4996 for (p = (vm_page_t)queue_first(&object->memq);
4997 !queue_end(&object->memq, (queue_entry_t)p);
4998 p = (vm_page_t)queue_next(&p->listq))
4999 p->reference = TRUE;
5000 }
5001
5002 vm_page_unlock_queues();
5003
5004 break;
5005
5006 case VM_PURGABLE_VOLATILE:
5007 vm_page_lock_queues();
5008
5009 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5010 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5011 vm_page_purgeable_count += object->resident_page_count;
5012 }
5013
5014 object->purgable = VM_OBJECT_PURGABLE_VOLATILE;
5015
5016 /*
5017 * We want the newly volatile purgable object to be a
5018 * candidate for the pageout scan before other pages in the
5019 * application if the system is suffering from memory
5020 * pressure. To do this, we move a page of the object from
5021 * the active queue onto the inactive queue in order to
5022 * promote the object for early reclaim. We only need to move
5023 * a single page since the pageout scan will reap the entire
5024 * purgable object if it finds a single page in a volatile
5025 * state. Obviously we don't do this if there are no pages
5026 * associated with the object or we find a page of the object
5027 * already on the inactive queue.
5028 */
5029 for (p = (vm_page_t)queue_first(&object->memq);
5030 !queue_end(&object->memq, (queue_entry_t)p);
5031 p = (vm_page_t)queue_next(&p->listq)) {
5032 if (p->inactive) {
5033 /* already a page on the inactive queue */
5034 break;
5035 }
5036 if (p->active && !p->busy) {
5037 /* found one we can move */
5038 vm_page_deactivate(p);
5039 break;
5040 }
5041 }
5042 vm_page_unlock_queues();
5043
5044 break;
5045
5046
5047 case VM_PURGABLE_EMPTY:
5048 vm_page_lock_queues();
5049 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5050 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5051 vm_page_purgeable_count += object->resident_page_count;
5052 }
5053 (void) vm_object_purge(object);
5054 vm_page_unlock_queues();
5055 break;
5056
5057 }
5058 *state = old_state;
5059
5060 return KERN_SUCCESS;
5061 }
5062
5063 #if TASK_SWAPPER
5064 /*
5065 * vm_object_res_deallocate
5066 *
5067 * (recursively) decrement residence counts on vm objects and their shadows.
5068 * Called from vm_object_deallocate and when swapping out an object.
5069 *
5070 * The object is locked, and remains locked throughout the function,
5071 * even as we iterate down the shadow chain. Locks on intermediate objects
5072 * will be dropped, but not the original object.
5073 *
5074 * NOTE: this function used to use recursion, rather than iteration.
5075 */
5076
5077 __private_extern__ void
5078 vm_object_res_deallocate(
5079 vm_object_t object)
5080 {
5081 vm_object_t orig_object = object;
5082 /*
5083 * Object is locked so it can be called directly
5084 * from vm_object_deallocate. Original object is never
5085 * unlocked.
5086 */
5087 assert(object->res_count > 0);
5088 while (--object->res_count == 0) {
5089 assert(object->ref_count >= object->res_count);
5090 vm_object_deactivate_all_pages(object);
5091 /* iterate on shadow, if present */
5092 if (object->shadow != VM_OBJECT_NULL) {
5093 vm_object_t tmp_object = object->shadow;
5094 vm_object_lock(tmp_object);
5095 if (object != orig_object)
5096 vm_object_unlock(object);
5097 object = tmp_object;
5098 assert(object->res_count > 0);
5099 } else
5100 break;
5101 }
5102 if (object != orig_object)
5103 vm_object_unlock(object);
5104 }
5105
5106 /*
5107 * vm_object_res_reference
5108 *
5109 * Internal function to increment residence count on a vm object
5110 * and its shadows. It is called only from vm_object_reference, and
5111 * when swapping in a vm object, via vm_map_swap.
5112 *
5113 * The object is locked, and remains locked throughout the function,
5114 * even as we iterate down the shadow chain. Locks on intermediate objects
5115 * will be dropped, but not the original object.
5116 *
5117 * NOTE: this function used to use recursion, rather than iteration.
5118 */
5119
5120 __private_extern__ void
5121 vm_object_res_reference(
5122 vm_object_t object)
5123 {
5124 vm_object_t orig_object = object;
5125 /*
5126 * Object is locked, so this can be called directly
5127 * from vm_object_reference. This lock is never released.
5128 */
5129 while ((++object->res_count == 1) &&
5130 (object->shadow != VM_OBJECT_NULL)) {
5131 vm_object_t tmp_object = object->shadow;
5132
5133 assert(object->ref_count >= object->res_count);
5134 vm_object_lock(tmp_object);
5135 if (object != orig_object)
5136 vm_object_unlock(object);
5137 object = tmp_object;
5138 }
5139 if (object != orig_object)
5140 vm_object_unlock(object);
5141 assert(orig_object->ref_count >= orig_object->res_count);
5142 }
5143 #endif /* TASK_SWAPPER */
5144
5145 /*
5146 * vm_object_reference:
5147 *
5148 * Gets another reference to the given object.
5149 */
5150 #ifdef vm_object_reference
5151 #undef vm_object_reference
5152 #endif
5153 __private_extern__ void
5154 vm_object_reference(
5155 register vm_object_t object)
5156 {
5157 if (object == VM_OBJECT_NULL)
5158 return;
5159
5160 vm_object_lock(object);
5161 assert(object->ref_count > 0);
5162 vm_object_reference_locked(object);
5163 vm_object_unlock(object);
5164 }
5165
5166 #ifdef MACH_BSD
5167 /*
5168 * Scale the vm_object_cache
5169 * This is required to make sure that the vm_object_cache is big
5170 * enough to effectively cache the mapped file.
5171 * This is really important with UBC as all the regular file vnodes
5172 * have a memory object associated with them. Having this cache too
5173 * small results in rapid reclaim of vnodes and hurts performance a LOT!
5174 *
5175 * This is also needed as number of vnodes can be dynamically scaled.
5176 */
5177 kern_return_t
5178 adjust_vm_object_cache(
5179 __unused vm_size_t oval,
5180 vm_size_t nval)
5181 {
5182 vm_object_cached_max = nval;
5183 vm_object_cache_trim(FALSE);
5184 return (KERN_SUCCESS);
5185 }
5186 #endif /* MACH_BSD */
5187
5188
5189 /*
5190 * vm_object_transpose
5191 *
5192 * This routine takes two VM objects of the same size and exchanges
5193 * their backing store.
5194 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
5195 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
5196 *
5197 * The VM objects must not be locked by caller.
5198 */
5199 kern_return_t
5200 vm_object_transpose(
5201 vm_object_t object1,
5202 vm_object_t object2,
5203 vm_object_size_t transpose_size)
5204 {
5205 vm_object_t tmp_object;
5206 kern_return_t retval;
5207 boolean_t object1_locked, object2_locked;
5208 boolean_t object1_paging, object2_paging;
5209 vm_page_t page;
5210 vm_object_offset_t page_offset;
5211
5212 tmp_object = VM_OBJECT_NULL;
5213 object1_locked = FALSE; object2_locked = FALSE;
5214 object1_paging = FALSE; object2_paging = FALSE;
5215
5216 if (object1 == object2 ||
5217 object1 == VM_OBJECT_NULL ||
5218 object2 == VM_OBJECT_NULL) {
5219 /*
5220 * If the 2 VM objects are the same, there's
5221 * no point in exchanging their backing store.
5222 */
5223 retval = KERN_INVALID_VALUE;
5224 goto done;
5225 }
5226
5227 vm_object_lock(object1);
5228 object1_locked = TRUE;
5229 if (object1->copy || object1->shadow || object1->shadowed ||
5230 object1->purgable != VM_OBJECT_NONPURGABLE) {
5231 /*
5232 * We don't deal with copy or shadow objects (yet).
5233 */
5234 retval = KERN_INVALID_VALUE;
5235 goto done;
5236 }
5237 /*
5238 * Since we're about to mess with the object's backing store,
5239 * mark it as "paging_in_progress". Note that this is not enough
5240 * to prevent any paging activity on this object, so the caller should
5241 * have "quiesced" the objects beforehand, via a UPL operation with
5242 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
5243 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
5244 */
5245 vm_object_paging_begin(object1);
5246 object1_paging = TRUE;
5247 vm_object_unlock(object1);
5248 object1_locked = FALSE;
5249
5250 /*
5251 * Same as above for the 2nd object...
5252 */
5253 vm_object_lock(object2);
5254 object2_locked = TRUE;
5255 if (object2->copy || object2->shadow || object2->shadowed ||
5256 object2->purgable != VM_OBJECT_NONPURGABLE) {
5257 retval = KERN_INVALID_VALUE;
5258 goto done;
5259 }
5260 vm_object_paging_begin(object2);
5261 object2_paging = TRUE;
5262 vm_object_unlock(object2);
5263 object2_locked = FALSE;
5264
5265 /*
5266 * Allocate a temporary VM object to hold object1's contents
5267 * while we copy object2 to object1.
5268 */
5269 tmp_object = vm_object_allocate(transpose_size);
5270 vm_object_lock(tmp_object);
5271 vm_object_paging_begin(tmp_object);
5272 tmp_object->can_persist = FALSE;
5273
5274 /*
5275 * Since we need to lock both objects at the same time,
5276 * make sure we always lock them in the same order to
5277 * avoid deadlocks.
5278 */
5279 if (object1 < object2) {
5280 vm_object_lock(object1);
5281 vm_object_lock(object2);
5282 } else {
5283 vm_object_lock(object2);
5284 vm_object_lock(object1);
5285 }
5286 object1_locked = TRUE;
5287 object2_locked = TRUE;
5288
5289 if (object1->size != object2->size ||
5290 object1->size != transpose_size) {
5291 /*
5292 * If the 2 objects don't have the same size, we can't
5293 * exchange their backing stores or one would overflow.
5294 * If their size doesn't match the caller's
5295 * "transpose_size", we can't do it either because the
5296 * transpose operation will affect the entire span of
5297 * the objects.
5298 */
5299 retval = KERN_INVALID_VALUE;
5300 goto done;
5301 }
5302
5303
5304 /*
5305 * Transpose the lists of resident pages.
5306 */
5307 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
5308 /*
5309 * No pages in object1, just transfer pages
5310 * from object2 to object1. No need to go through
5311 * an intermediate object.
5312 */
5313 while (!queue_empty(&object2->memq)) {
5314 page = (vm_page_t) queue_first(&object2->memq);
5315 vm_page_rename(page, object1, page->offset);
5316 }
5317 assert(queue_empty(&object2->memq));
5318 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
5319 /*
5320 * No pages in object2, just transfer pages
5321 * from object1 to object2. No need to go through
5322 * an intermediate object.
5323 */
5324 while (!queue_empty(&object1->memq)) {
5325 page = (vm_page_t) queue_first(&object1->memq);
5326 vm_page_rename(page, object2, page->offset);
5327 }
5328 assert(queue_empty(&object1->memq));
5329 } else {
5330 /* transfer object1's pages to tmp_object */
5331 vm_page_lock_queues();
5332 while (!queue_empty(&object1->memq)) {
5333 page = (vm_page_t) queue_first(&object1->memq);
5334 page_offset = page->offset;
5335 vm_page_remove(page);
5336 page->offset = page_offset;
5337 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
5338 }
5339 vm_page_unlock_queues();
5340 assert(queue_empty(&object1->memq));
5341 /* transfer object2's pages to object1 */
5342 while (!queue_empty(&object2->memq)) {
5343 page = (vm_page_t) queue_first(&object2->memq);
5344 vm_page_rename(page, object1, page->offset);
5345 }
5346 assert(queue_empty(&object2->memq));
5347 /* transfer tmp_object's pages to object1 */
5348 while (!queue_empty(&tmp_object->memq)) {
5349 page = (vm_page_t) queue_first(&tmp_object->memq);
5350 queue_remove(&tmp_object->memq, page,
5351 vm_page_t, listq);
5352 vm_page_insert(page, object2, page->offset);
5353 }
5354 assert(queue_empty(&tmp_object->memq));
5355 }
5356
5357 /* no need to transpose the size: they should be identical */
5358 assert(object1->size == object2->size);
5359
5360 #define __TRANSPOSE_FIELD(field) \
5361 MACRO_BEGIN \
5362 tmp_object->field = object1->field; \
5363 object1->field = object2->field; \
5364 object2->field = tmp_object->field; \
5365 MACRO_END
5366
5367 assert(!object1->copy);
5368 assert(!object2->copy);
5369
5370 assert(!object1->shadow);
5371 assert(!object2->shadow);
5372
5373 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
5374 __TRANSPOSE_FIELD(pager);
5375 __TRANSPOSE_FIELD(paging_offset);
5376
5377 __TRANSPOSE_FIELD(pager_control);
5378 /* update the memory_objects' pointers back to the VM objects */
5379 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5380 memory_object_control_collapse(object1->pager_control,
5381 object1);
5382 }
5383 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5384 memory_object_control_collapse(object2->pager_control,
5385 object2);
5386 }
5387
5388 __TRANSPOSE_FIELD(absent_count);
5389
5390 assert(object1->paging_in_progress);
5391 assert(object2->paging_in_progress);
5392
5393 __TRANSPOSE_FIELD(pager_created);
5394 __TRANSPOSE_FIELD(pager_initialized);
5395 __TRANSPOSE_FIELD(pager_ready);
5396 __TRANSPOSE_FIELD(pager_trusted);
5397 __TRANSPOSE_FIELD(internal);
5398 __TRANSPOSE_FIELD(temporary);
5399 __TRANSPOSE_FIELD(private);
5400 __TRANSPOSE_FIELD(pageout);
5401 __TRANSPOSE_FIELD(true_share);
5402 __TRANSPOSE_FIELD(phys_contiguous);
5403 __TRANSPOSE_FIELD(nophyscache);
5404 __TRANSPOSE_FIELD(last_alloc);
5405 __TRANSPOSE_FIELD(sequential);
5406 __TRANSPOSE_FIELD(cluster_size);
5407 __TRANSPOSE_FIELD(existence_map);
5408 __TRANSPOSE_FIELD(cow_hint);
5409 __TRANSPOSE_FIELD(wimg_bits);
5410
5411 #undef __TRANSPOSE_FIELD
5412
5413 retval = KERN_SUCCESS;
5414
5415 done:
5416 /*
5417 * Cleanup.
5418 */
5419 if (tmp_object != VM_OBJECT_NULL) {
5420 vm_object_paging_end(tmp_object);
5421 vm_object_unlock(tmp_object);
5422 /*
5423 * Re-initialize the temporary object to avoid
5424 * deallocating a real pager.
5425 */
5426 _vm_object_allocate(transpose_size, tmp_object);
5427 vm_object_deallocate(tmp_object);
5428 tmp_object = VM_OBJECT_NULL;
5429 }
5430
5431 if (object1_locked) {
5432 vm_object_unlock(object1);
5433 object1_locked = FALSE;
5434 }
5435 if (object2_locked) {
5436 vm_object_unlock(object2);
5437 object2_locked = FALSE;
5438 }
5439 if (object1_paging) {
5440 vm_object_lock(object1);
5441 vm_object_paging_end(object1);
5442 vm_object_unlock(object1);
5443 object1_paging = FALSE;
5444 }
5445 if (object2_paging) {
5446 vm_object_lock(object2);
5447 vm_object_paging_end(object2);
5448 vm_object_unlock(object2);
5449 object2_paging = FALSE;
5450 }
5451
5452 return retval;
5453 }