/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_object.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory object module.
 */
#include <mach_pagemap.h>
#include <task_swapper.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/vm_param.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/zalloc.h>
#include <kern/host.h>
#include <kern/host_statistics.h>
#include <kern/processor.h>
#include <kern/misc_protos.h>

#include <vm/memory_object.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, but locked by the object's
 *	lock.
 *
 *	Each object also records the memory object reference
 *	that is used by the kernel to request and write
 *	back data (the memory object, field "pager"), etc...
 *
 *	Virtual memory objects are allocated to provide
 *	zero-filled memory (vm_allocate) or map a user-defined
 *	memory object into a virtual address space (vm_map).
 *
 *	Virtual memory objects that refer to a user-defined
 *	memory object are called "permanent", because all changes
 *	made in virtual memory are reflected back to the
 *	memory manager, which may then store it permanently.
 *	Other virtual memory objects are called "temporary",
 *	meaning that changes need be written back only when
 *	necessary to reclaim pages, and that storage associated
 *	with the object can be discarded once it is no longer
 *	referenced.
 *
 *	A permanent memory object may be mapped into more
 *	than one virtual address space.  Moreover, two threads
 *	may attempt to make the first mapping of a memory
 *	object concurrently.  Only one thread is allowed to
 *	complete this mapping; all others wait for the
 *	"pager_initialized" field to be asserted, indicating
 *	that the first thread has initialized all of the
 *	necessary fields in the virtual memory object structure.
 *
 *	The kernel relies on a *default memory manager* to
 *	provide backing storage for the zero-filled virtual
 *	memory objects.  The pager memory objects associated
 *	with these temporary virtual memory objects are only
 *	requested from the default memory manager when it
 *	becomes necessary.  Virtual memory objects
 *	that depend on the default memory manager are called
 *	"internal".  The "pager_created" field is provided to
 *	indicate whether these ports have ever been allocated.
 *
 *	The kernel may also create virtual memory objects to
 *	hold changed pages after a copy-on-write operation.
 *	In this case, the virtual memory object (and its
 *	backing storage -- its memory object) only contain
 *	those pages that have been changed.  The "shadow"
 *	field refers to the virtual memory object that contains
 *	the remainder of the contents.  The "shadow_offset"
 *	field indicates where in the "shadow" these contents begin.
 *	The "copy" field refers to a virtual memory object
 *	to which changed pages must be copied before changing
 *	this object, in order to implement another form
 *	of copy-on-write optimization.
 *
 *	The virtual memory object structure also records
 *	the attributes associated with its memory object.
 *	The "pager_ready", "can_persist" and "copy_strategy"
 *	fields represent those attributes.  The "cached_list"
 *	field is used in the implementation of the persistence
 *	property.
 *
 *	ZZZ Continue this comment.
 */
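/*
 * Illustrative sketch (not part of the kernel build, and not Apple's code):
 * a minimal user-space model of the shadow-chain lookup described in the
 * comment above.  The "toy_object" type and its fields are hypothetical;
 * they only mirror the way the "shadow" and "shadow_offset" fields are used
 * by the real vm_object machinery, where a page missing from an object is
 * looked up in its shadow at an adjusted offset.
 */
#if 0	/* illustration only */
struct toy_object {
	struct toy_object	*shadow;	/* backing object, or NULL */
	unsigned long		shadow_offset;	/* where our contents begin in the shadow */
	int			(*has_page)(struct toy_object *, unsigned long);
};

static struct toy_object *
toy_shadow_lookup(struct toy_object *obj, unsigned long offset,
		  unsigned long *resident_offset)
{
	/* Walk the shadow chain until some object holds the page. */
	while (obj != NULL) {
		if (obj->has_page(obj, offset)) {
			*resident_offset = offset;
			return obj;
		}
		offset += obj->shadow_offset;	/* translate into the shadow */
		obj = obj->shadow;
	}
	return NULL;	/* nobody has it: zero-fill or ask the pager */
}
#endif	/* illustration only */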
/* Forward declarations for internal functions. */
static kern_return_t	vm_object_terminate(
				vm_object_t	object);

extern void		vm_object_remove(
				vm_object_t	object);

static vm_object_t	vm_object_cache_trim(
				boolean_t	called_from_vm_object_deallocate);

static void		vm_object_deactivate_all_pages(
				vm_object_t	object);

static kern_return_t	vm_object_copy_call(
				vm_object_t		src_object,
				vm_object_offset_t	src_offset,
				vm_object_size_t	size,
				vm_object_t		*_result_object);

static void		vm_object_do_collapse(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_do_bypass(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_release_pager(
				memory_object_t	pager);

static zone_t		vm_object_zone;		/* vm backing store zone */
/*
 *	All wired-down kernel memory belongs to a single virtual
 *	memory object (kernel_object) to avoid wasting data structures.
 */
static struct vm_object	kernel_object_store;
vm_object_t		kernel_object;

/*
 *	The submap object is used as a placeholder for vm_map_submap
 *	operations.  The object is declared in vm_map.c because it
 *	is exported by the vm_map module.  The storage is declared
 *	here because it must be initialized here.
 */
static struct vm_object	vm_submap_object_store;

/*
 *	Virtual memory objects are initialized from
 *	a template (see vm_object_allocate).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see _vm_object_allocate()).
 */
static struct vm_object	vm_object_template;
/*
 *	Virtual memory objects that are not referenced by
 *	any address maps, but that are allowed to persist
 *	(an attribute specified by the associated memory manager),
 *	are kept in a queue (vm_object_cached_list).
 *
 *	When an object from this queue is referenced again,
 *	for example to make another address space mapping,
 *	it must be removed from the queue.  That is, the
 *	queue contains *only* objects with zero references.
 *
 *	The kernel may choose to terminate objects from this
 *	queue in order to reclaim storage.  The current policy
 *	is to permit a fixed maximum number of unreferenced
 *	objects (vm_object_cached_max).
 *
 *	A spin lock (accessed by routines
 *	vm_object_cache_{lock,lock_try,unlock}) governs the
 *	object cache.  It must be held when objects are
 *	added to or removed from the cache (in vm_object_terminate).
 *	The routines that acquire a reference to a virtual
 *	memory object based on one of the memory object ports
 *	must also lock the cache.
 *
 *	Ideally, the object cache should be more isolated
 *	from the reference mechanism, so that the lock need
 *	not be held to make simple references.
 */
static queue_head_t	vm_object_cached_list;
static int		vm_object_cached_count = 0;
static int		vm_object_cached_high;		/* highest # cached objects */
static int		vm_object_cached_max = 512;	/* may be patched */

static decl_mutex_data(,vm_object_cached_lock_data)

#define vm_object_cache_lock()		\
		mutex_lock(&vm_object_cached_lock_data)
#define vm_object_cache_lock_try()	\
		mutex_try(&vm_object_cached_lock_data)
#define vm_object_cache_unlock()	\
		mutex_unlock(&vm_object_cached_lock_data)
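/*
 * Note: the three macros above all serialize on the same mutex.  That one
 * lock protects both the list of cached (zero-reference) objects and the
 * pager-to-object hash table declared below, which is why hash lookups and
 * insertions in this file are always performed with the object cache locked.
 */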
#define VM_OBJECT_HASH_COUNT	1024
static queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];
static struct zone	*vm_object_hash_zone;
struct vm_object_hash_entry {
	queue_chain_t		hash_link;	/* hash chain link */
	memory_object_t		pager;		/* pager we represent */
	vm_object_t		object;		/* corresponding object */
	boolean_t		waiting;	/* someone waiting for termination */
};

typedef struct vm_object_hash_entry	*vm_object_hash_entry_t;
#define VM_OBJECT_HASH_ENTRY_NULL	((vm_object_hash_entry_t) 0)

#define VM_OBJECT_HASH_SHIFT	8
#define vm_object_hash(pager) \
	((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
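/*
 * The hash above maps a pager reference to a bucket index by discarding the
 * low-order bits of the pointer (which carry little information because of
 * allocation alignment) and folding what remains into the table with a
 * modulus by VM_OBJECT_HASH_COUNT.
 */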
void vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry);
/*
 *	vm_object_hash_lookup looks up a pager in the hashtable
 *	and returns the corresponding entry, with optional removal.
 */

static vm_object_hash_entry_t
vm_object_hash_lookup(
	memory_object_t	pager,
	boolean_t	remove_entry)
{
	register queue_t		bucket;
	register vm_object_hash_entry_t	entry;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	entry = (vm_object_hash_entry_t)queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t)entry)) {
		if (entry->pager == pager && !remove_entry)
			return(entry);
		else if (entry->pager == pager) {
			queue_remove(bucket, entry,
					vm_object_hash_entry_t, hash_link);
			return(entry);
		}

		entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
	}

	return(VM_OBJECT_HASH_ENTRY_NULL);
}
/*
 *	vm_object_hash_insert enters the specified
 *	pager / cache object association in the hashtable.
 */

static void
vm_object_hash_insert(
	vm_object_hash_entry_t	entry)
{
	register queue_t	bucket;

	bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];

	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
}
static vm_object_hash_entry_t
vm_object_hash_entry_alloc(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;

	entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
	entry->pager = pager;
	entry->object = VM_OBJECT_NULL;
	entry->waiting = FALSE;

	return(entry);
}

void
vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry)
{
	zfree(vm_object_hash_zone, entry);
}
/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */

__private_extern__ void
_vm_object_allocate(
	vm_object_size_t	size,
	vm_object_t		object)
{
	XPR(XPR_VM_OBJECT,
		"vm_object_allocate, object 0x%X size 0x%X\n",
		(integer_t)object, size, 0,0,0);

	*object = vm_object_template;
	queue_init(&object->memq);
	queue_init(&object->msr_q);
#ifdef UPL_DEBUG
	queue_init(&object->uplq);
#endif /* UPL_DEBUG */
	vm_object_lock_init(object);
	object->size = size;
}
__private_extern__ vm_object_t
vm_object_allocate(
	vm_object_size_t	size)
{
	register vm_object_t object;

	object = (vm_object_t) zalloc(vm_object_zone);

//	dbgLog(object, size, 0, 2);			/* (TEST/DEBUG) */

	if (object != VM_OBJECT_NULL)
		_vm_object_allocate(size, object);

	return object;
}
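/*
 * Note: vm_object_allocate() above is the zone-backed allocation path, while
 * _vm_object_allocate() only initializes caller-supplied storage from
 * vm_object_template; the latter is also how the statically allocated kernel
 * and submap objects are set up in vm_object_bootstrap() below.
 */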
/*
 *	vm_object_bootstrap:
 *
 *	Initialize the VM objects module.
 */
__private_extern__ void
vm_object_bootstrap(void)
{
	register int	i;

	vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
				round_page_32(512*1024),
				round_page_32(12*1024),
				"vm objects");

	queue_init(&vm_object_cached_list);
	mutex_init(&vm_object_cached_lock_data, 0);

	vm_object_hash_zone =
		zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
		      round_page_32(512*1024),
		      round_page_32(12*1024),
		      "vm object hash entries");

	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
		queue_init(&vm_object_hashtable[i]);
	/*
	 *	Fill in a template object, for quick initialization
	 */

	/* memq; Lock; init after allocation */
	vm_object_template.size = 0;
	vm_object_template.memq_hint = VM_PAGE_NULL;
	vm_object_template.ref_count = 1;
#if	TASK_SWAPPER
	vm_object_template.res_count = 1;
#endif	/* TASK_SWAPPER */
	vm_object_template.resident_page_count = 0;
	vm_object_template.copy = VM_OBJECT_NULL;
	vm_object_template.shadow = VM_OBJECT_NULL;
	vm_object_template.shadow_offset = (vm_object_offset_t) 0;
	vm_object_template.cow_hint = ~(vm_offset_t)0;
	vm_object_template.true_share = FALSE;

	vm_object_template.pager = MEMORY_OBJECT_NULL;
	vm_object_template.paging_offset = 0;
	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
	/* msr_q; init after allocation */

	vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
	vm_object_template.absent_count = 0;
	vm_object_template.paging_in_progress = 0;

	/* Begin bitfields */
	vm_object_template.all_wanted = 0; /* all bits FALSE */
	vm_object_template.pager_created = FALSE;
	vm_object_template.pager_initialized = FALSE;
	vm_object_template.pager_ready = FALSE;
	vm_object_template.pager_trusted = FALSE;
	vm_object_template.can_persist = FALSE;
	vm_object_template.internal = TRUE;
	vm_object_template.temporary = TRUE;
	vm_object_template.private = FALSE;
	vm_object_template.pageout = FALSE;
	vm_object_template.alive = TRUE;
	vm_object_template.purgable = VM_OBJECT_NONPURGABLE;
	vm_object_template.silent_overwrite = FALSE;
	vm_object_template.advisory_pageout = FALSE;
	vm_object_template.shadowed = FALSE;
	vm_object_template.terminating = FALSE;
	vm_object_template.shadow_severed = FALSE;
	vm_object_template.phys_contiguous = FALSE;
	vm_object_template.nophyscache = FALSE;
	/* End bitfields */

	/* cache bitfields */
	vm_object_template.wimg_bits = VM_WIMG_DEFAULT;

	/* cached_list; init after allocation */
	vm_object_template.last_alloc = (vm_object_offset_t) 0;
	vm_object_template.cluster_size = 0;
#if	MACH_PAGEMAP
	vm_object_template.existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */
#if	MACH_ASSERT
	vm_object_template.paging_object = VM_OBJECT_NULL;
#endif	/* MACH_ASSERT */
	/*
	 *	Initialize the "kernel object"
	 */

	kernel_object = &kernel_object_store;

/*
 * Note that in the following size specifications, we need to add 1 because
 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
 */

#ifdef ppc
	_vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
			kernel_object);
#else
	_vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
			kernel_object);
#endif
	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 *	Initialize the "submap object".  Make it as large as the
	 *	kernel object so that no limit is imposed on submap sizes.
	 */

	vm_submap_object = &vm_submap_object_store;
#ifdef ppc
	_vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
			vm_submap_object);
#else
	_vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
			vm_submap_object);
#endif
	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * Create an "extra" reference to this object so that we never
	 * try to deallocate it; zfree doesn't like to be called with
	 * memory that it did not allocate.
	 */
	vm_object_reference(vm_submap_object);

#if	MACH_PAGEMAP
	vm_external_module_initialize();
#endif	/* MACH_PAGEMAP */
}
__private_extern__ void
vm_object_init(void)
{
	/*
	 *	Finish initializing the kernel object.
	 */
}

/* remove the typedef below when emergency work-around is taken out */
typedef struct vnode_pager {
	memory_object_t		pager;
	memory_object_t		pager_handle;	/* pager */
	memory_object_control_t	control_handle;	/* memory object's control handle */
	void			*vnode_handle;	/* vnode handle */
} *vnode_pager_t;

#define	MIGHT_NOT_CACHE_SHADOWS		1
#if	MIGHT_NOT_CACHE_SHADOWS
static int cache_shadows = TRUE;
#endif	/* MIGHT_NOT_CACHE_SHADOWS */
/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
__private_extern__ void
vm_object_deallocate(
	register vm_object_t	object)
{
	boolean_t retry_cache_trim = FALSE;
	vm_object_t shadow = VM_OBJECT_NULL;

//	if(object)dbgLog(object, object->ref_count, object->can_persist, 3);	/* (TEST/DEBUG) */
//	else dbgLog(object, 0, 0, 3);	/* (TEST/DEBUG) */

	while (object != VM_OBJECT_NULL) {

	    /*
	     *	The cache holds a reference (uncounted) to
	     *	the object; we must lock it before removing
	     *	the object.
	     */
	    for (;;) {
		vm_object_cache_lock();

		/*
		 * if we try to take a regular lock here
		 * we risk deadlocking against someone
		 * holding a lock on this object while
		 * trying to vm_object_deallocate a different
		 * object
		 */
		if (vm_object_lock_try(object))
			break;
		vm_object_cache_unlock();
		mutex_pause();  /* wait a bit */
	    }
	    assert(object->ref_count > 0);

	    /*
	     *	If the object has a named reference, and only
	     *	that reference would remain, inform the pager
	     *	about the last "mapping" reference going away.
	     */
	    if ((object->ref_count == 2) && (object->named)) {
		memory_object_t	pager = object->pager;

		/* Notify the Pager that there are no */
		/* more mappers for this object */

		if (pager != MEMORY_OBJECT_NULL) {
		    vm_object_unlock(object);
		    vm_object_cache_unlock();

		    memory_object_unmap(pager);

		    for (;;) {
			vm_object_cache_lock();

			/*
			 * if we try to take a regular lock here
			 * we risk deadlocking against someone
			 * holding a lock on this object while
			 * trying to vm_object_deallocate a different
			 * object
			 */
			if (vm_object_lock_try(object))
				break;
			vm_object_cache_unlock();
			mutex_pause();  /* wait a bit */
		    }
		    assert(object->ref_count > 0);
		}
	    }

	    /*
	     *	Lose the reference. If other references
	     *	remain, then we are done, unless we need
	     *	to retry a cache trim.
	     *	If it is the last reference, then keep it
	     *	until any pending initialization is completed.
	     */

	    /* if the object is terminating, it cannot go into */
	    /* the cache and we obviously should not call      */
	    /* terminate again.  */

	    if ((object->ref_count > 1) || object->terminating) {
		object->ref_count--;
		vm_object_res_deallocate(object);
		vm_object_cache_unlock();

		if (object->ref_count == 1 &&
		    object->shadow != VM_OBJECT_NULL) {
			/*
			 * There's only one reference left on this
			 * VM object.  We can't tell if it's a valid
			 * one (from a mapping for example) or if this
			 * object is just part of a possibly stale and
			 * useless shadow chain.
			 * We would like to try and collapse it into
			 * its parent, but we don't have any pointers
			 * back to this parent object.
			 * But we can try and collapse this object with
			 * its own shadows, in case these are useless
			 * too...
			 * We can't bypass this object though, since we
			 * don't know if this last reference on it is
			 * meaningful or not.
			 */
			vm_object_collapse(object, 0, FALSE);
		}

		vm_object_unlock(object);
		if (retry_cache_trim &&
		    ((object = vm_object_cache_trim(TRUE)) !=
		     VM_OBJECT_NULL)) {
			continue;
		}
		return;
	    }

	    /*
	     *	We have to wait for initialization
	     *	before destroying or caching the object.
	     */

	    if (object->pager_created && ! object->pager_initialized) {
		assert(! object->can_persist);
		vm_object_assert_wait(object,
				      VM_OBJECT_EVENT_INITIALIZED,
				      THREAD_UNINT);
		vm_object_unlock(object);
		vm_object_cache_unlock();
		thread_block(THREAD_CONTINUE_NULL);
		continue;
	    }

	    /*
	     *	If this object can persist, then enter it in
	     *	the cache. Otherwise, terminate it.
	     *
	     *	NOTE:  Only permanent objects are cached, and
	     *	permanent objects cannot have shadows.  This
	     *	affects the residence counting logic in a minor
	     *	way (can do it in-line, mostly).
	     */

	    if ((object->can_persist) && (object->alive)) {
		/*
		 *	Now it is safe to decrement reference count,
		 *	and to return if reference count is > 0.
		 */
		if (--object->ref_count > 0) {
			vm_object_res_deallocate(object);
			vm_object_unlock(object);
			vm_object_cache_unlock();
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			     VM_OBJECT_NULL)) {
				continue;
			}
			return;
		}

#if	MIGHT_NOT_CACHE_SHADOWS
		/*
		 *	Remove shadow now if we don't
		 *	want to cache shadows.
		 */
		if (! cache_shadows) {
			shadow = object->shadow;
			object->shadow = VM_OBJECT_NULL;
		}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

		/*
		 *	Enter the object onto the queue of
		 *	cached objects, and deactivate
		 *	its pages.
		 */
		assert(object->shadow == VM_OBJECT_NULL);
		VM_OBJ_RES_DECR(object);
		XPR(XPR_VM_OBJECT,
		    "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
		    (integer_t)object,
		    (integer_t)vm_object_cached_list.next,
		    (integer_t)vm_object_cached_list.prev,0,0);

		vm_object_cached_count++;
		if (vm_object_cached_count > vm_object_cached_high)
			vm_object_cached_high = vm_object_cached_count;
		queue_enter(&vm_object_cached_list, object,
			vm_object_t, cached_list);
		vm_object_cache_unlock();
		vm_object_deactivate_all_pages(object);
		vm_object_unlock(object);

#if	MIGHT_NOT_CACHE_SHADOWS
		/*
		 *	If we have a shadow that we need
		 *	to deallocate, do so now, remembering
		 *	to trim the cache later.
		 */
		if (! cache_shadows && shadow != VM_OBJECT_NULL) {
			object = shadow;
			retry_cache_trim = TRUE;
			continue;
		}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

		/*
		 *	Trim the cache. If the cache trim
		 *	returns with a shadow for us to deallocate,
		 *	then remember to retry the cache trim
		 *	when we are done deallocating the shadow.
		 *	Otherwise, we are done.
		 */

		object = vm_object_cache_trim(TRUE);
		if (object == VM_OBJECT_NULL) {
			return;
		}
		retry_cache_trim = TRUE;

	    } else {
		/*
		 *	This object is not cachable; terminate it.
		 */
		XPR(XPR_VM_OBJECT,
	 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
		    (integer_t)object, object->resident_page_count,
		    object->paging_in_progress,
		    (void *)current_thread(),object->ref_count);

		VM_OBJ_RES_DECR(object);	/* XXX ? */
		/*
		 *	Terminate this object. If it had a shadow,
		 *	then deallocate it; otherwise, if we need
		 *	to retry a cache trim, do so now; otherwise,
		 *	we are done. "pageout" objects have a shadow,
		 *	but maintain a "paging reference" rather than
		 *	a normal reference.
		 */
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
		if(vm_object_terminate(object) != KERN_SUCCESS) {
			return;
		}
		if (shadow != VM_OBJECT_NULL) {
			object = shadow;
			continue;
		}
		if (retry_cache_trim &&
		    ((object = vm_object_cache_trim(TRUE)) !=
		     VM_OBJECT_NULL)) {
			continue;
		}
		return;
	    }
	}
	assert(! retry_cache_trim);
}
/*
 *	Check to see whether we really need to trim
 *	down the cache. If so, remove an object from
 *	the cache, terminate it, and repeat.
 *
 *	Called with, and returns with, cache lock unlocked.
 */
static vm_object_t
vm_object_cache_trim(
	boolean_t called_from_vm_object_deallocate)
{
	register vm_object_t object = VM_OBJECT_NULL;
	vm_object_t shadow;

	for (;;) {
		/*
		 *	If we no longer need to trim the cache,
		 *	then we are done.
		 */

		vm_object_cache_lock();
		if (vm_object_cached_count <= vm_object_cached_max) {
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}

		/*
		 *	We must trim down the cache, so remove
		 *	the first object in the cache.
		 */
		XPR(XPR_VM_OBJECT,
		"vm_object_cache_trim: removing from front of cache (%x, %x)\n",
			(integer_t)vm_object_cached_list.next,
			(integer_t)vm_object_cached_list.prev, 0, 0, 0);

		object = (vm_object_t) queue_first(&vm_object_cached_list);
		if(object == (vm_object_t) &vm_object_cached_list) {
			/* something's wrong with the calling parameter or */
			/* the value of vm_object_cached_count, just fix   */
			/* and return */
			if(vm_object_cached_max < 0)
				vm_object_cached_max = 0;
			vm_object_cached_count = 0;
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}
		vm_object_lock(object);
		queue_remove(&vm_object_cached_list, object, vm_object_t,
			     cached_list);
		vm_object_cached_count--;

		/*
		 *	Since this object is in the cache, we know
		 *	that it is initialized and has no references.
		 *	Take a reference to avoid recursive deallocations.
		 */

		assert(object->pager_initialized);
		assert(object->ref_count == 0);
		object->ref_count++;

		/*
		 *	Terminate the object.
		 *	If the object had a shadow, we let vm_object_deallocate
		 *	deallocate it. "pageout" objects have a shadow, but
		 *	maintain a "paging reference" rather than a normal
		 *	reference.
		 *	(We are careful here to limit recursion.)
		 */
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
		if(vm_object_terminate(object) != KERN_SUCCESS)
			continue;
		if (shadow != VM_OBJECT_NULL) {
			if (called_from_vm_object_deallocate) {
				return shadow;
			} else {
				vm_object_deallocate(shadow);
			}
		}
	}
}
boolean_t	vm_object_terminate_remove_all = FALSE;

/*
 *	Routine:	vm_object_terminate
 *	Purpose:
 *		Free all resources associated with a vm_object.
 *	In/out conditions:
 *		Upon entry, the object must be locked,
 *		and the object must have exactly one reference.
 *
 *		The shadow object reference is left alone.
 *
 *		The object must be unlocked if it's found that pages
 *		must be flushed to a backing object.  If someone
 *		manages to map the object while it is being flushed
 *		the object is returned unlocked and unchanged.  Otherwise,
 *		upon exit, the cache will be unlocked, and the
 *		object will cease to exist.
 */
static kern_return_t
vm_object_terminate(
	register vm_object_t	object)
{
	memory_object_t		pager;
	register vm_page_t	p;
	vm_object_t		shadow_object;

	XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
		(integer_t)object, object->ref_count, 0, 0, 0);
	if (!object->pageout && (!object->temporary || object->can_persist)
			&& (object->pager != NULL || object->shadow_severed)) {
	   vm_object_cache_unlock();
	   while (!queue_empty(&object->memq)) {
		/*
		 * Clear pager_trusted bit so that the pages get yanked
		 * out of the object instead of cleaned in place.  This
		 * prevents a deadlock in XMM and makes more sense anyway.
		 */
		object->pager_trusted = FALSE;

		p = (vm_page_t) queue_first(&object->memq);

		if (p->busy || p->cleaning) {
			if(p->cleaning || p->absent) {
				vm_object_paging_wait(object, THREAD_UNINT);
				continue;
			} else {
				panic("vm_object_terminate.3 0x%x 0x%x", object, p);
			}
		}

		vm_page_lock_queues();
		VM_PAGE_QUEUES_REMOVE(p);
		vm_page_unlock_queues();

		if (p->absent || p->private) {

			/*
			 *	For private pages, VM_PAGE_FREE just
			 *	leaves the page structure around for
			 *	its owner to clean up.  For absent
			 *	pages, the structure is returned to
			 *	the appropriate pool.
			 */

			goto free_page;
		}

		if (p->fictitious)
			panic("vm_object_terminate.4 0x%x 0x%x", object, p);

		if (!p->dirty)
			p->dirty = pmap_is_modified(p->phys_page);

		if ((p->dirty || p->precious) && !p->error && object->alive) {
			vm_pageout_cluster(p); /* flush page */
			vm_object_paging_wait(object, THREAD_UNINT);
			XPR(XPR_VM_OBJECT,
			    "vm_object_terminate restart, object 0x%X ref %d\n",
			    (integer_t)object, object->ref_count, 0, 0, 0);
		} else {
		    free_page:
			VM_PAGE_FREE(p);
		}
	   }
	   vm_object_unlock(object);
	   vm_object_cache_lock();
	   vm_object_lock(object);
	}
	/*
	 *	Make sure the object isn't already being terminated
	 */
	if(object->terminating) {
		object->ref_count -= 1;
		assert(object->ref_count > 0);
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Did somebody get a reference to the object while we were
	 *	cleaning it up?
	 */
	if(object->ref_count != 1) {
		object->ref_count -= 1;
		assert(object->ref_count > 0);
		vm_object_res_deallocate(object);
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Make sure no one can look us up now.
	 */

	object->terminating = TRUE;
	object->alive = FALSE;
	vm_object_remove(object);

	/*
	 *	Detach the object from its shadow if we are the shadow's
	 *	copy. The reference we hold on the shadow must be dropped
	 *	by our caller.
	 */
	if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
	    !(object->pageout)) {
		vm_object_lock(shadow_object);
		if (shadow_object->copy == object)
			shadow_object->copy = VM_OBJECT_NULL;
		vm_object_unlock(shadow_object);
	}

	/*
	 *	The pageout daemon might be playing with our pages.
	 *	Now that the object is dead, it won't touch any more
	 *	pages, but some pages might already be on their way out.
	 *	Hence, we wait until the active paging activities have ceased
	 *	before we break the association with the pager itself.
	 */
	while (object->paging_in_progress != 0) {
		vm_object_cache_unlock();
		vm_object_wait(object,
			       VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
			       THREAD_UNINT);
		vm_object_cache_lock();
		vm_object_lock(object);
	}

	pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;

	if (pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);
	vm_object_cache_unlock();

	object->ref_count--;
#if	TASK_SWAPPER
	assert(object->res_count == 0);
#endif	/* TASK_SWAPPER */

	assert (object->ref_count == 0);

	/*
	 *	Clean or free the pages, as appropriate.
	 *	It is possible for us to find busy/absent pages,
	 *	if some faults on this object were aborted.
	 */
	if (object->pageout) {
		assert(shadow_object != VM_OBJECT_NULL);
		assert(shadow_object == object->shadow);

		vm_pageout_object_terminate(object);

	} else if ((object->temporary && !object->can_persist) ||
		   (pager == MEMORY_OBJECT_NULL)) {
		while (!queue_empty(&object->memq)) {
			p = (vm_page_t) queue_first(&object->memq);

			VM_PAGE_FREE(p);
		}
	} else if (!queue_empty(&object->memq)) {
		panic("vm_object_terminate: queue just emptied isn't");
	}

	assert(object->paging_in_progress == 0);
	assert(object->ref_count == 0);

	/*
	 *	If the pager has not already been released by
	 *	vm_object_destroy, we need to terminate it and
	 *	release our reference to it here.
	 */
	if (pager != MEMORY_OBJECT_NULL) {
		vm_object_unlock(object);
		vm_object_release_pager(pager);
		vm_object_lock(object);
	}

	/* kick off anyone waiting on terminating */
	object->terminating = FALSE;
	vm_object_paging_begin(object);
	vm_object_paging_end(object);
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	vm_external_destroy(object->existence_map, object->size);
#endif	/* MACH_PAGEMAP */

	/*
	 *	Free the space for the object.
	 */
	zfree(vm_object_zone, object);
	return KERN_SUCCESS;
}
/*
 *	Routine:	vm_object_pager_wakeup
 *	Purpose:	Wake up anyone waiting for termination of a pager.
 */

static void
vm_object_pager_wakeup(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;
	boolean_t		waiting = FALSE;

	/*
	 *	If anyone was waiting for the memory_object_terminate
	 *	to be queued, wake them up now.
	 */
	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, TRUE);
	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
		waiting = entry->waiting;
	vm_object_cache_unlock();
	if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
		if (waiting)
			thread_wakeup((event_t) pager);
		vm_object_hash_entry_free(entry);
	}
}
/*
 *	Routine:	vm_object_release_pager
 *	Purpose:	Terminate the pager and, upon completion,
 *			release our last reference to it.
 *			just like memory_object_terminate, except
 *			that we wake up anyone blocked in vm_object_enter
 *			waiting for termination message to be queued
 *			before calling memory_object_init.
 */
static void
vm_object_release_pager(
	memory_object_t	pager)
{

	/*
	 *	Terminate the pager.
	 */

	(void) memory_object_terminate(pager);

	/*
	 *	Wakeup anyone waiting for this terminate
	 */

	vm_object_pager_wakeup(pager);

	/*
	 *	Release reference to pager.
	 */

	memory_object_deallocate(pager);
}
/*
 *	Routine:	vm_object_destroy
 *	Purpose:
 *		Shut down a VM object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
vm_object_destroy(
	vm_object_t		object,
	__unused kern_return_t	reason)
{
	memory_object_t		old_pager;

	if (object == VM_OBJECT_NULL)
		return(KERN_SUCCESS);

	/*
	 *	Remove the pager association immediately.
	 *
	 *	This will prevent the memory manager from further
	 *	meddling.  [If it wanted to flush data or make
	 *	other changes, it should have done so before performing
	 *	the destroy call.]
	 */

	vm_object_cache_lock();
	vm_object_lock(object);
	object->can_persist = FALSE;
	object->named = FALSE;
	object->alive = FALSE;

	/*
	 *	Rip out the pager from the vm_object now...
	 */

	vm_object_remove(object);
	old_pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;
	if (old_pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);
	vm_object_cache_unlock();

	/*
	 *	Wait for the existing paging activity (that got
	 *	through before we nulled out the pager) to subside.
	 */

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_unlock(object);

	/*
	 *	Terminate the object now.
	 */
	if (old_pager != MEMORY_OBJECT_NULL) {
		vm_object_release_pager(old_pager);

		/*
		 * JMM - Release the caller's reference.  This assumes the
		 * caller had a reference to release, which is a big (but
		 * currently valid) assumption if this is driven from the
		 * vnode pager (it is holding a named reference when making
		 * this call)..
		 */
		vm_object_deallocate(object);

	}
	return(KERN_SUCCESS);
}
/*
 *	vm_object_deactivate_all_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
static void
vm_object_deactivate_all_pages(
	register vm_object_t	object)
{
	register vm_page_t	p;

	queue_iterate(&object->memq, p, vm_page_t, listq) {
		vm_page_lock_queues();
		if (!p->busy)
			vm_page_deactivate(p);
		vm_page_unlock_queues();
	}
}
__private_extern__ void
vm_object_deactivate_pages(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		kill_page)
{
	vm_object_t	orig_object;
	int		pages_moved = 0;
	int		pages_found = 0;

	/*
	 * entered with object lock held, acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	orig_object = object;

	for (;;) {
		register vm_page_t	m;
		vm_object_offset_t	toffset;
		vm_object_size_t	tsize;

		vm_object_paging_begin(object);
		vm_page_lock_queues();

		for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {

			if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {

				pages_found++;

				if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {

					assert(!m->laundry);

					m->reference = FALSE;
					pmap_clear_reference(m->phys_page);

					if ((kill_page) && (object->internal)) {
						m->precious = FALSE;
						m->dirty = FALSE;
						pmap_clear_modify(m->phys_page);
						vm_external_state_clr(object->existence_map, offset);
					}
					VM_PAGE_QUEUES_REMOVE(m);

					assert(!m->laundry);
					assert(m->object != kernel_object);
					assert(m->pageq.next == NULL &&
					       m->pageq.prev == NULL);

					if(m->zero_fill) {
						queue_enter_first(
							&vm_page_queue_zf,
							m, vm_page_t, pageq);
					} else {
						queue_enter_first(
							&vm_page_queue_inactive,
							m, vm_page_t, pageq);
					}

					m->inactive = TRUE;
					if (!m->fictitious)
						vm_page_inactive_count++;

					pages_moved++;
				}
			}
		}
		vm_page_unlock_queues();
		vm_object_paging_end(object);

		if (object->shadow) {
			vm_object_t	tmp_object;

			kill_page = 0;

			offset += object->shadow_offset;

			tmp_object = object->shadow;
			vm_object_lock(tmp_object);

			if (object != orig_object)
				vm_object_unlock(object);
			object = tmp_object;
		} else
			break;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
/*
 *	Routine:	vm_object_pmap_protect
 *
 *	Purpose:
 *		Reduces the permission for all physical
 *		pages in the specified object range.
 *
 *		If removing write permission only, it is
 *		sufficient to protect only the pages in
 *		the top-level object; only those pages may
 *		have write permission.
 *
 *		If removing all access, we must follow the
 *		shadow chain from the top-level object to
 *		remove access to all pages in shadowed objects.
 *
 *		The object must *not* be locked.  The object must
 *		be temporary/internal.
 *
 *		If pmap is not NULL, this routine assumes that
 *		the only mappings for the pages are in that
 *		pmap.
 */

__private_extern__ void
vm_object_pmap_protect(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	vm_object_size_t		size,
	pmap_t				pmap,
	vm_map_offset_t			pmap_start,
	vm_prot_t			prot)
{
	if (object == VM_OBJECT_NULL)
		return;
	size = vm_object_round_page(size);
	offset = vm_object_trunc_page(offset);

	vm_object_lock(object);

	assert(object->internal);

	while (TRUE) {
		if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
			vm_object_unlock(object);
			pmap_protect(pmap, pmap_start, pmap_start + size, prot);
			return;
		}

		/* if we are doing large ranges with respect to resident */
		/* page count then we should iterate over pages otherwise */
		/* inverse page look-up will be faster */
		if (ptoa_64(object->resident_page_count / 4) < size) {
			vm_page_t		p;
			vm_object_offset_t	end;

			end = offset + size;

			if (pmap != PMAP_NULL) {
				queue_iterate(&object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    (offset <= p->offset) && (p->offset < end)) {
						vm_map_offset_t start;

						start = pmap_start + p->offset - offset;
						pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
					}
				}
			} else {
				queue_iterate(&object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    (offset <= p->offset) && (p->offset < end)) {

						pmap_page_protect(p->phys_page,
								  prot & ~p->page_lock);
					}
				}
			}
		} else {
			vm_page_t		p;
			vm_object_offset_t	end;
			vm_object_offset_t	target_off;

			end = offset + size;

			if (pmap != PMAP_NULL) {
				for(target_off = offset;
				    target_off < end;
				    target_off += PAGE_SIZE) {
					p = vm_page_lookup(object, target_off);
					if (p != VM_PAGE_NULL) {
						vm_offset_t start;
						start = pmap_start +
							(vm_offset_t)(p->offset - offset);
						pmap_protect(pmap, start,
							     start + PAGE_SIZE, prot);
					}
				}
			} else {
				for(target_off = offset;
				    target_off < end; target_off += PAGE_SIZE) {
					p = vm_page_lookup(object, target_off);
					if (p != VM_PAGE_NULL) {
						pmap_page_protect(p->phys_page,
								  prot & ~p->page_lock);
					}
				}
			}
		}

		if (prot == VM_PROT_NONE) {
			/*
			 *	Must follow shadow chain to remove access
			 *	to pages in shadowed objects.
			 */
			register vm_object_t	next_object;

			next_object = object->shadow;
			if (next_object != VM_OBJECT_NULL) {
				offset += object->shadow_offset;
				vm_object_lock(next_object);
				vm_object_unlock(object);
				object = next_object;
			}
			else {
				/*
				 *	End of chain - we are done.
				 */
				break;
			}
		}
		else {
			/*
			 *	Pages in shadowed objects may never have
			 *	write permission - we may stop here.
			 */
			break;
		}
	}

	vm_object_unlock(object);
}
/*
 *	Routine:	vm_object_copy_slowly
 *
 *	Description:
 *		Copy the specified range of the source
 *		virtual memory object without using
 *		protection-based optimizations (such
 *		as copy-on-write).  The pages in the
 *		region are actually copied.
 *
 *	In/out conditions:
 *		The caller must hold a reference and a lock
 *		for the source virtual memory object.  The source
 *		object will be returned *unlocked*.
 *
 *	Results:
 *		If the copy is completed successfully, KERN_SUCCESS is
 *		returned.  If the caller asserted the interruptible
 *		argument, and an interruption occurred while waiting
 *		for a user-generated event, MACH_SEND_INTERRUPTED is
 *		returned.  Other values may be returned to indicate
 *		hard errors during the copy operation.
 *
 *		A new virtual memory object is returned in a
 *		parameter (_result_object).  The contents of this
 *		new object, starting at a zero offset, are a copy
 *		of the source memory region.  In the event of
 *		an error, this parameter will contain the value
 *		VM_OBJECT_NULL.
 */
__private_extern__ kern_return_t
vm_object_copy_slowly(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	boolean_t		interruptible,
	vm_object_t		*_result_object)	/* OUT */
{
	vm_object_t		new_object;
	vm_object_offset_t	new_offset;

	vm_object_offset_t	src_lo_offset = src_offset;
	vm_object_offset_t	src_hi_offset = src_offset + size;

	XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
	    src_object, src_offset, size, 0, 0);

	if (size == 0) {
		vm_object_unlock(src_object);
		*_result_object = VM_OBJECT_NULL;
		return(KERN_INVALID_ARGUMENT);
	}

	/*
	 *	Prevent destruction of the source object while we copy.
	 */

	assert(src_object->ref_count > 0);
	src_object->ref_count++;
	VM_OBJ_RES_INCR(src_object);
	vm_object_unlock(src_object);

	/*
	 *	Create a new object to hold the copied pages.
	 *	We fill the new object starting at offset 0,
	 *	regardless of the input offset.
	 *	We don't bother to lock the new object within
	 *	this routine, since we have the only reference.
	 */

	new_object = vm_object_allocate(size);
	new_offset = 0;
	vm_object_lock(new_object);

	assert(size == trunc_page_64(size));	/* Will the loop terminate? */

	for ( ;
	    size != 0 ;
	    src_offset += PAGE_SIZE_64,
			new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
	    ) {
		vm_page_t		new_page;
		vm_fault_return_t	result;

		while ((new_page = vm_page_alloc(new_object, new_offset))
				== VM_PAGE_NULL) {
			if (!vm_page_wait(interruptible)) {
				vm_object_unlock(new_object);
				vm_object_deallocate(new_object);
				vm_object_deallocate(src_object);
				*_result_object = VM_OBJECT_NULL;
				return(MACH_SEND_INTERRUPTED);
			}
		}

		do {
			vm_prot_t	prot = VM_PROT_READ;
			vm_page_t	_result_page;
			vm_page_t	top_page;
			vm_page_t	result_page;
			kern_return_t	error_code;

			vm_object_lock(src_object);
			vm_object_paging_begin(src_object);

			XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
			result = vm_fault_page(src_object, src_offset,
				VM_PROT_READ, FALSE, interruptible,
				src_lo_offset, src_hi_offset,
				VM_BEHAVIOR_SEQUENTIAL,
				&prot, &_result_page, &top_page,
				(int *)0,
				&error_code, FALSE, FALSE, NULL, 0);

			switch(result) {
			    case VM_FAULT_SUCCESS:
				result_page = _result_page;

				/*
				 *	We don't need to hold the object
				 *	lock -- the busy page will be enough.
				 *	[We don't care about picking up any
				 *	new modifications.]
				 *
				 *	Copy the page to the new object.
				 *
				 *	If result_page is clean,
				 *	we could steal it instead
				 *	of copying.
				 */

				vm_object_unlock(result_page->object);
				vm_page_copy(result_page, new_page);

				/*
				 *	Let go of both pages (make them
				 *	not busy, perform wakeup, activate).
				 */

				new_page->busy = FALSE;
				new_page->dirty = TRUE;
				vm_object_lock(result_page->object);
				PAGE_WAKEUP_DONE(result_page);

				vm_page_lock_queues();
				if (!result_page->active &&
				    !result_page->inactive)
					vm_page_activate(result_page);
				vm_page_activate(new_page);
				vm_page_unlock_queues();

				/*
				 *	Release paging references and
				 *	top-level placeholder page, if any.
				 */

				vm_fault_cleanup(result_page->object,
						 top_page);

				break;

			    case VM_FAULT_RETRY:
				break;

			    case VM_FAULT_FICTITIOUS_SHORTAGE:
				vm_page_more_fictitious();
				break;

			    case VM_FAULT_MEMORY_SHORTAGE:
				if (vm_page_wait(interruptible))
					break;
				/* fall thru */

			    case VM_FAULT_INTERRUPTED:
				vm_page_free(new_page);
				vm_object_unlock(new_object);
				vm_object_deallocate(new_object);
				vm_object_deallocate(src_object);
				*_result_object = VM_OBJECT_NULL;
				return(MACH_SEND_INTERRUPTED);

			    case VM_FAULT_MEMORY_ERROR:
				/*
				 * A policy choice:
				 *	(a) ignore pages that we can't
				 *	    copy
				 *	(b) return the null object if
				 *	    any page fails [chosen]
				 */

				vm_page_lock_queues();
				vm_page_free(new_page);
				vm_page_unlock_queues();
				vm_object_unlock(new_object);
				vm_object_deallocate(new_object);
				vm_object_deallocate(src_object);
				*_result_object = VM_OBJECT_NULL;
				return(error_code ? error_code:
					KERN_MEMORY_ERROR);
			}
		} while (result != VM_FAULT_SUCCESS);
	}

	/*
	 *	Lose the extra reference, and return our object.
	 */

	vm_object_unlock(new_object);
	vm_object_deallocate(src_object);
	*_result_object = new_object;
	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_object_copy_quickly
 *
 *	Purpose:
 *		Copy the specified range of the source virtual
 *		memory object, if it can be done without waiting
 *		for user-generated events.
 *
 *	Results:
 *		If the copy is successful, the copy is returned in
 *		the arguments; otherwise, the arguments are not
 *		affected.
 *
 *	In/out conditions:
 *		The object should be unlocked on entry and exit.
 */

__private_extern__ boolean_t
vm_object_copy_quickly(
	vm_object_t		*_object,		/* INOUT */
	__unused vm_object_offset_t	offset,		/* IN */
	__unused vm_object_size_t	size,		/* IN */
	boolean_t		*_src_needs_copy,	/* OUT */
	boolean_t		*_dst_needs_copy)	/* OUT */
{
	vm_object_t	object = *_object;
	memory_object_copy_strategy_t copy_strategy;

	XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
	    *_object, offset, size, 0, 0);
	if (object == VM_OBJECT_NULL) {
		*_src_needs_copy = FALSE;
		*_dst_needs_copy = FALSE;
		return(TRUE);
	}

	vm_object_lock(object);

	copy_strategy = object->copy_strategy;

	switch (copy_strategy) {
	    case MEMORY_OBJECT_COPY_SYMMETRIC:

		/*
		 *	Symmetric copy strategy.
		 *	Make another reference to the object.
		 *	Leave object/offset unchanged.
		 */

		assert(object->ref_count > 0);
		object->ref_count++;
		vm_object_res_reference(object);
		object->shadowed = TRUE;
		vm_object_unlock(object);

		/*
		 *	Both source and destination must make
		 *	shadows, and the source must be made
		 *	read-only if not already.
		 */

		*_src_needs_copy = TRUE;
		*_dst_needs_copy = TRUE;

		break;

	    case MEMORY_OBJECT_COPY_DELAY:
		vm_object_unlock(object);
		return(FALSE);

	    default:
		vm_object_unlock(object);
		return(FALSE);
	}

	return(TRUE);
}
static int copy_call_count = 0;
static int copy_call_sleep_count = 0;
static int copy_call_restart_count = 0;

/*
 *	Routine:	vm_object_copy_call [internal]
 *
 *	Description:
 *		Copy the source object (src_object), using the
 *		user-managed copy algorithm.
 *
 *	In/out conditions:
 *		The source object must be locked on entry.  It
 *		will be *unlocked* on exit.
 *
 *	Results:
 *		If the copy is successful, KERN_SUCCESS is returned.
 *		A new object that represents the copied virtual
 *		memory is returned in a parameter (*_result_object).
 *		If the return value indicates an error, this parameter
 *		is not modified.
 */
static kern_return_t
vm_object_copy_call(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*_result_object)	/* OUT */
{
	kern_return_t	kr;
	vm_object_t	copy;
	boolean_t	check_ready = FALSE;

	/*
	 *	If a copy is already in progress, wait and retry.
	 *
	 *	XXX
	 *	Consider making this call interruptable, as Mike
	 *	intended it to be.
	 *
	 *	XXXO
	 *	Need a counter or version or something to allow
	 *	us to use the copy that the currently requesting
	 *	thread is obtaining -- is it worth adding to the
	 *	vm object structure? Depends how common this case is.
	 */
	copy_call_count++;
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_restart_count++;
	}

	/*
	 *	Indicate (for the benefit of memory_object_create_copy)
	 *	that we want a copy for src_object. (Note that we cannot
	 *	do a real assert_wait before calling memory_object_copy,
	 *	so we simply set the flag.)
	 */

	vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
	vm_object_unlock(src_object);

	/*
	 *	Ask the memory manager to give us a memory object
	 *	which represents a copy of the src object.
	 *	The memory manager may give us a memory object
	 *	which we already have, or it may give us a
	 *	new memory object. This memory object will arrive
	 *	via memory_object_create_copy.
	 */

	kr = KERN_FAILURE;	/* XXX need to change memory_object.defs */
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 *	Wait for the copy to arrive.
	 */
	vm_object_lock(src_object);
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_sleep_count++;
	}
Retry:
	assert(src_object->copy != VM_OBJECT_NULL);
	copy = src_object->copy;
	if (!vm_object_lock_try(copy)) {
		vm_object_unlock(src_object);
		mutex_pause();	/* wait a bit */
		vm_object_lock(src_object);
		goto Retry;
	}
	if (copy->size < src_offset+size)
		copy->size = src_offset+size;

	if (!copy->pager_ready)
		check_ready = TRUE;

	*_result_object = copy;
	vm_object_unlock(copy);
	vm_object_unlock(src_object);

	/* Wait for the copy to be ready. */
	if (check_ready == TRUE) {
		vm_object_lock(copy);
		while (!copy->pager_ready) {
			vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
		}
		vm_object_unlock(copy);
	}

	return KERN_SUCCESS;
}
static int copy_delayed_lock_collisions = 0;
static int copy_delayed_max_collisions = 0;
static int copy_delayed_lock_contention = 0;
static int copy_delayed_protect_iterate = 0;

/*
 *	Routine:	vm_object_copy_delayed [internal]
 *
 *	Description:
 *		Copy the specified virtual memory object, using
 *		the asymmetric copy-on-write algorithm.
 *
 *	In/out conditions:
 *		The src_object must be locked on entry.  It will be unlocked
 *		on exit - so the caller must also hold a reference to it.
 *
 *		This routine will not block waiting for user-generated
 *		events.  It is not interruptible.
 */
__private_extern__ vm_object_t
vm_object_copy_delayed(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size)
{
	vm_object_t		new_copy = VM_OBJECT_NULL;
	vm_object_t		old_copy;
	vm_page_t		p;
	vm_object_size_t	copy_size = src_offset + size;
	int			collisions = 0;

	/*
	 *	The user-level memory manager wants to see all of the changes
	 *	to this object, but it has promised not to make any changes on
	 *	its own.
	 *
	 *	Perform an asymmetric copy-on-write, as follows:
	 *		Create a new object, called a "copy object" to hold
	 *		pages modified by the new mapping  (i.e., the copy,
	 *		not the original mapping).
	 *		Record the original object as the backing object for
	 *		the copy object.  If the original mapping does not
	 *		change a page, it may be used read-only by the copy.
	 *		Record the copy object in the original object.
	 *		When the original mapping causes a page to be modified,
	 *		it must be copied to a new page that is "pushed" to
	 *		the copy object.
	 *		Mark the new mapping (the copy object) copy-on-write.
	 *		This makes the copy object itself read-only, allowing
	 *		it to be reused if the original mapping makes no
	 *		changes, and simplifying the synchronization required
	 *		in the "push" operation described above.
	 *
	 *	The copy-on-write is said to be asymmetric because the original
	 *	object is *not* marked copy-on-write. A copied page is pushed
	 *	to the copy object, regardless which party attempted to modify
	 *	the page.
	 *
	 *	Repeated asymmetric copy operations may be done. If the
	 *	original object has not been changed since the last copy, its
	 *	copy object can be reused. Otherwise, a new copy object can be
	 *	inserted between the original object and its previous copy
	 *	object.  Since any copy object is read-only, this cannot
	 *	affect the contents of the previous copy object.
	 *
	 *	Note that a copy object is higher in the object tree than the
	 *	original object; therefore, use of the copy object recorded in
	 *	the original object must be done carefully, to avoid deadlock.
	 */

 Retry:

	/*
	 * Wait for paging in progress.
	 */
	if (!src_object->true_share)
		vm_object_paging_wait(src_object, THREAD_UNINT);

	/*
	 *	See whether we can reuse the result of a previous
	 *	copy operation.
	 */

	old_copy = src_object->copy;
	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Try to get the locks (out of order)
		 */
		if (!vm_object_lock_try(old_copy)) {
			vm_object_unlock(src_object);
			mutex_pause();

			/* Heisenberg Rules */
			copy_delayed_lock_collisions++;
			if (collisions++ == 0)
				copy_delayed_lock_contention++;

			if (collisions > copy_delayed_max_collisions)
				copy_delayed_max_collisions = collisions;

			vm_object_lock(src_object);
			goto Retry;
		}

		/*
		 *	Determine whether the old copy object has
		 *	been modified.
		 */

		if (old_copy->resident_page_count == 0 &&
		    !old_copy->pager_created) {
			/*
			 *	It has not been modified.
			 *
			 *	Return another reference to
			 *	the existing copy-object if
			 *	we can safely grow it (if
			 *	needed).
			 */

			if (old_copy->size < copy_size) {
				/*
				 * We can't perform a delayed copy if any of the
				 * pages in the extended range are wired (because
				 * we can't safely take write permission away from
				 * wired pages).  If the pages aren't wired, then
				 * go ahead and protect them.
				 */
				copy_delayed_protect_iterate++;
				queue_iterate(&src_object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    p->offset >= old_copy->size &&
					    p->offset < copy_size) {
						if (p->wire_count > 0) {
							vm_object_unlock(old_copy);
							vm_object_unlock(src_object);

							if (new_copy != VM_OBJECT_NULL) {
								vm_object_unlock(new_copy);
								vm_object_deallocate(new_copy);
							}

							return VM_OBJECT_NULL;
						} else {
							pmap_page_protect(p->phys_page,
									  (VM_PROT_ALL & ~VM_PROT_WRITE &
									   ~p->page_lock));
						}
					}
				}
				old_copy->size = copy_size;
			}

			vm_object_reference_locked(old_copy);
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);

			if (new_copy != VM_OBJECT_NULL) {
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
			}

			return(old_copy);
		}

		/*
		 * Adjust the size argument so that the newly-created
		 * copy object will be large enough to back either the
		 * old copy object or the new mapping.
		 */
		if (old_copy->size > copy_size)
			copy_size = old_copy->size;

		if (new_copy == VM_OBJECT_NULL) {
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);
			new_copy = vm_object_allocate(copy_size);
			vm_object_lock(src_object);
			vm_object_lock(new_copy);
			goto Retry;
		}
		new_copy->size = copy_size;

		/*
		 *	The copy-object is always made large enough to
		 *	completely shadow the original object, since
		 *	it may have several users who want to shadow
		 *	the original object at different points.
		 */

		assert((old_copy->shadow == src_object) &&
		    (old_copy->shadow_offset == (vm_object_offset_t) 0));

	} else if (new_copy == VM_OBJECT_NULL) {
		vm_object_unlock(src_object);
		new_copy = vm_object_allocate(copy_size);
		vm_object_lock(src_object);
		vm_object_lock(new_copy);
		goto Retry;
	}

	/*
	 * We now have the src object locked, and the new copy object
	 * allocated and locked (and potentially the old copy locked).
	 * Before we go any further, make sure we can still perform
	 * a delayed copy, as the situation may have changed.
	 *
	 * Specifically, we can't perform a delayed copy if any of the
	 * pages in the range are wired (because we can't safely take
	 * write permission away from wired pages).  If the pages aren't
	 * wired, then go ahead and protect them.
	 */
	copy_delayed_protect_iterate++;
	queue_iterate(&src_object->memq, p, vm_page_t, listq) {
		if (!p->fictitious && p->offset < copy_size) {
			if (p->wire_count > 0) {
				if (old_copy)
					vm_object_unlock(old_copy);
				vm_object_unlock(src_object);
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
				return VM_OBJECT_NULL;
			} else {
				pmap_page_protect(p->phys_page,
						  (VM_PROT_ALL & ~VM_PROT_WRITE &
						   ~p->page_lock));
			}
		}
	}

	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Make the old copy-object shadow the new one.
		 *	It will receive no more pages from the original
		 *	object.
		 */

		src_object->ref_count--;	/* remove ref. from old_copy */
		assert(src_object->ref_count > 0);
		old_copy->shadow = new_copy;
		assert(new_copy->ref_count > 0);
		new_copy->ref_count++;		/* for old_copy->shadow ref. */

#if	TASK_SWAPPER
		if (old_copy->res_count) {
			VM_OBJ_RES_INCR(new_copy);
			VM_OBJ_RES_DECR(src_object);
		}
#endif	/* TASK_SWAPPER */

		vm_object_unlock(old_copy);	/* done with old_copy */
	}

	/*
	 *	Point the new copy at the existing object.
	 */
	new_copy->shadow = src_object;
	new_copy->shadow_offset = 0;
	new_copy->shadowed = TRUE;	/* caller must set needs_copy */
	assert(src_object->ref_count > 0);
	src_object->ref_count++;
	VM_OBJ_RES_INCR(src_object);
	src_object->copy = new_copy;
	vm_object_unlock(src_object);
	vm_object_unlock(new_copy);

	XPR(XPR_VM_OBJECT,
		"vm_object_copy_delayed: used copy object %X for source %X\n",
		(integer_t)new_copy, (integer_t)src_object, 0, 0, 0);

	return(new_copy);
}
/*
 *	Routine:	vm_object_copy_strategically
 *	Purpose:
 *		Perform a copy according to the source object's
 *		declared strategy.  This operation may block,
 *		and may be interrupted.
 */
__private_extern__ kern_return_t
vm_object_copy_strategically(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*dst_object,	/* OUT */
	vm_object_offset_t	*dst_offset,	/* OUT */
	boolean_t		*dst_needs_copy) /* OUT */
{
	kern_return_t	result;
	boolean_t	interruptible = THREAD_ABORTSAFE; /* XXX */
	memory_object_copy_strategy_t copy_strategy;

	assert(src_object != VM_OBJECT_NULL);

	vm_object_lock(src_object);

	/*
	 *	The copy strategy is only valid if the memory manager
	 *	is "ready".  Internal objects are always ready.
	 */

	while (!src_object->internal && !src_object->pager_ready) {
		wait_result_t wait_result;

		wait_result = vm_object_sleep(	src_object,
						VM_OBJECT_EVENT_PAGER_READY,
						interruptible);
		if (wait_result != THREAD_AWAKENED) {
			vm_object_unlock(src_object);
			*dst_object = VM_OBJECT_NULL;
			*dst_offset = 0;
			*dst_needs_copy = FALSE;
			return(MACH_SEND_INTERRUPTED);
		}
	}

	copy_strategy = src_object->copy_strategy;

	/*
	 *	Use the appropriate copy strategy.
	 */

	switch (copy_strategy) {
	    case MEMORY_OBJECT_COPY_DELAY:
		*dst_object = vm_object_copy_delayed(src_object,
						     src_offset, size);
		if (*dst_object != VM_OBJECT_NULL) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;
			result = KERN_SUCCESS;
			break;
		}
		vm_object_lock(src_object);
		/* fall thru when delayed copy not allowed */

	    case MEMORY_OBJECT_COPY_NONE:
		result = vm_object_copy_slowly(src_object, src_offset, size,
					       interruptible, dst_object);
		if (result == KERN_SUCCESS) {
			*dst_offset = 0;
			*dst_needs_copy = FALSE;
		}
		break;

	    case MEMORY_OBJECT_COPY_CALL:
		result = vm_object_copy_call(src_object, src_offset, size,
				dst_object);
		if (result == KERN_SUCCESS) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;
		}
		break;

	    case MEMORY_OBJECT_COPY_SYMMETRIC:
		XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", (natural_t)src_object, src_offset, size, 0, 0);
		vm_object_unlock(src_object);
		result = KERN_MEMORY_RESTART_COPY;
		break;

	    default:
		panic("copy_strategically: bad strategy");
		result = KERN_INVALID_ARGUMENT;
	}
	return(result);
}
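
/*
 * Illustrative sketch only (not compiled): one way a hypothetical caller
 * could drive vm_object_copy_strategically() and honor its OUT parameters.
 * The helper name (example_copy_for_mapping) is an assumption for
 * exposition; real callers live in the vm_map layer and do considerably
 * more locking and bookkeeping.
 */
#if 0
static kern_return_t
example_copy_for_mapping(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size)
{
	vm_object_t		dst_object;
	vm_object_offset_t	dst_offset;
	boolean_t		dst_needs_copy;
	kern_return_t		kr;

	kr = vm_object_copy_strategically(src_object, src_offset, size,
					  &dst_object, &dst_offset,
					  &dst_needs_copy);
	if (kr == KERN_MEMORY_RESTART_COPY) {
		/* symmetric copy: the caller must retry with its own scheme */
		return kr;
	}
	if (kr != KERN_SUCCESS)
		return kr;

	/*
	 * If dst_needs_copy is TRUE, the new mapping must be entered
	 * copy-on-write (needs_copy set) rather than fully writable.
	 */
	(void) dst_object;
	(void) dst_offset;
	(void) dst_needs_copy;
	return KERN_SUCCESS;
}
#endif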
/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */
boolean_t vm_object_shadow_check = FALSE;

__private_extern__ boolean_t
vm_object_shadow(
	vm_object_t		*object,	/* IN/OUT */
	vm_object_offset_t	*offset,	/* IN/OUT */
	vm_object_size_t	length)
{
	register vm_object_t	source;
	register vm_object_t	result;

	source = *object;
	assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);

	/*
	 *	Determine if we really need a shadow.
	 */

	if (vm_object_shadow_check && source->ref_count == 1 &&
	    (source->shadow == VM_OBJECT_NULL ||
	     source->shadow->copy == VM_OBJECT_NULL)) {
		source->shadowed = FALSE;
		return FALSE;
	}

	/*
	 *	Allocate a new object with the given length
	 */

	if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 *	The new object shadows the source object, adding
	 *	a reference to it.  Our caller changes his reference
	 *	to point to the new object, removing a reference to
	 *	the source object.  Net result: no change of reference
	 *	count.
	 */
	result->shadow = source;

	/*
	 *	Store the offset into the source object,
	 *	and fix up the offset into the new object.
	 */

	result->shadow_offset = *offset;

	/*
	 *	Return the new things
	 */

	*offset = 0;
	*object = result;
	return TRUE;
}
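
/*
 * Illustrative sketch only (not compiled): the usual calling pattern for
 * vm_object_shadow().  The helper and parameter names below are
 * assumptions for exposition; the real callers are in the vm_map code.
 * Because object and offset are IN/OUT, the caller's reference is simply
 * replaced by a reference to the new shadow object.
 */
#if 0
static void
example_shadow_entry(
	vm_object_t		*entry_object,	/* caller holds one ref */
	vm_object_offset_t	*entry_offset,
	vm_object_size_t	entry_size)
{
	vm_object_shadow(entry_object, entry_offset, entry_size);
	/*
	 * On return, *entry_object is the new shadow object (backed by
	 * the old one) and *entry_offset has been reset to 0; the net
	 * reference count on the old object is unchanged.
	 */
}
#endif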
/*
 *	The relationship between vm_object structures and
 *	the memory_object requires careful synchronization.
 *
 *	All associations are created by memory_object_create_named
 *	for external pagers and vm_object_pager_create for internal
 *	objects as follows:
 *
 *		pager:	the memory_object itself, supplied by
 *			the user requesting a mapping (or the kernel,
 *			when initializing internal objects); the
 *			kernel simulates holding send rights by keeping
 *			a port reference;
 *
 *			the memory object control port,
 *			created by the kernel; the kernel holds
 *			receive (and ownership) rights to this
 *			port, but no other references.
 *
 *	When initialization is complete, the "initialized" field
 *	is asserted.  Other mappings using a particular memory object,
 *	and any references to the vm_object gained through the
 *	port association must wait for this initialization to occur.
 *
 *	In order to allow the memory manager to set attributes before
 *	requests (notably virtual copy operations, but also data or
 *	unlock requests) are made, a "ready" attribute is made available.
 *	Only the memory manager may affect the value of this attribute.
 *	Its value does not affect critical kernel functions, such as
 *	internal object initialization or destruction.  [Furthermore,
 *	memory objects created by the kernel are assumed to be ready
 *	immediately; the default memory manager need not explicitly
 *	set the "ready" attribute.]
 *
 *	[Both the "initialized" and "ready" attribute wait conditions
 *	use the "pager" field as the wait event.]
 *
 *	The port associations can be broken down by any of the
 *	following routines:
 *		vm_object_terminate:
 *			No references to the vm_object remain, and
 *			the object cannot (or will not) be cached.
 *			This is the normal case, and is done even
 *			though one of the other cases has already been
 *			done.
 *		memory_object_destroy:
 *			The memory manager has requested that the
 *			kernel relinquish references to the memory
 *			object.  [The memory manager may not want to
 *			destroy the memory object, but may wish to
 *			refuse or tear down existing memory mappings.]
 *
 *	Each routine that breaks an association must break all of
 *	them at once.  At some later time, that routine must clear
 *	the pager field and release the memory object references.
 *	[Furthermore, each routine must cope with the simultaneous
 *	or previous operations of the others.]
 *
 *	In addition to the lock on the object, the vm_object_cache_lock
 *	governs the associations.  References gained through the
 *	association require use of the cache lock.
 *
 *	Because the pager field may be cleared spontaneously, it
 *	cannot be used to determine whether a memory object has
 *	ever been associated with a particular vm_object.  [This
 *	knowledge is important to the shadow object mechanism.]
 *	For this reason, an additional "created" attribute is
 *	provided.
 *
 *	During various paging operations, the pager reference found in the
 *	vm_object must be valid.  To prevent this from being released,
 *	(other than being removed, i.e., made null), routines may use
 *	the vm_object_paging_begin/end routines [actually, macros].
 *	The implementation uses the "paging_in_progress" and "wanted" fields.
 *	[Operations that alter the validity of the pager values include the
 *	termination routines and vm_object_collapse.]
 */
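
/*
 * Illustrative sketch only (not compiled): the wait discipline described
 * above, exactly as it appears throughout this file.  A thread that needs
 * the pager to be set up takes the object lock and sleeps on the
 * corresponding event until the first mapper finishes initialization.
 * The helper name (example_wait_for_pager) is an assumption for exposition.
 */
#if 0
static void
example_wait_for_pager(
	vm_object_t	object)
{
	vm_object_lock(object);
	while (!object->pager_initialized) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_INITIALIZED,
				THREAD_UNINT);
	}
	while (!object->pager_ready) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_PAGER_READY,
				THREAD_UNINT);
	}
	vm_object_unlock(object);
}
#endif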
static void		vm_object_abort_activity(
				vm_object_t	object);

/*
 *	Routine:	vm_object_abort_activity [internal use only]
 *	Purpose:
 *		Abort paging requests pending on this object.
 *	In/out conditions:
 *		The object is locked on entry and exit.
 */
static void
vm_object_abort_activity(
	vm_object_t	object)
{
	register vm_page_t	p;
	vm_page_t		next;

	XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
		(integer_t)object, 0, 0, 0, 0);

	/*
	 *	Abort all activity that would be waiting
	 *	for a result on this memory object.
	 *
	 *	We could also choose to destroy all pages
	 *	that we have in memory for this object, but
	 *	we don't.
	 */

	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		next = (vm_page_t) queue_next(&p->listq);

		/*
		 *	If it's being paged in, destroy it.
		 *	If an unlock has been requested, start it again.
		 */

		if (p->busy && p->absent) {
			VM_PAGE_FREE(p);
		} else {
			if (p->unlock_request != VM_PROT_NONE)
				p->unlock_request = VM_PROT_NONE;
			PAGE_WAKEUP(p);
		}

		p = next;
	}

	/*
	 *	Wake up threads waiting for the memory object to
	 *	become ready.
	 */

	object->pager_ready = TRUE;
	vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
}
/*
 *	Routine:	vm_object_pager_dead
 *
 *	Purpose:
 *		A port is being destroyed, and the IPC kobject code
 *		can't tell if it represents a pager port or not.
 *		So this function is called each time it sees a port
 *		die.
 *		THIS IS HORRIBLY INEFFICIENT.  We should only call
 *		this routine if we had requested a notification on
 *		this port.
 */

__private_extern__ void
vm_object_pager_dead(
	memory_object_t	pager)
{
	vm_object_t		object;
	vm_object_hash_entry_t	entry;

	/*
	 *	Perform essentially the same operations as in vm_object_lookup,
	 *	except that this time we look up based on the memory_object
	 *	port, not the control port.
	 */
	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, FALSE);
	if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
			entry->object == VM_OBJECT_NULL) {
		vm_object_cache_unlock();
		return;
	}

	object = entry->object;
	entry->object = VM_OBJECT_NULL;

	vm_object_lock(object);
	if (object->ref_count == 0) {
		XPR(XPR_VM_OBJECT_CACHE,
			"vm_object_destroy: removing %x from cache, head (%x, %x)\n",
			(integer_t)object,
			(integer_t)vm_object_cached_list.next,
			(integer_t)vm_object_cached_list.prev, 0,0);

		queue_remove(&vm_object_cached_list, object,
				vm_object_t, cached_list);
		vm_object_cached_count--;
	}
	object->ref_count++;
	vm_object_res_reference(object);

	object->can_persist = FALSE;

	assert(object->pager == pager);

	/*
	 *	Remove the pager association.
	 *
	 *	Note that the memory_object itself is dead, so
	 *	we don't bother with it.
	 */

	object->pager = MEMORY_OBJECT_NULL;

	vm_object_unlock(object);
	vm_object_cache_unlock();

	vm_object_pager_wakeup(pager);

	/*
	 *	Release the pager reference.  Note that there's no
	 *	point in trying the memory_object_terminate call
	 *	because the memory_object itself is dead.  Also
	 *	release the memory_object_control reference, since
	 *	the pager didn't do that either.
	 */

	memory_object_deallocate(pager);
	memory_object_control_deallocate(object->pager_request);

	/*
	 *	Restart pending page requests
	 */
	vm_object_lock(object);
	vm_object_abort_activity(object);
	vm_object_unlock(object);

	/*
	 *	Lose the object reference.
	 */

	vm_object_deallocate(object);
}
/*
 *	Routine:	vm_object_enter
 *	Purpose:
 *		Find a VM object corresponding to the given
 *		pager; if no such object exists, create one,
 *		and initialize the pager.
 */
vm_object_t
vm_object_enter(
	memory_object_t		pager,
	vm_object_size_t	size,
	boolean_t		internal,
	boolean_t		init,
	boolean_t		named)
{
	register vm_object_t	object;
	vm_object_t		new_object;
	boolean_t		must_init;
	vm_object_hash_entry_t	entry, new_entry;

	if (pager == MEMORY_OBJECT_NULL)
		return(vm_object_allocate(size));

	new_object = VM_OBJECT_NULL;
	new_entry = VM_OBJECT_HASH_ENTRY_NULL;
	must_init = init;

	/*
	 *	Look for an object associated with this port.
	 */

	vm_object_cache_lock();
	do {
		entry = vm_object_hash_lookup(pager, FALSE);

		if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
			if (new_object == VM_OBJECT_NULL) {
				/*
				 *	We must unlock to create a new object;
				 *	if we do so, we must try the lookup again.
				 */
				vm_object_cache_unlock();
				assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
				new_entry = vm_object_hash_entry_alloc(pager);
				new_object = vm_object_allocate(size);
				vm_object_cache_lock();
			} else {
				/*
				 *	Lookup failed twice, and we have something
				 *	to insert; set the object.
				 */
				vm_object_hash_insert(new_entry);
				entry = new_entry;
				entry->object = new_object;
				new_entry = VM_OBJECT_HASH_ENTRY_NULL;
				new_object = VM_OBJECT_NULL;
				must_init = TRUE;
			}
		} else if (entry->object == VM_OBJECT_NULL) {
			/*
			 *	If a previous object is being terminated,
			 *	we must wait for the termination message
			 *	to be queued (and lookup the entry again).
			 */
			entry->waiting = TRUE;
			entry = VM_OBJECT_HASH_ENTRY_NULL;
			assert_wait((event_t) pager, THREAD_UNINT);
			vm_object_cache_unlock();
			thread_block(THREAD_CONTINUE_NULL);
			vm_object_cache_lock();
		}
	} while (entry == VM_OBJECT_HASH_ENTRY_NULL);

	object = entry->object;
	assert(object != VM_OBJECT_NULL);

	if (!must_init) {
		vm_object_lock(object);
		assert(!internal || object->internal);
		if (named) {
			assert(!object->named);
			object->named = TRUE;
		}
		if (object->ref_count == 0) {
			XPR(XPR_VM_OBJECT_CACHE,
			    "vm_object_enter: removing %x from cache, head (%x, %x)\n",
			    (integer_t)object,
			    (integer_t)vm_object_cached_list.next,
			    (integer_t)vm_object_cached_list.prev, 0,0);
			queue_remove(&vm_object_cached_list, object,
					vm_object_t, cached_list);
			vm_object_cached_count--;
		}
		object->ref_count++;
		vm_object_res_reference(object);
		vm_object_unlock(object);
	}
	assert(object->ref_count > 0);

	vm_object_cache_unlock();

	XPR(XPR_VM_OBJECT,
		"vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
		(integer_t)pager, (integer_t)object, must_init, 0, 0);

	/*
	 *	If we raced to create a vm_object but lost, let's
	 *	throw away ours.
	 */

	if (new_object != VM_OBJECT_NULL)
		vm_object_deallocate(new_object);

	if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
		vm_object_hash_entry_free(new_entry);

	if (must_init) {
		memory_object_control_t control;

		/*
		 *	Allocate request port.
		 */

		control = memory_object_control_allocate(object);
		assert (control != MEMORY_OBJECT_CONTROL_NULL);

		vm_object_lock(object);
		assert(object != kernel_object);

		/*
		 *	Copy the reference we were given.
		 */

		memory_object_reference(pager);
		object->pager_created = TRUE;
		object->pager = pager;
		object->internal = internal;
		object->pager_trusted = internal;
		if (!internal) {
			/* copy strategy invalid until set by memory manager */
			object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
		}
		object->pager_control = control;
		object->pager_ready = FALSE;

		vm_object_unlock(object);

		/*
		 *	Let the pager know we're using it.
		 */

		(void) memory_object_init(pager,
			object->pager_control,
			PAGE_SIZE);

		vm_object_lock(object);
		if (named)
			object->named = TRUE;
		if (internal) {
			object->pager_ready = TRUE;
			vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
		}

		object->pager_initialized = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
	} else {
		vm_object_lock(object);
	}

	/*
	 *	[At this point, the object must be locked]
	 */

	/*
	 *	Wait for the work above to be done by the first
	 *	thread to map this object.
	 */

	while (!object->pager_initialized) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_INITIALIZED,
				THREAD_UNINT);
	}
	vm_object_unlock(object);

	XPR(XPR_VM_OBJECT,
	    "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
	    (integer_t)object, (integer_t)object->pager, internal, 0,0);
	return(object);
}
/*
 *	Routine:	vm_object_pager_create
 *	Purpose:
 *		Create a memory object for an internal object.
 *	In/out conditions:
 *		The object is locked on entry and exit;
 *		it may be unlocked within this call.
 *	Limitations:
 *		Only one thread may be performing a
 *		vm_object_pager_create on an object at
 *		a time.  Presumably, only the pageout
 *		daemon will be using this routine.
 */

void
vm_object_pager_create(
	register vm_object_t	object)
{
	memory_object_t		pager;
	vm_object_hash_entry_t	entry;
#if	MACH_PAGEMAP
	vm_object_size_t	size;
	vm_external_map_t	map;
#endif	/* MACH_PAGEMAP */

	XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
		(integer_t)object, 0,0,0,0);

	assert(object != kernel_object);

	if (memory_manager_default_check() != KERN_SUCCESS)
		return;

	/*
	 *	Prevent collapse or termination by holding a paging reference
	 */

	vm_object_paging_begin(object);
	if (object->pager_created) {
		/*
		 *	Someone else got to it first...
		 *	wait for them to finish initializing the ports
		 */
		while (!object->pager_initialized) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_INITIALIZED,
					THREAD_UNINT);
		}
		vm_object_paging_end(object);
		return;
	}

	/*
	 *	Indicate that a memory object has been assigned
	 *	before dropping the lock, to prevent a race.
	 */

	object->pager_created = TRUE;
	object->paging_offset = 0;

#if	MACH_PAGEMAP
	size = object->size;
#endif	/* MACH_PAGEMAP */
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	map = vm_external_create(size);
	vm_object_lock(object);
	assert(object->size == size);
	object->existence_map = map;
	vm_object_unlock(object);
#endif	/* MACH_PAGEMAP */

	/*
	 *	Create the [internal] pager, and associate it with this object.
	 *
	 *	We make the association here so that vm_object_enter()
	 *	can look up the object to complete initializing it.  No
	 *	user will ever map this object.
	 */
	{
		memory_object_default_t	dmm;
		vm_size_t		cluster_size;

		/* acquire a reference for the default memory manager */
		dmm = memory_manager_default_reference(&cluster_size);
		assert(cluster_size >= PAGE_SIZE);

		object->cluster_size = cluster_size; /* XXX ??? */
		assert(object->temporary);

		/* create our new memory object */
		(void) memory_object_create(dmm, object->size, &pager);

		memory_object_default_deallocate(dmm);
	}

	entry = vm_object_hash_entry_alloc(pager);

	vm_object_cache_lock();
	vm_object_hash_insert(entry);

	entry->object = object;
	vm_object_cache_unlock();

	/*
	 *	A reference was returned by
	 *	memory_object_create(), and it is
	 *	copied by vm_object_enter().
	 */

	if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
		panic("vm_object_pager_create: mismatch");

	/*
	 *	Drop the reference we were passed.
	 */
	memory_object_deallocate(pager);

	vm_object_lock(object);

	/*
	 *	Release the paging reference
	 */
	vm_object_paging_end(object);
}
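
/*
 * Illustrative sketch only (not compiled): the expected calling pattern
 * for vm_object_pager_create(), per the conditions above.  The caller
 * (in practice the pageout path) holds the object lock and skips the call
 * if a pager has already been created; the routine may temporarily drop
 * and retake the lock internally.
 */
#if 0
	vm_object_lock(object);
	if (!object->pager_created)
		vm_object_pager_create(object);
	vm_object_unlock(object);
#endif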
/*
 *	Routine:	vm_object_remove
 *	Purpose:
 *		Eliminate the pager/object association
 *		for this pager.
 *	Conditions:
 *		The object cache must be locked.
 */
__private_extern__ void
vm_object_remove(
	vm_object_t	object)
{
	memory_object_t pager;

	if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		entry = vm_object_hash_lookup(pager, FALSE);
		if (entry != VM_OBJECT_HASH_ENTRY_NULL)
			entry->object = VM_OBJECT_NULL;
	}
}

/*
 *	Global variables for vm_object_collapse():
 *
 *		Counts for normal collapses and bypasses.
 *		Debugging variables, to watch or disable collapse.
 */
static long	object_collapses = 0;
static long	object_bypasses  = 0;

static boolean_t	vm_object_collapse_allowed = TRUE;
static boolean_t	vm_object_bypass_allowed = TRUE;

static int	vm_external_discarded;
static int	vm_external_collapsed;

unsigned long	vm_object_collapse_encrypted = 0;
/*
 *	Routine:	vm_object_do_collapse
 *	Purpose:
 *		Collapse an object with the object backing it.
 *		Pages in the backing object are moved into the
 *		parent, and the backing object is deallocated.
 *	Conditions:
 *		Both objects and the cache are locked; the page
 *		queues are unlocked.
 */
static void
vm_object_do_collapse(
	vm_object_t object,
	vm_object_t backing_object)
{
	vm_page_t p, pp;
	vm_object_offset_t new_offset, backing_offset;
	vm_object_size_t size;

	backing_offset = object->shadow_offset;
	size = object->size;

	/*
	 *	Move all in-memory pages from backing_object
	 *	to the parent.  Pages that have been paged out
	 *	will be overwritten by any of the parent's
	 *	pages that shadow them.
	 */

	while (!queue_empty(&backing_object->memq)) {

		p = (vm_page_t) queue_first(&backing_object->memq);

		new_offset = (p->offset - backing_offset);

		assert(!p->busy || p->absent);

		/*
		 *	If the parent has a page here, or if
		 *	this page falls outside the parent,
		 *	dispose of it.
		 *
		 *	Otherwise, move it as planned.
		 */

		if (p->offset < backing_offset || new_offset >= size) {
			VM_PAGE_FREE(p);
		} else {
			/*
			 * The encryption key includes the "pager" and the
			 * "paging_offset".  These might not be the same in
			 * the new object, so we can't just move an encrypted
			 * page from one object to the other.  We can't just
			 * decrypt the page here either, because that would drop
			 * the object lock.
			 * The caller should check for encrypted pages before
			 * attempting to collapse.
			 */
			ASSERT_PAGE_DECRYPTED(p);

			pp = vm_page_lookup(object, new_offset);
			if (pp == VM_PAGE_NULL) {

				/*
				 *	Parent now has no page.
				 *	Move the backing object's page up.
				 */

				vm_page_rename(p, object, new_offset);
#if	MACH_PAGEMAP
			} else if (pp->absent) {

				/*
				 *	Parent has an absent page...
				 *	it's not being paged in, so
				 *	it must really be missing from
				 *	the parent.
				 *
				 *	Throw out the absent page...
				 *	any faults looking for that
				 *	page will restart with the new
				 *	one.
				 */

				VM_PAGE_FREE(pp);
				vm_page_rename(p, object, new_offset);
#endif	/* MACH_PAGEMAP */
			} else {
				assert(! pp->absent);

				/*
				 *	Parent object has a real page.
				 *	Throw away the backing object's
				 *	page.
				 */
				VM_PAGE_FREE(p);
			}
		}
	}

#if	!MACH_PAGEMAP
	assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL
		|| (!backing_object->pager_created
		&&  backing_object->pager == MEMORY_OBJECT_NULL));
#else
	assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
#endif	/* !MACH_PAGEMAP */

	if (backing_object->pager != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		/*
		 *	Move the pager from backing_object to object.
		 *
		 *	XXX We're only using part of the paging space
		 *	for keeps now... we ought to discard the
		 *	unused portion.
		 */

		assert(!object->paging_in_progress);
		object->pager = backing_object->pager;
		entry = vm_object_hash_lookup(object->pager, FALSE);
		assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
		entry->object = object;
		object->pager_created = backing_object->pager_created;
		object->pager_control = backing_object->pager_control;
		object->pager_ready = backing_object->pager_ready;
		object->pager_initialized = backing_object->pager_initialized;
		object->cluster_size = backing_object->cluster_size;
		object->paging_offset =
		    backing_object->paging_offset + backing_offset;
		if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_collapse(object->pager_control,
						       object);
		}
	}

	vm_object_cache_unlock();

#if	MACH_PAGEMAP
	/*
	 *	If the shadow offset is 0, the use the existence map from
	 *	the backing object if there is one.  If the shadow offset is
	 *	not zero, toss it.
	 *
	 *	XXX - If the shadow offset is not 0 then a bit copy is needed
	 *	if the map is to be salvaged.  For now, we just just toss the
	 *	old map, giving the collapsed object no map.  This means that
	 *	the pager is invoked for zero fill pages.  If analysis shows
	 *	that this happens frequently and is a performance hit, then
	 *	this code should be fixed to salvage the map.
	 */
	assert(object->existence_map == VM_EXTERNAL_NULL);
	if (backing_offset || (size != backing_object->size)) {
		vm_external_discarded++;
		vm_external_destroy(backing_object->existence_map,
			backing_object->size);
	} else {
		vm_external_collapsed++;
		object->existence_map = backing_object->existence_map;
	}
	backing_object->existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */

	/*
	 *	Object now shadows whatever backing_object did.
	 *	Note that the reference to backing_object->shadow
	 *	moves from within backing_object to within object.
	 */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->shadow_offset += backing_object->shadow_offset;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->shadow_offset = 0;
	}
	assert((object->shadow == VM_OBJECT_NULL) ||
	       (object->shadow->copy != backing_object));

	/*
	 *	Discard backing_object.
	 *
	 *	Since the backing object has no pages, no
	 *	pager left, and no object references within it,
	 *	all that is necessary is to dispose of it.
	 */

	assert((backing_object->ref_count == 1) &&
	       (backing_object->resident_page_count == 0) &&
	       (backing_object->paging_in_progress == 0));

	backing_object->alive = FALSE;
	vm_object_unlock(backing_object);

	XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
		(integer_t)backing_object, 0,0,0,0);

	zfree(vm_object_zone, backing_object);

	object_collapses++;
}
static void
vm_object_do_bypass(
	vm_object_t object,
	vm_object_t backing_object)
{
	/*
	 *	Make the parent shadow the next object
	 *	in the chain.
	 */

#if	TASK_SWAPPER
	/*
	 *	Do object reference in-line to
	 *	conditionally increment shadow's
	 *	residence count.  If object is not
	 *	resident, leave residence count
	 *	on shadow alone.
	 */
	if (backing_object->shadow != VM_OBJECT_NULL) {
		vm_object_lock(backing_object->shadow);
		backing_object->shadow->ref_count++;
		if (object->res_count != 0)
			vm_object_res_reference(backing_object->shadow);
		vm_object_unlock(backing_object->shadow);
	}
#else	/* TASK_SWAPPER */
	vm_object_reference(backing_object->shadow);
#endif	/* TASK_SWAPPER */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->shadow_offset += backing_object->shadow_offset;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->shadow_offset = 0;
	}

	/*
	 *	Backing object might have had a copy pointer
	 *	to us.  If it did, clear it.
	 */
	if (backing_object->copy == object) {
		backing_object->copy = VM_OBJECT_NULL;
	}

	/*
	 *	Drop the reference count on backing_object.
	 *
	 *	Since its ref_count was at least 2, it
	 *	will not vanish; so we don't need to call
	 *	vm_object_deallocate.
	 *	[FBDP: that doesn't seem to be true any more]
	 *
	 *	The res_count on the backing object is
	 *	conditionally decremented.  It's possible
	 *	(via vm_pageout_scan) to get here with
	 *	a "swapped" object, which has a 0 res_count,
	 *	in which case, the backing object res_count
	 *	is already down by one.
	 *
	 *	Don't call vm_object_deallocate unless
	 *	ref_count drops to zero.
	 *
	 *	The ref_count can drop to zero here if the
	 *	backing object could be bypassed but not
	 *	collapsed, such as when the backing object
	 *	is temporary and cachable.
	 */
	if (backing_object->ref_count > 1) {
		backing_object->ref_count--;
#if	TASK_SWAPPER
		if (object->res_count != 0)
			vm_object_res_deallocate(backing_object);
		assert(backing_object->ref_count > 0);
#endif	/* TASK_SWAPPER */
		vm_object_unlock(backing_object);
	} else {

		/*
		 *	Drop locks so that we can deallocate
		 *	the backing object.
		 */

#if	TASK_SWAPPER
		if (object->res_count == 0) {
			/* XXX get a reference for the deallocate below */
			vm_object_res_reference(backing_object);
		}
#endif	/* TASK_SWAPPER */
		vm_object_unlock(object);
		vm_object_unlock(backing_object);
		vm_object_deallocate(backing_object);

		/*
		 *	Relock object.  We don't have to reverify
		 *	its state since vm_object_collapse will
		 *	do that for us as it starts at the
		 *	top of the chain.
		 */

		vm_object_lock(object);
	}

	object_bypasses++;
}
/*
 *	vm_object_collapse:
 *
 *	Perform an object collapse or an object bypass if appropriate.
 *	The real work of collapsing and bypassing is performed in
 *	the routines vm_object_do_collapse and vm_object_do_bypass.
 *
 *	Requires that the object be locked and the page queues be unlocked.
 */
static unsigned long vm_object_collapse_calls = 0;
static unsigned long vm_object_collapse_objects = 0;
static unsigned long vm_object_collapse_do_collapse = 0;
static unsigned long vm_object_collapse_do_bypass = 0;

__private_extern__ void
vm_object_collapse(
	register vm_object_t		object,
	register vm_object_offset_t	hint_offset,
	boolean_t			can_bypass)
{
	register vm_object_t		backing_object;
	register unsigned int		rcount;
	register unsigned int		size;
	vm_object_offset_t		collapse_min_offset;
	vm_object_offset_t		collapse_max_offset;
	vm_page_t			page;
	vm_object_t			original_object;

	vm_object_collapse_calls++;

	if (! vm_object_collapse_allowed &&
	    ! (can_bypass && vm_object_bypass_allowed)) {
		return;
	}

	XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
		(integer_t)object, 0,0,0,0);

	if (object == VM_OBJECT_NULL)
		return;

	original_object = object;

	while (TRUE) {
		vm_object_collapse_objects++;

		/*
		 *	Verify that the conditions are right for either
		 *	collapse or bypass:
		 */

		/*
		 *	There is a backing object, and
		 */

		backing_object = object->shadow;
		if (backing_object == VM_OBJECT_NULL) {
			if (object != original_object) {
				vm_object_unlock(object);
			}
			return;
		}

		/*
		 *	No pages in the object are currently
		 *	being paged out, and
		 */
		if (object->paging_in_progress != 0 ||
		    object->absent_count != 0) {
			/* try and collapse the rest of the shadow chain */
			vm_object_lock(backing_object);
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}

		vm_object_lock(backing_object);

		/*
		 *	The backing object is not read_only,
		 *	and no pages in the backing object are
		 *	currently being paged out.
		 *	The backing object is internal.
		 */

		if (!backing_object->internal ||
		    backing_object->paging_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}

		/*
		 *	The backing object can't be a copy-object:
		 *	the shadow_offset for the copy-object must stay
		 *	as 0.  Furthermore (for the 'we have all the
		 *	pages' case), if we bypass backing_object and
		 *	just shadow the next object in the chain, old
		 *	pages from that object would then have to be copied
		 *	BOTH into the (former) backing_object and into the
		 *	parent object.
		 */
		if (backing_object->shadow != VM_OBJECT_NULL &&
		    backing_object->shadow->copy == backing_object) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}

		/*
		 *	We can now try to either collapse the backing
		 *	object (if the parent is the only reference to
		 *	it) or (perhaps) remove the parent's reference
		 *	to it.
		 *
		 *	If there is exactly one reference to the backing
		 *	object, we may be able to collapse it into the
		 *	parent.
		 *
		 *	If MACH_PAGEMAP is defined:
		 *	The parent must not have a pager created for it,
		 *	since collapsing a backing_object dumps new pages
		 *	into the parent that its pager doesn't know about
		 *	(and the collapse code can't merge the existence
		 *	maps).
		 *	Otherwise:
		 *	As long as one of the objects is still not known
		 *	to the pager, we can collapse them.
		 */
		if (backing_object->ref_count == 1 &&
		    (!object->pager_created
#if	!MACH_PAGEMAP
		     || !backing_object->pager_created
#endif	/*!MACH_PAGEMAP */
		    ) && vm_object_collapse_allowed) {

			XPR(XPR_VM_OBJECT,
		   "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
				(integer_t)backing_object, (integer_t)object,
				(integer_t)backing_object->pager,
				(integer_t)backing_object->pager_control, 0);

			/*
			 *	We need the cache lock for collapsing,
			 *	but we must not deadlock.
			 */

			if (! vm_object_cache_lock_try()) {
				if (object != original_object) {
					vm_object_unlock(object);
				}
				vm_object_unlock(backing_object);
				return;
			}

			/*
			 * We can't collapse the object if it contains
			 * any encypted page, because the encryption key
			 * includes the <object,offset> info.  We can't
			 * drop the object lock in vm_object_do_collapse()
			 * so we can't decrypt the page there either.
			 */
			if (vm_pages_encrypted) {
				collapse_min_offset = object->shadow_offset;
				collapse_max_offset =
					object->shadow_offset + object->size;
				queue_iterate(&backing_object->memq,
					      page, vm_page_t, listq) {
					if (page->encrypted &&
					    (page->offset >=
					     collapse_min_offset) &&
					    (page->offset <
					     collapse_max_offset)) {
						/*
						 * We found an encrypted page
						 * in the backing object,
						 * within the range covered
						 * by the parent object: we can
						 * not collapse them.
						 */
						vm_object_collapse_encrypted++;
						vm_object_cache_unlock();
						goto try_bypass;
					}
				}
			}

			/*
			 *	Collapse the object with its backing
			 *	object, and try again with the object's
			 *	new backing object.
			 */

			vm_object_do_collapse(object, backing_object);
			vm_object_collapse_do_collapse++;
			continue;
		}

	try_bypass:
		/*
		 *	Collapsing the backing object was not possible
		 *	or permitted, so let's try bypassing it.
		 */

		if (! (can_bypass && vm_object_bypass_allowed)) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}

		/*
		 *	If the object doesn't have all its pages present,
		 *	we have to make sure no pages in the backing object
		 *	"show through" before bypassing it.
		 */
		size = atop(object->size);
		rcount = object->resident_page_count;
		if (rcount != size) {
			vm_object_offset_t	offset;
			vm_object_offset_t	backing_offset;
			unsigned int		backing_rcount;
			unsigned int		lookups = 0;

			/*
			 *	If the backing object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (backing_object->pager_created
#if	MACH_PAGEMAP
			    && (backing_object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				continue;
			}

			/*
			 *	If the object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (object->pager_created
#if	MACH_PAGEMAP
			    && (object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				continue;
			}

			/*
			 *	If all of the pages in the backing object are
			 *	shadowed by the parent object, the parent
			 *	object no longer has to shadow the backing
			 *	object; it can shadow the next one in the
			 *	chain.
			 *
			 *	If the backing object has existence info,
			 *	we must also examine its existence info.
			 */

			backing_offset = object->shadow_offset;
			backing_rcount = backing_object->resident_page_count;

#define EXISTS_IN_OBJECT(obj, off, rc) \
	(vm_external_state_get((obj)->existence_map, \
	 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
	((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))

			/*
			 * Check the hint location first
			 * (since it is often the quickest way out of here).
			 */
			if (object->cow_hint != ~(vm_offset_t)0)
				hint_offset = (vm_object_offset_t)object->cow_hint;
			else
				hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
				              (hint_offset - 8 * PAGE_SIZE_64) : 0;

			if (EXISTS_IN_OBJECT(backing_object, hint_offset +
			                     backing_offset, backing_rcount) &&
			    !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
				/* dependency right at the hint */
				object->cow_hint = (vm_offset_t)hint_offset;
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				continue;
			}

			/*
			 * If the object's window onto the backing_object
			 * is large compared to the number of resident
			 * pages in the backing object, it makes sense to
			 * walk the backing_object's resident pages first.
			 *
			 * NOTE: Pages may be in both the existence map and
			 * resident.  So, we can't permanently decrement
			 * the rcount here because the second loop may
			 * find the same pages in the backing object's
			 * existence map that we found here and we would
			 * double-decrement the rcount.  We also may or
			 * may not have found the
			 */
			if (backing_rcount && size >
			    ((backing_object->existence_map) ?
			     backing_rcount : (backing_rcount >> 1))) {
				unsigned int rc = rcount;
				vm_page_t p;

				backing_rcount = backing_object->resident_page_count;
				p = (vm_page_t)queue_first(&backing_object->memq);
				do {
					/* Until we get more than one lookup lock */
					if (lookups > 256) {
						lookups = 0;
					}

					offset = (p->offset - backing_offset);
					if (offset < object->size &&
					    offset != hint_offset &&
					    !EXISTS_IN_OBJECT(object, offset, rc)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t)offset;
						break;
					}
					p = (vm_page_t) queue_next(&p->listq);

				} while (--backing_rcount);
				if (backing_rcount != 0 ) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					continue;
				}
			}

			/*
			 * Walk through the offsets looking for pages in the
			 * backing object that show through to the object.
			 */
			if (backing_rcount || backing_object->existence_map) {
				offset = hint_offset;

				while((offset =
				      (offset + PAGE_SIZE_64 < object->size) ?
				      (offset + PAGE_SIZE_64) : 0) != hint_offset) {

					/* Until we get more than one lookup lock */
					if (lookups > 256) {
						lookups = 0;
					}

					if (EXISTS_IN_OBJECT(backing_object, offset +
					    backing_offset, backing_rcount) &&
					    !EXISTS_IN_OBJECT(object, offset, rcount)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t)offset;
						break;
					}
				}
				if (offset != hint_offset) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					continue;
				}
			}
		}

		/* reset the offset hint for any objects deeper in the chain */
		object->cow_hint = (vm_offset_t)0;

		/*
		 *	All interesting pages in the backing object
		 *	already live in the parent or its pager.
		 *	Thus we can bypass the backing object.
		 */

		vm_object_do_bypass(object, backing_object);
		vm_object_collapse_do_bypass++;

		/*
		 *	Try again with this object's new backing object.
		 */

		continue;
	}

	if (object != original_object) {
		vm_object_unlock(object);
	}
}
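
/*
 * Illustrative sketch only (not compiled): the wrap-around scan order used
 * by the bypass check above.  Starting just past hint_offset, the loop
 * visits every other page-aligned offset in the object exactly once
 * (hint_offset itself was checked separately), wrapping at the object size
 * back to 0 and stopping when it comes back around to hint_offset.  The
 * helper name (example_scan_order) is an assumption for exposition.
 */
#if 0
static void
example_scan_order(
	vm_object_offset_t	hint_offset,
	vm_object_size_t	object_size)
{
	vm_object_offset_t	offset = hint_offset;

	while ((offset = (offset + PAGE_SIZE_64 < object_size) ?
			 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
		/* visit "offset" here */
	}
}
#endif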
/*
 *	Routine:	vm_object_page_remove: [internal]
 *	Purpose:
 *		Removes all physical pages in the specified
 *		object range from the object's list of pages.
 *
 *	In/out conditions:
 *		The object must be locked.
 *		The object must not have paging_in_progress, usually
 *		guaranteed by not having a pager.
 */
unsigned int vm_object_page_remove_lookup = 0;
unsigned int vm_object_page_remove_iterate = 0;

__private_extern__ void
vm_object_page_remove(
	register vm_object_t		object,
	register vm_object_offset_t	start,
	register vm_object_offset_t	end)
{
	register vm_page_t	p, next;

	/*
	 *	One and two page removals are most popular.
	 *	The factor of 16 here is somewhat arbitrary.
	 *	It balances vm_object_lookup vs iteration.
	 */

	if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
		vm_object_page_remove_lookup++;

		for (; start < end; start += PAGE_SIZE_64) {
			p = vm_page_lookup(object, start);
			if (p != VM_PAGE_NULL) {
				assert(!p->cleaning && !p->pageout);
				if (!p->fictitious)
					pmap_disconnect(p->phys_page);
				VM_PAGE_FREE(p);
			}
		}
	} else {
		vm_object_page_remove_iterate++;

		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
			next = (vm_page_t) queue_next(&p->listq);
			if ((start <= p->offset) && (p->offset < end)) {
				assert(!p->cleaning && !p->pageout);
				if (!p->fictitious)
					pmap_disconnect(p->phys_page);
				VM_PAGE_FREE(p);
			}
			p = next;
		}
	}
}
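
/*
 * Illustrative sketch only (not compiled): the lookup-vs-iterate threshold
 * above, with concrete numbers.  Assuming 4 KB pages, removing a 2-page
 * range from an object with 1000 resident pages takes the per-page
 * vm_page_lookup() path (2 < 1000/16 = 62), while removing a 512-page
 * range walks the whole memq instead.  The helper name is hypothetical.
 */
#if 0
static boolean_t
example_use_lookup_path(
	vm_object_offset_t	start,
	vm_object_offset_t	end,
	unsigned int		resident_page_count)
{
	return atop_64(end - start) < (unsigned)resident_page_count / 16;
}
#endif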
/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object into coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object(s) must *not* be locked.  The map must be locked
 *	to preserve the reference to the object(s).
 */
static int vm_object_coalesce_count = 0;

__private_extern__ boolean_t
vm_object_coalesce(
	register vm_object_t		prev_object,
	vm_object_t			next_object,
	vm_object_offset_t		prev_offset,
	__unused vm_object_offset_t	next_offset,
	vm_object_size_t		prev_size,
	vm_object_size_t		next_size)
{
	vm_object_size_t	newsize;

	if (next_object != VM_OBJECT_NULL) {
		return(FALSE);
	}

	if (prev_object == VM_OBJECT_NULL) {
		return(TRUE);
	}

	XPR(XPR_VM_OBJECT,
	    "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
	    (integer_t)prev_object, prev_offset, prev_size, next_size, 0);

	vm_object_lock(prev_object);

	/*
	 *	Try to collapse the object first
	 */
	vm_object_collapse(prev_object, prev_offset, TRUE);

	/*
	 *	Can't coalesce if pages not mapped to
	 *	prev_entry may be in use any way:
	 *	. more than one reference
	 *	. paged out
	 *	. shadows another object
	 *	. has a copy elsewhere
	 *	. is purgable
	 *	. paging references (pages might be in page-list)
	 */

	if ((prev_object->ref_count > 1) ||
	    prev_object->pager_created ||
	    (prev_object->shadow != VM_OBJECT_NULL) ||
	    (prev_object->copy != VM_OBJECT_NULL) ||
	    (prev_object->true_share != FALSE) ||
	    (prev_object->purgable != VM_OBJECT_NONPURGABLE) ||
	    (prev_object->paging_in_progress != 0)) {
		vm_object_unlock(prev_object);
		return(FALSE);
	}

	vm_object_coalesce_count++;

	/*
	 *	Remove any pages that may still be in the object from
	 *	a previous deallocation.
	 */
	vm_object_page_remove(prev_object,
		prev_offset + prev_size,
		prev_offset + prev_size + next_size);

	/*
	 *	Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->size) {
#if	MACH_PAGEMAP
		/*
		 *	We cannot extend an object that has existence info,
		 *	since the existence info might then fail to cover
		 *	the entire object.
		 *
		 *	This assertion must be true because the object
		 *	has no pager, and we only create existence info
		 *	for objects with pagers.
		 */
		assert(prev_object->existence_map == VM_EXTERNAL_NULL);
#endif	/* MACH_PAGEMAP */
		prev_object->size = newsize;
	}

	vm_object_unlock(prev_object);
	return(TRUE);
}
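
/*
 * Illustrative sketch only (not compiled): how a map-layer caller might
 * ask whether a new allocation can be folded onto the object backing the
 * previous map entry.  The helper and parameter names are assumptions for
 * exposition.  If vm_object_coalesce() returns TRUE, the new range can
 * reuse prev_object at prev_offset + prev_size instead of allocating a
 * fresh object.
 */
#if 0
static boolean_t
example_try_coalesce(
	vm_object_t		prev_object,
	vm_object_offset_t	prev_offset,
	vm_object_size_t	prev_size,
	vm_object_size_t	new_size)
{
	return vm_object_coalesce(prev_object,
				  VM_OBJECT_NULL,	/* second object must be NULL */
				  prev_offset,
				  (vm_object_offset_t) 0,
				  prev_size,
				  new_size);
}
#endif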
/*
 *	Attach a set of physical pages to an object, so that they can
 *	be mapped by mapping the object.  Typically used to map IO memory.
 *
 *	The mapping function and its private data are used to obtain the
 *	physical addresses for each page to be mapped.
 */
void
vm_object_page_map(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_object_offset_t	(*map_fn)(void *map_fn_data,
					  vm_object_offset_t offset),
	void			*map_fn_data)	/* private to map_fn */
{
	int			num_pages;
	int			i;
	vm_page_t		m;
	vm_page_t		old_page;
	vm_object_offset_t	addr;

	num_pages = atop_64(size);

	for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {

		addr = (*map_fn)(map_fn_data, offset);

		while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
			vm_page_more_fictitious();

		vm_object_lock(object);
		if ((old_page = vm_page_lookup(object, offset))
				!= VM_PAGE_NULL) {
			vm_page_lock_queues();
			vm_page_free(old_page);
			vm_page_unlock_queues();
		}

		vm_page_init(m, addr);
		/* private normally requires lock_queues but since we */
		/* are initializing the page, its not necessary here  */
		m->private = TRUE;		/* don`t free page */
		vm_page_insert(m, object, offset);

		PAGE_WAKEUP_DONE(m);
		vm_object_unlock(object);
	}
}
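
/*
 * Illustrative sketch only (not compiled): a minimal map_fn callback for
 * the page-map routine above, assuming the device memory being mapped is
 * physically contiguous.  The names (example_contig_map_fn, base) are
 * hypothetical; map_fn_data is used to carry the base physical address.
 */
#if 0
static vm_object_offset_t
example_contig_map_fn(
	void			*map_fn_data,
	vm_object_offset_t	offset)
{
	vm_object_offset_t	base = *(vm_object_offset_t *) map_fn_data;

	return base + offset;	/* physical address backing this offset */
}

	/* then, for instance:
	 *	vm_object_page_map(object, 0, size,
	 *			   example_contig_map_fn, &base);
	 */
#endif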
#include <mach_kdb.h>

#if	MACH_KDB
#include <ddb/db_output.h>
#include <vm/vm_print.h>

#define printf	kdbprintf

extern boolean_t	vm_object_cached(
				vm_object_t object);

extern void		print_bitstring(
				char byte);

boolean_t	vm_object_print_pages = FALSE;
void
print_bitstring(
	char byte)
{
	printf("%c%c%c%c%c%c%c%c",
	       ((byte & (1 << 0)) ? '1' : '0'),
	       ((byte & (1 << 1)) ? '1' : '0'),
	       ((byte & (1 << 2)) ? '1' : '0'),
	       ((byte & (1 << 3)) ? '1' : '0'),
	       ((byte & (1 << 4)) ? '1' : '0'),
	       ((byte & (1 << 5)) ? '1' : '0'),
	       ((byte & (1 << 6)) ? '1' : '0'),
	       ((byte & (1 << 7)) ? '1' : '0'));
}
boolean_t
vm_object_cached(
	register vm_object_t object)
{
	register vm_object_t o;

	queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
		if (object == o) {
			return TRUE;
		}
	}
	return FALSE;
}
#if	MACH_PAGEMAP
/*
 *	vm_external_print:	[ debug ]
 */
void
vm_external_print(
	vm_external_map_t	emap,
	vm_size_t		size)
{
	if (emap == VM_EXTERNAL_NULL) {
		printf("0  ");
	} else {
		vm_size_t existence_size = stob(size);
		printf("{ size=%d, map=[", existence_size);
		if (existence_size > 0) {
			print_bitstring(emap[0]);
		}
		if (existence_size > 1) {
			print_bitstring(emap[1]);
		}
		if (existence_size > 2) {
			printf("...");
			print_bitstring(emap[existence_size-1]);
		}
		printf("] }\n");
	}
	return;
}
#endif	/* MACH_PAGEMAP */
/*
 *	vm_follow_object:	[ debug ]
 */
void
vm_follow_object(
	vm_object_t object)
{
	int orig_db_indent = db_indent;

	while (TRUE) {
		if (object == VM_OBJECT_NULL) {
			db_indent = orig_db_indent;
			return;
		}

		db_indent += 2;

		iprintf("object 0x%x", object);
		printf(", shadow=0x%x", object->shadow);
		printf(", copy=0x%x", object->copy);
		printf(", pager=0x%x", object->pager);
		printf(", ref=%d\n", object->ref_count);

		object = object->shadow;
	}
}
/*
 *	vm_object_print:	[ debug ]
 */
void
vm_object_print(
	db_expr_t		db_addr,
	__unused boolean_t	have_addr,
	__unused int		arg_count,
	__unused char		*modif)
{
	vm_object_t		object;
	register vm_page_t	p;
	const char		*s;
	register int		count;

	object = (vm_object_t) (long) db_addr;
	if (object == VM_OBJECT_NULL)
		return;

	iprintf("object 0x%x\n", object);

	db_indent += 2;

	iprintf("size=0x%x", object->size);
	printf(", cluster=0x%x", object->cluster_size);
	printf(", memq_hint=%p", object->memq_hint);
	printf(", ref_count=%d\n", object->ref_count);
#if	TASK_SWAPPER
	printf("res_count=%d, ", object->res_count);
#endif	/* TASK_SWAPPER */
	printf("resident_page_count=%d\n", object->resident_page_count);

	iprintf("shadow=0x%x", object->shadow);
	if (object->shadow) {
		register int i = 0;
		vm_object_t shadow = object;
		while((shadow = shadow->shadow))
			i++;
		printf(" (depth %d)", i);
	}
	printf(", copy=0x%x", object->copy);
	printf(", shadow_offset=0x%x", object->shadow_offset);
	printf(", last_alloc=0x%x\n", object->last_alloc);

	iprintf("pager=0x%x", object->pager);
	printf(", paging_offset=0x%x", object->paging_offset);
	printf(", pager_control=0x%x\n", object->pager_control);

	iprintf("copy_strategy=%d[", object->copy_strategy);
	switch (object->copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
		printf("copy_none");
		break;

	case MEMORY_OBJECT_COPY_CALL:
		printf("copy_call");
		break;

	case MEMORY_OBJECT_COPY_DELAY:
		printf("copy_delay");
		break;

	case MEMORY_OBJECT_COPY_SYMMETRIC:
		printf("copy_symmetric");
		break;

	case MEMORY_OBJECT_COPY_INVALID:
		printf("copy_invalid");
		break;
	}
	printf("]");
	printf(", absent_count=%d\n", object->absent_count);

	iprintf("all_wanted=0x%x<", object->all_wanted);
	s = "";
	if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
		printf("%sinit", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
		printf("%sready", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
		printf("%spaging", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
		printf("%sabsent", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
		printf("%slock", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
		printf("%suncaching", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
		printf("%scopy_call", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
		printf("%scaching", s);
		s = ",";
	}
	printf(">");
	printf(", paging_in_progress=%d\n", object->paging_in_progress);

	iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
		(object->pager_created ? "" : "!"),
		(object->pager_initialized ? "" : "!"),
		(object->pager_ready ? "" : "!"),
		(object->can_persist ? "" : "!"),
		(object->pager_trusted ? "" : "!"),
		(object->pageout ? "" : "!"),
		(object->internal ? "internal" : "external"),
		(object->temporary ? "temporary" : "permanent"));
	iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n",
		(object->alive ? "" : "!"),
		((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"),
		((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"),
		((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"),
		(object->shadowed ? "" : "!"),
		(vm_object_cached(object) ? "" : "!"),
		(object->private ? "" : "!"));
	iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
		(object->advisory_pageout ? "" : "!"),
		(object->silent_overwrite ? "" : "!"));

#if	MACH_PAGEMAP
	iprintf("existence_map=");
	vm_external_print(object->existence_map, object->size);
#endif	/* MACH_PAGEMAP */
#if	MACH_ASSERT
	iprintf("paging_object=0x%x\n", object->paging_object);
#endif	/* MACH_ASSERT */

	if (vm_object_print_pages) {
		count = 0;
		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
			if (count == 0) {
				iprintf("memory:=");
			} else if (count == 2) {
				printf("\n");
				iprintf(" ...");
				count = 0;
			} else {
				printf(",");
			}
			count++;

			printf("(off=0x%llX,page=%p)", p->offset, p);
			p = (vm_page_t) queue_next(&p->listq);
		}
	}
	db_indent -= 2;
}
/*
 *	vm_object_find		[ debug ]
 *
 *	Find all tasks which reference the given vm_object.
 */

boolean_t	vm_object_find(vm_object_t object);
boolean_t	vm_object_print_verbose = FALSE;

boolean_t
vm_object_find(
	vm_object_t	object)
{
	task_t		task;
	vm_map_t	map;
	vm_map_entry_t	entry;
	processor_set_t	pset = &default_pset;
	boolean_t	found = FALSE;

	queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
		map = task->map;
		for (entry = vm_map_first_entry(map);
		     entry && entry != vm_map_to_entry(map);
		     entry = entry->vme_next) {

			vm_object_t obj;

			/*
			 * For the time being skip submaps,
			 * only the kernel can have submaps,
			 * and unless we are interested in
			 * kernel objects, we can simply skip
			 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
			 * for a full solution.
			 */
			if (entry->is_sub_map)
				continue;

			obj = entry->object.vm_object;

			while (obj != VM_OBJECT_NULL) {
				if (obj == object) {
					if (!found) {
						printf("TASK\t\tMAP\t\tENTRY\n");
						found = TRUE;
					}
					printf("0x%x\t0x%x\t0x%x\n",
					       task, map, entry);
				}
				obj = obj->shadow;
			}
		}
	}

	return(found);
}

#endif	/* MACH_KDB */
kern_return_t
vm_object_populate_with_private(
	vm_object_t		object,
	vm_object_offset_t	offset,
	ppnum_t			phys_page,
	vm_size_t		size)
{
	ppnum_t			base_page;
	vm_object_offset_t	base_offset;

	if (!object->private)
		return KERN_FAILURE;

	base_page = phys_page;

	vm_object_lock(object);
	if (!object->phys_contiguous) {
		vm_page_t	m;

		if ((base_offset = trunc_page_64(offset)) != offset) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		base_offset += object->paging_offset;

		while (size) {
			m = vm_page_lookup(object, base_offset);
			if (m != VM_PAGE_NULL) {
				if (m->fictitious) {
					vm_page_lock_queues();
					m->fictitious = FALSE;
					m->phys_page = base_page;
					object->absent_count++;
					m->list_req_pending = TRUE;
					vm_page_unlock_queues();
				} else if (m->phys_page != base_page) {
					/* pmap call to clear old mapping */
					pmap_disconnect(m->phys_page);
					m->phys_page = base_page;
				}

				/*
				 * We're not pointing to the same
				 * physical page any longer and the
				 * contents of the new one are not
				 * supposed to be encrypted.
				 * XXX What happens to the original
				 * physical page.  Is it lost ?
				 */
				m->encrypted = FALSE;
			} else {
				while ((m = vm_page_grab_fictitious())
						== VM_PAGE_NULL)
					vm_page_more_fictitious();
				vm_page_lock_queues();
				m->fictitious = FALSE;
				m->phys_page = base_page;
				m->list_req_pending = TRUE;
				object->absent_count++;
				vm_page_unlock_queues();
				vm_page_insert(m, object, base_offset);
			}
			base_page++;			/* Go to the next physical page */
			base_offset += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	} else {
		/* NOTE: we should check the original settings here */
		/* if we have a size > zero a pmap call should be made */
		/* to disable the range */

		/* shadows on contiguous memory are not allowed */
		/* we therefore can use the offset field */
		object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
		object->size = size;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}
/*
 *	memory_object_free_from_cache:
 *
 *	Walk the vm_object cache list, removing and freeing vm_objects
 *	which are backed by the pager identified by the caller, (pager_ops).
 *	Remove up to "count" objects, if there are that many available
 *	in the cache.
 *
 *	Walk the list at most once, return the number of vm_objects
 *	actually freed.
 */
__private_extern__ kern_return_t
memory_object_free_from_cache(
	__unused host_t			host,
	memory_object_pager_ops_t	pager_ops,
	int				*count)
{
	int			object_released = 0;
	register vm_object_t	object = VM_OBJECT_NULL;
	vm_object_t		shadow;

	if (host == HOST_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_object_cache_lock();

	queue_iterate(&vm_object_cached_list, object,
			vm_object_t, cached_list) {
		if (object->pager &&
		    (pager_ops == object->pager->mo_pager_ops)) {
			vm_object_lock(object);
			queue_remove(&vm_object_cached_list, object,
					vm_object_t, cached_list);
			vm_object_cached_count--;

			/*
			 *	Since this object is in the cache, we know
			 *	that it is initialized and has only a pager's
			 *	(implicit) reference.  Take a reference to avoid
			 *	recursive deallocations.
			 */

			assert(object->pager_initialized);
			assert(object->ref_count == 0);
			object->ref_count++;

			/*
			 *	Terminate the object.
			 *	If the object had a shadow, we let
			 *	vm_object_deallocate deallocate it.
			 *	"pageout" objects have a shadow, but
			 *	maintain a "paging reference" rather
			 *	than a normal reference.
			 *	(We are careful here to limit recursion.)
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
			if ((vm_object_terminate(object) == KERN_SUCCESS)
					&& (shadow != VM_OBJECT_NULL)) {
				vm_object_deallocate(shadow);
			}

			if (object_released++ == *count)
				return KERN_SUCCESS;
		}
	}
	vm_object_cache_unlock();
	*count = object_released;
	return KERN_SUCCESS;
}
kern_return_t
memory_object_create_named(
	memory_object_t		pager,
	memory_object_offset_t	size,
	memory_object_control_t	*control)
{
	vm_object_t		object;
	vm_object_hash_entry_t	entry;

	*control = MEMORY_OBJECT_CONTROL_NULL;
	if (pager == MEMORY_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, FALSE);
	if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
			(entry->object != VM_OBJECT_NULL)) {
		if (entry->object->named == TRUE)
			panic("memory_object_create_named: caller already holds the right");
	}

	vm_object_cache_unlock();
	if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
	    == VM_OBJECT_NULL) {
		return(KERN_INVALID_OBJECT);
	}

	/* wait for object (if any) to be ready */
	if (object != VM_OBJECT_NULL) {
		vm_object_lock(object);
		object->named = TRUE;
		while (!object->pager_ready) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_PAGER_READY,
					THREAD_UNINT);
		}
		*control = object->pager_control;
		vm_object_unlock(object);
	}
	return (KERN_SUCCESS);
}
/*
 *	Routine:	memory_object_recover_named [user interface]
 *	Purpose:
 *		Attempt to recover a named reference for a VM object.
 *		VM will verify that the object has not already started
 *		down the termination path, and if it has, will optionally
 *		wait for that to finish.
 *	Returns:
 *		KERN_SUCCESS - we recovered a named reference on the object
 *		KERN_FAILURE - we could not recover a reference (object dead)
 *		KERN_INVALID_ARGUMENT - bad memory object control
 */
kern_return_t
memory_object_recover_named(
	memory_object_control_t	control,
	boolean_t		wait_on_terminating)
{
	vm_object_t		object;

	vm_object_cache_lock();
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		vm_object_cache_unlock();
		return (KERN_INVALID_ARGUMENT);
	}

restart:
	vm_object_lock(object);

	if (object->terminating && wait_on_terminating) {
		vm_object_cache_unlock();
		vm_object_wait(object,
			VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
			THREAD_UNINT);
		vm_object_cache_lock();
		goto restart;
	}

	if (!object->alive) {
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	if (object->named == TRUE) {
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_SUCCESS;
	}

	if ((object->ref_count == 0) && (!object->terminating)) {
		queue_remove(&vm_object_cached_list, object,
			     vm_object_t, cached_list);
		vm_object_cached_count--;
		XPR(XPR_VM_OBJECT_CACHE,
		    "memory_object_recover_named: removing %X, head (%X, %X)\n",
		    (integer_t)object,
		    (integer_t)vm_object_cached_list.next,
		    (integer_t)vm_object_cached_list.prev, 0,0);
	}

	vm_object_cache_unlock();

	object->named = TRUE;
	object->ref_count++;
	vm_object_res_reference(object);
	while (!object->pager_ready) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_PAGER_READY,
				THREAD_UNINT);
	}
	vm_object_unlock(object);
	return (KERN_SUCCESS);
}
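
/*
 * Illustrative sketch, not part of the build: a hypothetical caller that
 * tries to recover a named reference and treats a dead object as a normal
 * failure.  The helper name is an assumption.
 */
#if 0
static boolean_t
example_try_recover_named(memory_object_control_t control)
{
	kern_return_t	kr;

	/* wait for a termination already in progress, if any */
	kr = memory_object_recover_named(control, TRUE);
	if (kr == KERN_SUCCESS)
		return TRUE;	/* we now hold a named reference */

	/* KERN_FAILURE: object already dead; KERN_INVALID_ARGUMENT: bad control */
	return FALSE;
}
#endif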
/*
 *	vm_object_release_name:
 *
 *	Enforces the name semantic on memory_object reference count decrement.
 *	This routine should not be called unless the caller holds a name
 *	reference gained through memory_object_create_named.
 *
 *	If the TERMINATE_IDLE flag is set, the call will return unless the
 *	reference count is 1, i.e. unless the object is idle with the name
 *	being the only remaining reference.
 *	If the decision is made to proceed, the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable; otherwise, when
 *	the reference count is zero, it is simply terminated.
 */

__private_extern__ kern_return_t
vm_object_release_name(
	vm_object_t	object,
	int		flags)
{
	vm_object_t	shadow;
	boolean_t	original_object = TRUE;

	while (object != VM_OBJECT_NULL) {

		/*
		 * The cache holds a reference (uncounted) to
		 * the object.  We must lock it before removing
		 * the object.
		 */

		vm_object_cache_lock();
		vm_object_lock(object);
		assert(object->alive);
		if (original_object)
			assert(object->named);
		assert(object->ref_count > 0);

		/*
		 * We have to wait for initialization before
		 * destroying or caching the object.
		 */

		if (object->pager_created && !object->pager_initialized) {
			assert(!object->can_persist);
			vm_object_assert_wait(object,
					VM_OBJECT_EVENT_INITIALIZED,
					THREAD_UNINT);
			vm_object_unlock(object);
			vm_object_cache_unlock();
			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

		if (((object->ref_count > 1)
			&& (flags & MEMORY_OBJECT_TERMINATE_IDLE))
			|| (object->terminating)) {
			vm_object_unlock(object);
			vm_object_cache_unlock();
			return KERN_FAILURE;
		} else {
			if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
				vm_object_unlock(object);
				vm_object_cache_unlock();
				return KERN_SUCCESS;
			}
		}

		if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
		    (object->ref_count == 1)) {
			if (original_object)
				object->named = FALSE;
			vm_object_unlock(object);
			vm_object_cache_unlock();
			/* let vm_object_deallocate push this thing into */
			/* the cache, if that is where it is bound */
			vm_object_deallocate(object);
			return KERN_SUCCESS;
		}
		VM_OBJ_RES_DECR(object);
		shadow = object->pageout ? VM_OBJECT_NULL : object->shadow;
		if (object->ref_count == 1) {
			if (vm_object_terminate(object) != KERN_SUCCESS) {
				if (original_object) {
					return KERN_FAILURE;
				} else {
					return KERN_SUCCESS;
				}
			}
			if (shadow != VM_OBJECT_NULL) {
				original_object = FALSE;
				object = shadow;
				continue;
			}
			return KERN_SUCCESS;
		} else {
			object->ref_count--;
			assert(object->ref_count > 0);
			if (original_object)
				object->named = FALSE;
			vm_object_unlock(object);
			vm_object_cache_unlock();
			return KERN_SUCCESS;
		}
	}
	return KERN_FAILURE;
}
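
/*
 * Illustrative sketch, not part of the build: dropping a named reference
 * only if the object is otherwise idle, and letting the VM cache it if it
 * is cacheable, using the flags documented above.  The helper name is an
 * assumption.
 */
#if 0
static kern_return_t
example_release_name_if_idle(vm_object_t object)
{
	/*
	 * KERN_FAILURE means the object was still in use (ref_count > 1)
	 * or already terminating; the name reference is retained then.
	 */
	return vm_object_release_name(object,
				      MEMORY_OBJECT_TERMINATE_IDLE |
				      MEMORY_OBJECT_RESPECT_CACHE);
}
#endif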
__private_extern__ kern_return_t
vm_object_lock_request(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	memory_object_return_t	should_return,
	int			flags,
	vm_prot_t		prot)
{
	__unused boolean_t	should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	XPR(XPR_MEMORY_OBJECT,
	    "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    (integer_t)object, offset, size,
	    (((should_return & 1) << 1) | should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	(void)vm_object_update(object,
		offset, size, NULL, NULL, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
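
/*
 * Illustrative sketch, not part of the build: asking vm_object_lock_request()
 * to return the dirty pages of a range to the pager and flush them, without
 * changing page protections.  The helper name is an assumption.
 */
#if 0
static kern_return_t
example_flush_object_range(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size)
{
	return vm_object_lock_request(object, offset, size,
				      MEMORY_OBJECT_RETURN_DIRTY,
				      MEMORY_OBJECT_DATA_FLUSH,
				      VM_PROT_NO_CHANGE);
}
#endif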
/*
 * Empty a purgable object by grabbing the physical pages assigned to it and
 * putting them on the free queue without writing them to backing store, etc.
 * When the pages are next touched they will be demand zero-fill pages.  We
 * skip pages which are busy, being paged in/out, wired, etc.  We do _not_
 * skip referenced/dirty pages, pages on the active queue, etc.  We're more
 * than happy to grab these since this is a purgable object.  We mark the
 * object as "empty" after reaping its pages.
 *
 * On entry the object and page queues are locked, the object must be a
 * purgable object with no delayed copies pending.
 */
__private_extern__ unsigned int
vm_object_purge(vm_object_t object)
{
	vm_page_t	p, next;
	unsigned int	num_purged_pages;
	vm_page_t	local_freeq;
	unsigned long	local_freed;
	int		purge_loop_quota;
/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
#define PURGE_BATCH_FREE_LIMIT	50
/* release page queues lock every PURGE_LOOP_QUOTA iterations */
#define PURGE_LOOP_QUOTA	100

	num_purged_pages = 0;
	if (object->purgable == VM_OBJECT_NONPURGABLE)
		return num_purged_pages;

	object->purgable = VM_OBJECT_PURGABLE_EMPTY;

	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
	purge_loop_quota = PURGE_LOOP_QUOTA;

	local_freeq = VM_PAGE_NULL;
	local_freed = 0;

	/*
	 * Go through the object's resident pages and try and discard them.
	 */
	next = (vm_page_t)queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t)next)) {
		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		if (purge_loop_quota-- == 0) {
			/*
			 * Avoid holding the page queues lock for too long.
			 * Let someone else take it for a while if needed.
			 * Keep holding the object's lock to guarantee that
			 * the object's page list doesn't change under us.
			 */
			if (local_freeq != VM_PAGE_NULL) {
				/*
				 * Flush our queue of pages to free.
				 */
				vm_page_free_list(local_freeq);
				local_freeq = VM_PAGE_NULL;
				local_freed = 0;
			}
			vm_page_unlock_queues();
			mutex_pause();
			vm_page_lock_queues();

			/* resume with the current page and a new quota */
			purge_loop_quota = PURGE_LOOP_QUOTA;
		}

		if (p->busy || p->cleaning || p->laundry ||
		    p->list_req_pending) {
			/* page is being acted upon, so don't mess with it */
			continue;
		}
		if (p->wire_count) {
			/* don't discard a wired page */
			continue;
		}

		/* clean up the object/offset table */
		vm_page_remove(p);

		if (p->absent) {
			/* update the object's count of absent pages */
			vm_object_absent_release(object);
		}

		/* we can discard this page */

		/* advertise that this page is in a transition state */
		p->busy = TRUE;

		if (p->no_isync == TRUE) {
			/* the page hasn't been mapped yet */
			/* (optimization to delay the i-cache sync) */
		} else {
			/* unmap the page */
			int refmod_state;

			refmod_state = pmap_disconnect(p->phys_page);
			if (refmod_state & VM_MEM_MODIFIED) {
				p->dirty = TRUE;
			}
		}

		if (p->dirty || p->precious) {
			/* we saved the cost of cleaning this page ! */
			num_purged_pages++;
			vm_page_purged_count++;
		}

		/* remove page from active or inactive queue... */
		VM_PAGE_QUEUES_REMOVE(p);

		/* ... and put it on our queue of pages to free */
		assert(!p->laundry);
		assert(p->object != kernel_object);
		assert(p->pageq.next == NULL &&
		       p->pageq.prev == NULL);
		p->pageq.next = (queue_entry_t) local_freeq;
		local_freeq = p;
		if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
			/* flush our queue of pages to free */
			vm_page_free_list(local_freeq);
			local_freeq = VM_PAGE_NULL;
			local_freed = 0;
		}
	}

	/* flush our local queue of pages to free one last time */
	if (local_freeq != VM_PAGE_NULL) {
		vm_page_free_list(local_freeq);
		local_freeq = VM_PAGE_NULL;
		local_freed = 0;
	}

	return num_purged_pages;
}
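
/*
 * Illustrative sketch, not part of the build: the locking protocol required
 * by vm_object_purge() as documented above -- the object lock and the page
 * queues lock must both be held, and the object must be purgable with no
 * delayed copies.  The wrapper name is an assumption.
 */
#if 0
static unsigned int
example_purge_locked(vm_object_t object)
{
	unsigned int	purged = 0;

	vm_object_lock(object);
	vm_page_lock_queues();

	if (object->purgable != VM_OBJECT_NONPURGABLE &&
	    object->copy == VM_OBJECT_NULL)
		purged = vm_object_purge(object);

	vm_page_unlock_queues();
	vm_object_unlock(object);

	return purged;
}
#endif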
/*
 * vm_object_purgable_control() allows the caller to control and investigate the
 * state of a purgable object.  A purgable object is created via a call to
 * vm_allocate() with VM_FLAGS_PURGABLE specified.  A purgable object will
 * never be coalesced with any other object -- even other purgable objects --
 * and will thus always remain a distinct object.  A purgable object has
 * special semantics when its reference count is exactly 1.  If its reference
 * count is greater than 1, then a purgable object will behave like a normal
 * object and attempts to use this interface will result in an error return
 * of KERN_INVALID_ARGUMENT.
 *
 * A purgable object may be put into a "volatile" state which will make the
 * object's pages eligible for being reclaimed without paging to backing
 * store if the system runs low on memory.  If the pages in a volatile
 * purgable object are reclaimed, the purgable object is said to have been
 * "emptied."  When a purgable object is emptied the system will reclaim as
 * many pages from the object as it can in a convenient manner (pages already
 * en route to backing store or busy for other reasons are left as is).  When
 * a purgable object is made volatile, its pages will generally be reclaimed
 * before other pages in the application's working set.  This semantic is
 * generally used by applications which can recreate the data in the object
 * faster than it can be paged in.  One such example might be media assets
 * which can be reread from a much faster RAID volume.
 *
 * A purgable object may be designated as "non-volatile" which means it will
 * behave like all other objects in the system with pages being written to and
 * read from backing store as needed to satisfy system memory needs.  If the
 * object was emptied before the object was made non-volatile, that fact will
 * be returned as the old state of the purgable object (see
 * VM_PURGABLE_SET_STATE below).  In this case, any pages of the object which
 * were reclaimed as part of emptying the object will be refaulted in as
 * zero-fill on demand.  It is up to the application to note that an object
 * was emptied and recreate the object's contents if necessary.  When a
 * purgable object is made non-volatile, its pages will generally not be paged
 * out to backing store in the immediate future.  A purgable object may also
 * be manually emptied.
 *
 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
 * volatile purgable object may be queried at any time.  This information may
 * be used as a control input to let the application know when the system is
 * experiencing memory pressure and is reclaiming memory.
 *
 * The specified address may be any address within the purgable object.  If
 * the specified address does not represent any object in the target task's
 * virtual address space, then KERN_INVALID_ADDRESS will be returned.  If the
 * object containing the specified address is not a purgable object, then
 * KERN_INVALID_ARGUMENT will be returned.  Otherwise, KERN_SUCCESS will be
 * returned.
 *
 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
 * VM_PURGABLE_GET_STATE.  For VM_PURGABLE_SET_STATE, the in/out parameter
 * state is used to set the new state of the purgable object and return its
 * old state.  For VM_PURGABLE_GET_STATE, the current state of the purgable
 * object is returned in the parameter state.
 *
 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY.  These, respectively, represent
 * the non-volatile, volatile and volatile/empty states described above.
 * Setting the state of a purgable object to VM_PURGABLE_EMPTY will
 * immediately reclaim as many pages in the object as can be conveniently
 * collected (some may have already been written to backing store or be
 * busy).
 *
 * The process of making a purgable object non-volatile and determining its
 * previous state is atomic.  Thus, if a purgable object is made
 * VM_PURGABLE_NONVOLATILE and the old state is returned as
 * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are
 * completely intact and will remain so until the object is made volatile
 * again.  If the old state is returned as VM_PURGABLE_EMPTY then the object
 * was reclaimed while it was in a volatile state and its previous contents
 * have been lost.
 */
/*
 * The object must be locked.
 */
kern_return_t
vm_object_purgable_control(
	vm_object_t	object,
	vm_purgable_t	control,
	int		*state)
{
	int		old_state;
	vm_page_t	p;

	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or it can't be purgable.
		 */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Get current state of the purgable object.
	 */
	switch (object->purgable) {
	case VM_OBJECT_NONPURGABLE:
		return KERN_INVALID_ARGUMENT;

	case VM_OBJECT_PURGABLE_NONVOLATILE:
		old_state = VM_PURGABLE_NONVOLATILE;
		break;

	case VM_OBJECT_PURGABLE_VOLATILE:
		old_state = VM_PURGABLE_VOLATILE;
		break;

	case VM_OBJECT_PURGABLE_EMPTY:
		old_state = VM_PURGABLE_EMPTY;
		break;

	default:
		old_state = VM_PURGABLE_NONVOLATILE;
		panic("Bad state (%d) for purgable object!\n",
		      object->purgable);
		/*NOTREACHED*/
	}

	/* purgable can't have delayed copies - now or in the future */
	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	/*
	 * Execute the desired operation.
	 */
	if (control == VM_PURGABLE_GET_STATE) {
		*state = old_state;
		return KERN_SUCCESS;
	}

	switch (*state) {
	case VM_PURGABLE_NONVOLATILE:
		vm_page_lock_queues();
		if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) {
			assert(vm_page_purgeable_count >=
			       object->resident_page_count);
			vm_page_purgeable_count -= object->resident_page_count;
		}

		object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;

		/*
		 * If the object wasn't emptied, then mark all pages of the
		 * object as referenced in order to give them a complete turn
		 * of the virtual memory "clock" before becoming candidates
		 * for paging out (if the system is suffering from memory
		 * pressure).  We don't really need to set the pmap reference
		 * bits (which would be expensive) since the software copies
		 * are believed if they're set to true ...
		 */
		if (old_state != VM_PURGABLE_EMPTY) {
			for (p = (vm_page_t)queue_first(&object->memq);
			     !queue_end(&object->memq, (queue_entry_t)p);
			     p = (vm_page_t)queue_next(&p->listq))
				p->reference = TRUE;
		}

		vm_page_unlock_queues();

		break;

	case VM_PURGABLE_VOLATILE:
		vm_page_lock_queues();

		if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
		    object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
			vm_page_purgeable_count += object->resident_page_count;
		}

		object->purgable = VM_OBJECT_PURGABLE_VOLATILE;

		/*
		 * We want the newly volatile purgable object to be a
		 * candidate for the pageout scan before other pages in the
		 * application if the system is suffering from memory
		 * pressure.  To do this, we move a page of the object from
		 * the active queue onto the inactive queue in order to
		 * promote the object for early reclaim.  We only need to move
		 * a single page since the pageout scan will reap the entire
		 * purgable object if it finds a single page in a volatile
		 * state.  Obviously we don't do this if there are no pages
		 * associated with the object or we find a page of the object
		 * already on the inactive queue.
		 */
		for (p = (vm_page_t)queue_first(&object->memq);
		     !queue_end(&object->memq, (queue_entry_t)p);
		     p = (vm_page_t)queue_next(&p->listq)) {
			if (p->inactive) {
				/* already a page on the inactive queue */
				break;
			}
			if (p->active && !p->busy) {
				/* found one we can move */
				vm_page_deactivate(p);
				break;
			}
		}
		vm_page_unlock_queues();

		break;

	case VM_PURGABLE_EMPTY:
		vm_page_lock_queues();
		if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
		    object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
			vm_page_purgeable_count += object->resident_page_count;
		}
		(void) vm_object_purge(object);
		vm_page_unlock_queues();
		break;
	}

	*state = old_state;

	return KERN_SUCCESS;
}
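
/*
 * Illustrative user-space sketch, not part of this file or its build: the
 * purgable-object interface described above as seen from an application,
 * via vm_allocate(VM_FLAGS_PURGABLE) and vm_purgable_control().  Requires
 * <mach/mach.h>; the helper names are assumptions.
 */
#if 0
static void *
example_alloc_purgable(vm_size_t size)
{
	vm_address_t	addr = 0;

	if (vm_allocate(mach_task_self(), &addr, size,
			VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE) != KERN_SUCCESS)
		return NULL;
	return (void *)addr;
}

/* Mark the buffer volatile; its pages may now be reclaimed under pressure. */
static kern_return_t
example_make_volatile(void *buf)
{
	int	state = VM_PURGABLE_VOLATILE;

	return vm_purgable_control(mach_task_self(), (vm_address_t)buf,
				   VM_PURGABLE_SET_STATE, &state);
}

/*
 * Make the buffer non-volatile again; returns 1 if the old contents are
 * intact, 0 if they were emptied and must be regenerated, -1 on error.
 */
static int
example_reclaim_contents(void *buf)
{
	int	state = VM_PURGABLE_NONVOLATILE;

	if (vm_purgable_control(mach_task_self(), (vm_address_t)buf,
				VM_PURGABLE_SET_STATE, &state) != KERN_SUCCESS)
		return -1;
	return (state == VM_PURGABLE_EMPTY) ? 0 : 1;
}
#endif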
/*
 * vm_object_res_deallocate
 *
 * (recursively) decrement residence counts on vm objects and their shadows.
 * Called from vm_object_deallocate and when swapping out an object.
 *
 * The object is locked, and remains locked throughout the function,
 * even as we iterate down the shadow chain.  Locks on intermediate objects
 * will be dropped, but not the original object.
 *
 * NOTE: this function used to use recursion, rather than iteration.
 */

__private_extern__ void
vm_object_res_deallocate(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked so it can be called directly
	 * from vm_object_deallocate.  Original object is never
	 * unlocked.
	 */
	assert(object->res_count > 0);
	while (--object->res_count == 0) {
		assert(object->ref_count >= object->res_count);
		vm_object_deactivate_all_pages(object);
		/* iterate on shadow, if present */
		if (object->shadow != VM_OBJECT_NULL) {
			vm_object_t tmp_object = object->shadow;
			vm_object_lock(tmp_object);
			if (object != orig_object)
				vm_object_unlock(object);
			object = tmp_object;
			assert(object->res_count > 0);
		} else
			break;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
/*
 * vm_object_res_reference
 *
 * Internal function to increment residence count on a vm object
 * and its shadows.  It is called only from vm_object_reference, and
 * when swapping in a vm object, via vm_map_swapin.
 *
 * The object is locked, and remains locked throughout the function,
 * even as we iterate down the shadow chain.  Locks on intermediate objects
 * will be dropped, but not the original object.
 *
 * NOTE: this function used to use recursion, rather than iteration.
 */

__private_extern__ void
vm_object_res_reference(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked, so this can be called directly
	 * from vm_object_reference.  This lock is never released.
	 */
	while ((++object->res_count == 1) &&
	       (object->shadow != VM_OBJECT_NULL)) {
		vm_object_t tmp_object = object->shadow;

		assert(object->ref_count >= object->res_count);
		vm_object_lock(tmp_object);
		if (object != orig_object)
			vm_object_unlock(object);
		object = tmp_object;
	}
	if (object != orig_object)
		vm_object_unlock(object);
	assert(orig_object->ref_count >= orig_object->res_count);
}
#endif	/* TASK_SWAPPER */
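
/*
 * Illustrative sketch, not part of the build: the hand-over-hand locking
 * discipline used by the two residence-count routines above -- lock the
 * next object in the shadow chain before dropping the current one, and
 * never drop the caller's lock on the original object.  The walker and its
 * visitor callback are assumptions for illustration only.
 */
#if 0
static void
example_walk_shadow_chain(vm_object_t object, void (*visit)(vm_object_t))
{
	vm_object_t	orig_object = object;	/* caller holds this lock */
	vm_object_t	next;

	while (object != VM_OBJECT_NULL) {
		visit(object);
		next = object->shadow;
		if (next == VM_OBJECT_NULL)
			break;
		vm_object_lock(next);		/* lock the child first... */
		if (object != orig_object)
			vm_object_unlock(object); /* ...then drop the intermediate */
		object = next;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
#endif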
/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
#ifdef vm_object_reference
#undef vm_object_reference
#endif
__private_extern__ void
vm_object_reference(
	register vm_object_t	object)
{
	if (object == VM_OBJECT_NULL)
		return;

	vm_object_lock(object);
	assert(object->ref_count > 0);
	vm_object_reference_locked(object);
	vm_object_unlock(object);
}
#ifdef MACH_BSD
/*
 * Scale the vm_object_cache
 * This is required to make sure that the vm_object_cache is big
 * enough to effectively cache the mapped file.
 * This is really important with UBC as all the regular file vnodes
 * have a memory object associated with them. Having this cache too
 * small results in rapid reclaim of vnodes and hurts performance a LOT!
 *
 * This is also needed as the number of vnodes can be dynamically scaled.
 */
kern_return_t
adjust_vm_object_cache(
	__unused vm_size_t oval,
	vm_size_t nval)
{
	vm_object_cached_max = nval;
	vm_object_cache_trim(FALSE);
	return (KERN_SUCCESS);
}
#endif /* MACH_BSD */
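
/*
 * Illustrative sketch, not part of the build: a hypothetical BSD-side hook
 * that rescales the VM object cache when the vnode limit is retuned, so
 * cached file mappings are not reclaimed prematurely.  The hook name and
 * its parameters are assumptions.
 */
#if 0
static void
example_vnode_limit_changed(vm_size_t old_limit, vm_size_t new_limit)
{
	(void) adjust_vm_object_cache(old_limit, new_limit);
}
#endif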
/*
 * vm_object_transpose
 *
 * This routine takes two VM objects of the same size and exchanges
 * their backing store.
 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
 *
 * The VM objects must not be locked by caller.
 */
kern_return_t
vm_object_transpose(
	vm_object_t		object1,
	vm_object_t		object2,
	vm_object_size_t	transpose_size)
{
	vm_object_t		tmp_object;
	kern_return_t		retval;
	boolean_t		object1_locked, object2_locked;
	boolean_t		object1_paging, object2_paging;
	vm_page_t		page;
	vm_object_offset_t	page_offset;

	tmp_object = VM_OBJECT_NULL;
	object1_locked = FALSE; object2_locked = FALSE;
	object1_paging = FALSE; object2_paging = FALSE;

	if (object1 == object2 ||
	    object1 == VM_OBJECT_NULL ||
	    object2 == VM_OBJECT_NULL) {
		/*
		 * If the 2 VM objects are the same, there's
		 * no point in exchanging their backing store.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	vm_object_lock(object1);
	object1_locked = TRUE;
	if (object1->copy || object1->shadow || object1->shadowed ||
	    object1->purgable != VM_OBJECT_NONPURGABLE) {
		/*
		 * We don't deal with copy or shadow objects (yet).
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	/*
	 * Since we're about to mess with the object's backing store,
	 * mark it as "paging_in_progress".  Note that this is not enough
	 * to prevent any paging activity on this object, so the caller should
	 * have "quiesced" the objects beforehand, via a UPL operation with
	 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
	 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
	 */
	vm_object_paging_begin(object1);
	object1_paging = TRUE;
	vm_object_unlock(object1);
	object1_locked = FALSE;

	/*
	 * Same as above for the 2nd object...
	 */
	vm_object_lock(object2);
	object2_locked = TRUE;
	if (object2->copy || object2->shadow || object2->shadowed ||
	    object2->purgable != VM_OBJECT_NONPURGABLE) {
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	vm_object_paging_begin(object2);
	object2_paging = TRUE;
	vm_object_unlock(object2);
	object2_locked = FALSE;

	/*
	 * Allocate a temporary VM object to hold object1's contents
	 * while we copy object2 to object1.
	 */
	tmp_object = vm_object_allocate(transpose_size);
	vm_object_lock(tmp_object);
	vm_object_paging_begin(tmp_object);
	tmp_object->can_persist = FALSE;

	/*
	 * Since we need to lock both objects at the same time,
	 * make sure we always lock them in the same order to
	 * avoid deadlocks.
	 */
	if (object1 < object2) {
		vm_object_lock(object1);
		vm_object_lock(object2);
	} else {
		vm_object_lock(object2);
		vm_object_lock(object1);
	}
	object1_locked = TRUE;
	object2_locked = TRUE;

	if (object1->size != object2->size ||
	    object1->size != transpose_size) {
		/*
		 * If the 2 objects don't have the same size, we can't
		 * exchange their backing stores or one would overflow.
		 * If their size doesn't match the caller's
		 * "transpose_size", we can't do it either because the
		 * transpose operation will affect the entire span of
		 * the objects.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Transpose the lists of resident pages.
	 */
	if (object1->phys_contiguous || queue_empty(&object1->memq)) {
		/*
		 * No pages in object1, just transfer pages
		 * from object2 to object1.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset);
		}
		assert(queue_empty(&object2->memq));
	} else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
		/*
		 * No pages in object2, just transfer pages
		 * from object1 to object2.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			vm_page_rename(page, object2, page->offset);
		}
		assert(queue_empty(&object1->memq));
	} else {
		/* transfer object1's pages to tmp_object */
		vm_page_lock_queues();
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			page_offset = page->offset;
			vm_page_remove(page);
			page->offset = page_offset;
			queue_enter(&tmp_object->memq, page, vm_page_t, listq);
		}
		vm_page_unlock_queues();
		assert(queue_empty(&object1->memq));
		/* transfer object2's pages to object1 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset);
		}
		assert(queue_empty(&object2->memq));
		/* transfer tmp_object's pages (originally object1's) to object2 */
		while (!queue_empty(&tmp_object->memq)) {
			page = (vm_page_t) queue_first(&tmp_object->memq);
			queue_remove(&tmp_object->memq, page,
				     vm_page_t, listq);
			vm_page_insert(page, object2, page->offset);
		}
		assert(queue_empty(&tmp_object->memq));
	}

	/* no need to transpose the size: they should be identical */
	assert(object1->size == object2->size);

#define __TRANSPOSE_FIELD(field)				\
MACRO_BEGIN							\
	tmp_object->field = object1->field;			\
	object1->field = object2->field;			\
	object2->field = tmp_object->field;			\
MACRO_END

	assert(!object1->copy);
	assert(!object2->copy);

	assert(!object1->shadow);
	assert(!object2->shadow);

	__TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
	__TRANSPOSE_FIELD(pager);
	__TRANSPOSE_FIELD(paging_offset);

	__TRANSPOSE_FIELD(pager_control);
	/* update the memory_objects' pointers back to the VM objects */
	if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object1->pager_control,
					       object1);
	}
	if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object2->pager_control,
					       object2);
	}

	__TRANSPOSE_FIELD(absent_count);

	assert(object1->paging_in_progress);
	assert(object2->paging_in_progress);

	__TRANSPOSE_FIELD(pager_created);
	__TRANSPOSE_FIELD(pager_initialized);
	__TRANSPOSE_FIELD(pager_ready);
	__TRANSPOSE_FIELD(pager_trusted);
	__TRANSPOSE_FIELD(internal);
	__TRANSPOSE_FIELD(temporary);
	__TRANSPOSE_FIELD(private);
	__TRANSPOSE_FIELD(pageout);
	__TRANSPOSE_FIELD(true_share);
	__TRANSPOSE_FIELD(phys_contiguous);
	__TRANSPOSE_FIELD(nophyscache);
	__TRANSPOSE_FIELD(last_alloc);
	__TRANSPOSE_FIELD(sequential);
	__TRANSPOSE_FIELD(cluster_size);
	__TRANSPOSE_FIELD(existence_map);
	__TRANSPOSE_FIELD(cow_hint);
	__TRANSPOSE_FIELD(wimg_bits);

#undef __TRANSPOSE_FIELD

	retval = KERN_SUCCESS;

done:
	/*
	 * Cleanup.
	 */
	if (tmp_object != VM_OBJECT_NULL) {
		vm_object_paging_end(tmp_object);
		vm_object_unlock(tmp_object);
		/*
		 * Re-initialize the temporary object to avoid
		 * deallocating a real pager.
		 */
		_vm_object_allocate(transpose_size, tmp_object);
		vm_object_deallocate(tmp_object);
		tmp_object = VM_OBJECT_NULL;
	}

	if (object1_locked) {
		vm_object_unlock(object1);
		object1_locked = FALSE;
	}
	if (object2_locked) {
		vm_object_unlock(object2);
		object2_locked = FALSE;
	}
	if (object1_paging) {
		vm_object_lock(object1);
		vm_object_paging_end(object1);
		vm_object_unlock(object1);
		object1_paging = FALSE;
	}
	if (object2_paging) {
		vm_object_lock(object2);
		vm_object_paging_end(object2);
		vm_object_unlock(object2);
		object2_paging = FALSE;
	}

	return retval;
}
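
/*
 * Illustrative sketch, not part of the build: a caller of
 * vm_object_transpose().  Both objects are assumed to have already been
 * quiesced by the caller via a UPL created with UPL_SET_IO_WIRE and
 * UPL_BLOCK_ACCESS, as required above; the helper name is an assumption.
 */
#if 0
static kern_return_t
example_swap_backing_store(
	vm_object_t		obj_a,
	vm_object_t		obj_b,
	vm_object_size_t	size)
{
	/*
	 * Both objects must be unlocked and exactly "size" bytes long;
	 * KERN_INVALID_VALUE covers identical/null objects, shadow or
	 * copy objects, and size mismatches.
	 */
	return vm_object_transpose(obj_a, obj_b, size);
}
#endif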
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the VM object rather than on a UPL */

kern_return_t
vm_object_page_op(
	vm_object_t		object,
	vm_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_page_t		dst_page;

	vm_object_lock(object);

	if (ops & UPL_POP_PHYSICAL) {
		if (object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
					(object->shadow_offset >> 12);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while (TRUE) {
		if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Sync up on getting the busy bit */
		if ((dst_page->busy || dst_page->cleaning) &&
		    (((ops & UPL_POP_SET) &&
		      (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			vm_page_lock_queues();

			if (dst_page->no_isync == FALSE)
				pmap_disconnect(dst_page->phys_page);
			vm_page_free(dst_page);

			vm_page_unlock_queues();
			break;
		}

		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if (dst_page->dirty) *flags |= UPL_POP_DIRTY;
			if (dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
			if (dst_page->precious) *flags |= UPL_POP_PRECIOUS;
			if (dst_page->absent) *flags |= UPL_POP_ABSENT;
			if (dst_page->busy) *flags |= UPL_POP_BUSY;
		}

		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if (ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
		}

		if (ops & UPL_POP_CLR) {
			assert(dst_page->busy);
			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
			if (ops & UPL_POP_BUSY) {
				dst_page->busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}

		if (dst_page->encrypted) {
			/*
			 * ENCRYPTED SWAP:
			 * We need to decrypt this encrypted page before the
			 * caller can access its contents.
			 * But if the caller really wants to access the page's
			 * contents, they have to keep the page "busy".
			 * Otherwise, the page could get recycled or re-encrypted
			 * at any time.
			 */
			if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
			    dst_page->busy) {
				/*
				 * The page is stable enough to be accessed by
				 * the caller, so make sure its contents are
				 * not encrypted.
				 */
				vm_page_decrypt(dst_page, 0);
			} else {
				/*
				 * The page is not busy, so don't bother
				 * decrypting it, since anything could
				 * happen to it between now and when the
				 * caller wants to access it.
				 * We should not give the caller access
				 * to this page.
				 */
				assert(!phys_entry);
			}
		}

		if (phys_entry) {
			/*
			 * The physical page number will remain valid
			 * only if the page is kept busy.
			 * ENCRYPTED SWAP: make sure we don't let the
			 * caller access an encrypted page.
			 */
			assert(dst_page->busy);
			assert(!dst_page->encrypted);
			*phys_entry = dst_page->phys_page;
		}

		break;
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
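
/*
 * Illustrative sketch, not part of the build: using vm_object_page_op() to
 * mark a page busy and fetch its physical page number, which stays valid
 * only while the page is kept busy (see the comments above).  The helper
 * name is an assumption.
 */
#if 0
static kern_return_t
example_pin_page_for_io(
	vm_object_t		object,
	vm_object_offset_t	offset,
	ppnum_t			*phys)
{
	int	flags;

	/* the caller is expected to clear UPL_POP_BUSY again when done */
	return vm_object_page_op(object, offset,
				 UPL_POP_SET | UPL_POP_BUSY,
				 phys, &flags);
}
#endif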
/*
 * vm_object_range_op offers performance enhancement over
 * vm_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
vm_object_range_op(
	vm_object_t		object,
	vm_object_offset_t	offset_beg,
	vm_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	vm_object_offset_t	offset;
	vm_page_t		dst_page;

	if (object->resident_page_count == 0) {
		if (ops & UPL_ROP_PRESENT)
			*range = 0;
		else
			*range = offset_end - offset_beg;
		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->busy || dst_page->cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object,
						dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since its
					 * state may have changed while we slept;
					 * it might even belong to a different
					 * object at this point
					 */
					continue;
				}
				vm_page_lock_queues();

				if (dst_page->no_isync == FALSE)
					pmap_disconnect(dst_page->phys_page);
				vm_page_free(dst_page);

				vm_page_unlock_queues();
			} else if (ops & UPL_ROP_ABSENT)
				break;
		} else if (ops & UPL_ROP_PRESENT)
			break;

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range)
		*range = offset - offset_beg;

	return KERN_SUCCESS;
}
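
/*
 * Illustrative sketch, not part of the build: measuring the length of the
 * resident run starting at "offset" with UPL_ROP_PRESENT, which makes the
 * walk stop at the first non-resident page.  The helper name is an
 * assumption.
 */
#if 0
static vm_object_offset_t
example_resident_run(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	len)
{
	int	range = 0;

	(void) vm_object_range_op(object, offset, offset + len,
				  UPL_ROP_PRESENT, &range);
	return (vm_object_offset_t)range;
}
#endif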