/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_object.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory object module.
 */
#include <mach_pagemap.h>
#include <task_swapper.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/vm_param.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/host.h>
#include <kern/host_statistics.h>
#include <kern/processor.h>
#include <kern/misc_protos.h>

#include <vm/memory_object.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <sys/kern_memorystatus.h>
/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, but locked by the object's
 *	lock.
 *
 *	Each object also records the memory object reference
 *	that is used by the kernel to request and write
 *	back data (the memory object, field "pager"), etc...
 *
 *	Virtual memory objects are allocated to provide
 *	zero-filled memory (vm_allocate) or map a user-defined
 *	memory object into a virtual address space (vm_map).
 *
 *	Virtual memory objects that refer to a user-defined
 *	memory object are called "permanent", because all changes
 *	made in virtual memory are reflected back to the
 *	memory manager, which may then store them permanently.
 *	Other virtual memory objects are called "temporary",
 *	meaning that changes need be written back only when
 *	necessary to reclaim pages, and that storage associated
 *	with the object can be discarded once it is no longer
 *	referenced.
 *
 *	A permanent memory object may be mapped into more
 *	than one virtual address space.  Moreover, two threads
 *	may attempt to make the first mapping of a memory
 *	object concurrently.  Only one thread is allowed to
 *	complete this mapping; all others wait for the
 *	"pager_initialized" field to be asserted, indicating
 *	that the first thread has initialized all of the
 *	necessary fields in the virtual memory object structure.
 *
 *	The kernel relies on a *default memory manager* to
 *	provide backing storage for the zero-filled virtual
 *	memory objects.  The pager memory objects associated
 *	with these temporary virtual memory objects are only
 *	requested from the default memory manager when it
 *	becomes necessary.  Virtual memory objects
 *	that depend on the default memory manager are called
 *	"internal".  The "pager_created" field is provided to
 *	indicate whether these ports have ever been allocated.
 *
 *	The kernel may also create virtual memory objects to
 *	hold changed pages after a copy-on-write operation.
 *	In this case, the virtual memory object (and its
 *	backing storage -- its memory object) only contain
 *	those pages that have been changed.  The "shadow"
 *	field refers to the virtual memory object that contains
 *	the remainder of the contents.  The "shadow_offset"
 *	field indicates where in the "shadow" these contents begin.
 *	The "copy" field refers to a virtual memory object
 *	to which changed pages must be copied before changing
 *	this object, in order to implement another form
 *	of copy-on-write optimization.
 *
 *	The virtual memory object structure also records
 *	the attributes associated with its memory object.
 *	The "pager_ready", "can_persist" and "copy_strategy"
 *	fields represent those attributes.  The "cached_list"
 *	field is used in the implementation of the persistence
 *	facility.
 *
 *	ZZZ Continue this comment.
 */
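/*
 * Illustrative sketch (not part of the original source): the "shadow" /
 * "shadow_offset" relationship described above, reduced to a bare chain
 * walk.  The real lookups live in vm_fault and friends and handle locking,
 * copy objects and pagers; this only shows the shape of the traversal.
 *
 *	vm_page_t
 *	example_shadow_chain_lookup(vm_object_t object, vm_object_offset_t offset)
 *	{
 *		vm_page_t m;
 *
 *		while (object != VM_OBJECT_NULL) {
 *			m = vm_page_lookup(object, offset);
 *			if (m != VM_PAGE_NULL)
 *				return m;		// resident at this level
 *			offset += object->vo_shadow_offset;
 *			object = object->shadow;	// fall back to the backing object
 *		}
 *		return VM_PAGE_NULL;			// zero-fill, or page in from the pager
 *	}
 */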
/* Forward declarations for internal functions. */
static kern_return_t	vm_object_terminate(
				vm_object_t	object);

extern void		vm_object_remove(
				vm_object_t	object);

static kern_return_t	vm_object_copy_call(
				vm_object_t		src_object,
				vm_object_offset_t	src_offset,
				vm_object_size_t	size,
				vm_object_t		*_result_object);

static void		vm_object_do_collapse(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_do_bypass(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_release_pager(
				memory_object_t	pager,
				boolean_t	hashed);

static zone_t		vm_object_zone;		/* vm backing store zone */

/*
 *	All wired-down kernel memory belongs to a single virtual
 *	memory object (kernel_object) to avoid wasting data structures.
 */
static struct vm_object	kernel_object_store;
vm_object_t		kernel_object;

/*
 *	The submap object is used as a placeholder for vm_map_submap
 *	operations.  The object is declared in vm_map.c because it
 *	is exported by the vm_map module.  The storage is declared
 *	here because it must be initialized here.
 */
static struct vm_object	vm_submap_object_store;

/*
 *	Virtual memory objects are initialized from
 *	a template (see vm_object_allocate).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see _vm_object_allocate()).
 */
static struct vm_object	vm_object_template;

unsigned int vm_page_purged_wired = 0;
unsigned int vm_page_purged_busy = 0;
unsigned int vm_page_purged_others = 0;
/*
 *	Virtual memory objects that are not referenced by
 *	any address maps, but that are allowed to persist
 *	(an attribute specified by the associated memory manager),
 *	are kept in a queue (vm_object_cached_list).
 *
 *	When an object from this queue is referenced again,
 *	for example to make another address space mapping,
 *	it must be removed from the queue.  That is, the
 *	queue contains *only* objects with zero references.
 *
 *	The kernel may choose to terminate objects from this
 *	queue in order to reclaim storage.  The current policy
 *	is to permit a fixed maximum number of unreferenced
 *	objects (vm_object_cached_max).
 *
 *	A spin lock (accessed by routines
 *	vm_object_cache_{lock,lock_try,unlock}) governs the
 *	object cache.  It must be held when objects are
 *	added to or removed from the cache (in vm_object_terminate).
 *	The routines that acquire a reference to a virtual
 *	memory object based on one of the memory object ports
 *	must also lock the cache.
 *
 *	Ideally, the object cache should be more isolated
 *	from the reference mechanism, so that the lock need
 *	not be held to make simple references.
 */
#if VM_OBJECT_CACHE
static vm_object_t	vm_object_cache_trim(
				boolean_t called_from_vm_object_deallocate);

static void		vm_object_deactivate_all_pages(
				vm_object_t	object);

static int		vm_object_cached_high;	/* highest # cached objects */
static int		vm_object_cached_max = 512;	/* may be patched*/

#define vm_object_cache_lock()		\
		lck_mtx_lock(&vm_object_cached_lock_data)
#define vm_object_cache_lock_try()	\
		lck_mtx_try_lock(&vm_object_cached_lock_data)

#endif	/* VM_OBJECT_CACHE */

static queue_head_t	vm_object_cached_list;
static uint32_t		vm_object_cache_pages_freed = 0;
static uint32_t		vm_object_cache_pages_moved = 0;
static uint32_t		vm_object_cache_pages_skipped = 0;
static uint32_t		vm_object_cache_adds = 0;
static uint32_t		vm_object_cached_count = 0;
static lck_mtx_t	vm_object_cached_lock_data;
static lck_mtx_ext_t	vm_object_cached_lock_data_ext;

static uint32_t		vm_object_page_grab_failed = 0;
static uint32_t		vm_object_page_grab_skipped = 0;
static uint32_t		vm_object_page_grab_returned = 0;
static uint32_t		vm_object_page_grab_pmapped = 0;
static uint32_t		vm_object_page_grab_reactivations = 0;

#define vm_object_cache_lock_spin()	\
		lck_mtx_lock_spin(&vm_object_cached_lock_data)
#define vm_object_cache_unlock()	\
		lck_mtx_unlock(&vm_object_cached_lock_data)

static void	vm_object_cache_remove_locked(vm_object_t);


#define	VM_OBJECT_HASH_COUNT		1024
#define	VM_OBJECT_HASH_LOCK_COUNT	512

static lck_mtx_t	vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT];
static lck_mtx_ext_t	vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT];

static queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];
static struct zone	*vm_object_hash_zone;

struct vm_object_hash_entry {
	queue_chain_t		hash_link;	/* hash chain link */
	memory_object_t		pager;		/* pager we represent */
	vm_object_t		object;		/* corresponding object */
	boolean_t		waiting;	/* someone waiting for
						 * termination */
};

typedef struct vm_object_hash_entry	*vm_object_hash_entry_t;
#define VM_OBJECT_HASH_ENTRY_NULL	((vm_object_hash_entry_t) 0)

#define VM_OBJECT_HASH_SHIFT	5
#define vm_object_hash(pager) \
	((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT))

#define vm_object_lock_hash(pager) \
	((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT))

void vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry);

static void vm_object_reap(vm_object_t object);
static void vm_object_reap_async(vm_object_t object);
static void vm_object_reaper_thread(void);

static lck_mtx_t	vm_object_reaper_lock_data;
static lck_mtx_ext_t	vm_object_reaper_lock_data_ext;

static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */
unsigned int vm_object_reap_count = 0;
unsigned int vm_object_reap_count_async = 0;

#define vm_object_reaper_lock()		\
		lck_mtx_lock(&vm_object_reaper_lock_data)
#define vm_object_reaper_lock_spin()	\
		lck_mtx_lock_spin(&vm_object_reaper_lock_data)
#define vm_object_reaper_unlock()	\
		lck_mtx_unlock(&vm_object_reaper_lock_data)

#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
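/*
 * Illustrative sketch (not part of the original source): how a pager
 * reference is folded into the two hash indices defined above.  The low
 * bits of the pointer are discarded (VM_OBJECT_HASH_SHIFT), then the result
 * is reduced modulo the table and lock-array sizes:
 *
 *	int bucket   = vm_object_hash(pager);		// 0 .. VM_OBJECT_HASH_COUNT - 1
 *	int lock_idx = vm_object_lock_hash(pager);	// 0 .. VM_OBJECT_HASH_LOCK_COUNT - 1
 *
 * Because VM_OBJECT_HASH_LOCK_COUNT (512) is half of VM_OBJECT_HASH_COUNT
 * (1024), every bucket b is always protected by lock b % 512, and each lock
 * covers exactly two buckets.
 */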
static lck_mtx_t *
vm_object_hash_lock_spin(
	memory_object_t	pager)
{
	int	index;

	index = vm_object_lock_hash(pager);

	lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]);

	return (&vm_object_hashed_lock_data[index]);
}

static void
vm_object_hash_unlock(lck_mtx_t *lck)
{
	lck_mtx_unlock(lck);
}


/*
 *	vm_object_hash_lookup looks up a pager in the hashtable
 *	and returns the corresponding entry, with optional removal.
 */
static vm_object_hash_entry_t
vm_object_hash_lookup(
	memory_object_t	pager,
	boolean_t	remove_entry)
{
	queue_t			bucket;
	vm_object_hash_entry_t	entry;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	entry = (vm_object_hash_entry_t)queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t)entry)) {
		if (entry->pager == pager) {
			if (remove_entry) {
				queue_remove(bucket, entry,
					     vm_object_hash_entry_t, hash_link);
			}
			return(entry);
		}
		entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
	}
	return(VM_OBJECT_HASH_ENTRY_NULL);
}
/*
 *	vm_object_hash_enter enters the specified
 *	pager / cache object association in the hashtable.
 */
static void
vm_object_hash_insert(
	vm_object_hash_entry_t	entry,
	vm_object_t		object)
{
	queue_t		bucket;

	bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];

	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);

	entry->object = object;
	object->hashed = TRUE;
}

static vm_object_hash_entry_t
vm_object_hash_entry_alloc(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;

	entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
	entry->pager = pager;
	entry->object = VM_OBJECT_NULL;
	entry->waiting = FALSE;

	return(entry);
}

void
vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry)
{
	zfree(vm_object_hash_zone, entry);
}
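/*
 * Illustrative sketch (not part of the original source): a simplified
 * lifecycle of a hash entry using the helpers above.  Races, the "waiting"
 * protocol and error handling are deliberately omitted.
 *
 *	lck_mtx_t		*lck;
 *	vm_object_hash_entry_t	entry;
 *
 *	lck = vm_object_hash_lock_spin(pager);		// lock the pager's hash chain
 *	if (vm_object_hash_lookup(pager, FALSE) == VM_OBJECT_HASH_ENTRY_NULL) {
 *		vm_object_hash_unlock(lck);
 *		entry = vm_object_hash_entry_alloc(pager);  // zalloc outside the spin lock
 *		lck = vm_object_hash_lock_spin(pager);
 *		vm_object_hash_insert(entry, object);	// entry->object = object, object->hashed = TRUE
 *	}
 *	vm_object_hash_unlock(lck);
 */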
/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */
__private_extern__ void
_vm_object_allocate(
	vm_object_size_t	size,
	vm_object_t		object)
{
	XPR(XPR_VM_OBJECT,
		"vm_object_allocate, object 0x%X size 0x%X\n",
		object, size, 0,0,0);

	*object = vm_object_template;
	queue_init(&object->memq);
	queue_init(&object->msr_q);
#if UPL_DEBUG
	queue_init(&object->uplq);
#endif /* UPL_DEBUG */
	vm_object_lock_init(object);
	object->vo_size = size;
}

__private_extern__ vm_object_t
vm_object_allocate(
	vm_object_size_t	size)
{
	register vm_object_t object;

	object = (vm_object_t) zalloc(vm_object_zone);

//	dbgLog(object, size, 0, 2);	/* (TEST/DEBUG) */

	if (object != VM_OBJECT_NULL)
		_vm_object_allocate(size, object);

	return object;
}
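/*
 * Illustrative sketch (not part of the original source): allocating and
 * releasing an anonymous VM object with the routines above.  Sizes are in
 * bytes; the new object starts with one reference, which the matching
 * vm_object_deallocate() gives back.
 *
 *	vm_object_t obj;
 *
 *	obj = vm_object_allocate((vm_object_size_t)(16 * PAGE_SIZE));
 *	if (obj != VM_OBJECT_NULL) {
 *		// ... map it, fault pages into it, etc. ...
 *		vm_object_deallocate(obj);	// drop the allocation reference
 *	}
 */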
lck_grp_t		vm_object_lck_grp;
lck_grp_t		vm_object_cache_lck_grp;
lck_grp_attr_t		vm_object_lck_grp_attr;
lck_attr_t		vm_object_lck_attr;
lck_attr_t		kernel_object_lck_attr;
/*
 *	vm_object_bootstrap:
 *
 *	Initialize the VM objects module.
 */
__private_extern__ void
vm_object_bootstrap(void)
{
	register int	i;

	vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
				round_page(512*1024),
				round_page(12*1024),
				"vm objects");
	zone_change(vm_object_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_object_zone, Z_NOENCRYPT, TRUE);

	vm_object_init_lck_grp();

	queue_init(&vm_object_cached_list);

	lck_mtx_init_ext(&vm_object_cached_lock_data,
		&vm_object_cached_lock_data_ext,
		&vm_object_cache_lck_grp,
		&vm_object_lck_attr);

	queue_init(&vm_object_reaper_queue);

	for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) {
		lck_mtx_init_ext(&vm_object_hashed_lock_data[i],
				 &vm_object_hashed_lock_data_ext[i],
				 &vm_object_lck_grp,
				 &vm_object_lck_attr);
	}
	lck_mtx_init_ext(&vm_object_reaper_lock_data,
		&vm_object_reaper_lock_data_ext,
		&vm_object_lck_grp,
		&vm_object_lck_attr);

	vm_object_hash_zone =
		zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
		      round_page(512*1024),
		      round_page(12*1024),
		      "vm object hash entries");
	zone_change(vm_object_hash_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE);

	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
		queue_init(&vm_object_hashtable[i]);
	/*
	 *	Fill in a template object, for quick initialization
	 */

	/* memq; Lock; init after allocation */
	vm_object_template.memq.prev = NULL;
	vm_object_template.memq.next = NULL;
#if 0
	/*
	 * We can't call vm_object_lock_init() here because that will
	 * allocate some memory and VM is not fully initialized yet.
	 * The lock will be initialized for each allocated object in
	 * _vm_object_allocate(), so we don't need to initialize it in
	 * the vm_object_template.
	 */
	vm_object_lock_init(&vm_object_template);
#endif
	vm_object_template.vo_size = 0;
	vm_object_template.memq_hint = VM_PAGE_NULL;
	vm_object_template.ref_count = 1;
#if	TASK_SWAPPER
	vm_object_template.res_count = 1;
#endif	/* TASK_SWAPPER */
	vm_object_template.resident_page_count = 0;
	vm_object_template.wired_page_count = 0;
	vm_object_template.reusable_page_count = 0;
	vm_object_template.copy = VM_OBJECT_NULL;
	vm_object_template.shadow = VM_OBJECT_NULL;
	vm_object_template.vo_shadow_offset = (vm_object_offset_t) 0;
	vm_object_template.pager = MEMORY_OBJECT_NULL;
	vm_object_template.paging_offset = 0;
	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
	vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
	vm_object_template.paging_in_progress = 0;
	vm_object_template.activity_in_progress = 0;

	/* Begin bitfields */
	vm_object_template.all_wanted = 0; /* all bits FALSE */
	vm_object_template.pager_created = FALSE;
	vm_object_template.pager_initialized = FALSE;
	vm_object_template.pager_ready = FALSE;
	vm_object_template.pager_trusted = FALSE;
	vm_object_template.can_persist = FALSE;
	vm_object_template.internal = TRUE;
	vm_object_template.temporary = TRUE;
	vm_object_template.private = FALSE;
	vm_object_template.pageout = FALSE;
	vm_object_template.alive = TRUE;
	vm_object_template.purgable = VM_PURGABLE_DENY;
	vm_object_template.shadowed = FALSE;
	vm_object_template.silent_overwrite = FALSE;
	vm_object_template.advisory_pageout = FALSE;
	vm_object_template.true_share = FALSE;
	vm_object_template.terminating = FALSE;
	vm_object_template.named = FALSE;
	vm_object_template.shadow_severed = FALSE;
	vm_object_template.phys_contiguous = FALSE;
	vm_object_template.nophyscache = FALSE;

	vm_object_template.cached_list.prev = NULL;
	vm_object_template.cached_list.next = NULL;
	vm_object_template.msr_q.prev = NULL;
	vm_object_template.msr_q.next = NULL;

	vm_object_template.last_alloc = (vm_object_offset_t) 0;
	vm_object_template.sequential = (vm_object_offset_t) 0;
	vm_object_template.pages_created = 0;
	vm_object_template.pages_used = 0;
	vm_object_template.scan_collisions = 0;

#if	MACH_PAGEMAP
	vm_object_template.existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */
	vm_object_template.cow_hint = ~(vm_offset_t)0;
#if	MACH_ASSERT
	vm_object_template.paging_object = VM_OBJECT_NULL;
#endif	/* MACH_ASSERT */

	/* cache bitfields */
	vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT;
	vm_object_template.set_cache_attr = FALSE;
	vm_object_template.code_signed = FALSE;
	vm_object_template.hashed = FALSE;
	vm_object_template.transposed = FALSE;
	vm_object_template.mapping_in_progress = FALSE;
	vm_object_template.volatile_empty = FALSE;
	vm_object_template.volatile_fault = FALSE;
	vm_object_template.all_reusable = FALSE;
	vm_object_template.blocked_access = FALSE;
	vm_object_template.__object2_unused_bits = 0;
#if	UPL_DEBUG
	vm_object_template.uplq.prev = NULL;
	vm_object_template.uplq.next = NULL;
#endif	/* UPL_DEBUG */
#if	VM_PIP_DEBUG
	bzero(&vm_object_template.pip_holders,
	      sizeof (vm_object_template.pip_holders));
#endif	/* VM_PIP_DEBUG */

	vm_object_template.objq.next = NULL;
	vm_object_template.objq.prev = NULL;

	vm_object_template.vo_cache_ts = 0;
	/*
	 *	Initialize the "kernel object"
	 */

	kernel_object = &kernel_object_store;

	/*
	 *	Note that in the following size specifications, we need to add 1 because
	 *	VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
	 */

#ifdef ppc
	_vm_object_allocate(vm_last_addr + 1,
			    kernel_object);
#else
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
			    kernel_object);
#endif
	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 *	Initialize the "submap object".  Make it as large as the
	 *	kernel object so that no limit is imposed on submap sizes.
	 */

	vm_submap_object = &vm_submap_object_store;
#ifdef ppc
	_vm_object_allocate(vm_last_addr + 1,
			    vm_submap_object);
#else
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
			    vm_submap_object);
#endif
	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * Create an "extra" reference to this object so that we never
	 * try to deallocate it; zfree doesn't like to be called with
	 * non-zone memory.
	 */
	vm_object_reference(vm_submap_object);

#if	MACH_PAGEMAP
	vm_external_module_initialize();
#endif	/* MACH_PAGEMAP */
}
void
vm_object_reaper_init(void)
{
	kern_return_t	kr;
	thread_t	thread;

	kr = kernel_thread_start_priority(
		(thread_continue_t) vm_object_reaper_thread,
		NULL,
		BASEPRI_PREEMPT - 1,
		&thread);
	if (kr != KERN_SUCCESS) {
		panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
	}
	thread_deallocate(thread);
}
__private_extern__ void
vm_object_init(void)
{
	/*
	 *	Finish initializing the kernel object.
	 */
}


__private_extern__ void
vm_object_init_lck_grp(void)
{
	/*
	 * initialize the vm_object lock world
	 */
	lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
	lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
	lck_grp_init(&vm_object_cache_lck_grp, "vm_object_cache", &vm_object_lck_grp_attr);
	lck_attr_setdefault(&vm_object_lck_attr);
	lck_attr_setdefault(&kernel_object_lck_attr);
	lck_attr_cleardebug(&kernel_object_lck_attr);
}
#define	MIGHT_NOT_CACHE_SHADOWS		1
#if	MIGHT_NOT_CACHE_SHADOWS
static int cache_shadows = TRUE;
#endif	/* MIGHT_NOT_CACHE_SHADOWS */
/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
unsigned long vm_object_deallocate_shared_successes = 0;
unsigned long vm_object_deallocate_shared_failures = 0;
unsigned long vm_object_deallocate_shared_swap_failures = 0;
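/*
 * Illustrative sketch (not part of the original source): the reference
 * discipline that vm_object_deallocate() below is the other half of.
 * Every vm_object_reference() (or vm_object_allocate()) must eventually be
 * paired with one vm_object_deallocate(); the object can only be cached,
 * terminated or reaped once the last reference is gone.
 *
 *	vm_object_reference(object);	// keep the object alive across some operation
 *	// ... use the object without holding its lock ...
 *	vm_object_deallocate(object);	// may cache, terminate or reap the object
 */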
__private_extern__ void
vm_object_deallocate(
	register vm_object_t	object)
{
	boolean_t	retry_cache_trim = FALSE;
	uint32_t	try_failed_count = 0;
	vm_object_t	shadow = VM_OBJECT_NULL;

//	if(object)dbgLog(object, object->ref_count, object->can_persist, 3);	/* (TEST/DEBUG) */
//	else dbgLog(object, 0, 0, 3);	/* (TEST/DEBUG) */

	if (object == VM_OBJECT_NULL)
		return;

	if (object == kernel_object) {
		vm_object_lock_shared(object);

		OSAddAtomic(-1, &object->ref_count);

		if (object->ref_count == 0) {
			panic("vm_object_deallocate: losing kernel_object\n");
		}
		vm_object_unlock(object);
		return;
	}

	if (object->ref_count > 2 ||
	    (!object->named && object->ref_count > 1)) {
		UInt32		original_ref_count;
		volatile UInt32	*ref_count_p;
		Boolean		atomic_swap;

		/*
		 * The object currently looks like it is not being
		 * kept alive solely by the reference we're about to release.
		 * Let's try and release our reference without taking
		 * all the locks we would need if we had to terminate the
		 * object (cache lock + exclusive object lock).
		 * Lock the object "shared" to make sure we don't race with
		 * anyone holding it "exclusive".
		 */
		vm_object_lock_shared(object);
		ref_count_p = (volatile UInt32 *) &object->ref_count;
		original_ref_count = object->ref_count;
		/*
		 * Test again as "ref_count" could have changed.
		 * "named" shouldn't change.
		 */
		if (original_ref_count > 2 ||
		    (!object->named && original_ref_count > 1)) {
			atomic_swap = OSCompareAndSwap(
				original_ref_count,
				original_ref_count - 1,
				(UInt32 *) &object->ref_count);
			if (atomic_swap == FALSE) {
				vm_object_deallocate_shared_swap_failures++;
			}
		} else {
			atomic_swap = FALSE;
		}
		vm_object_unlock(object);

		if (atomic_swap) {
			/*
			 * ref_count was updated atomically !
			 */
			vm_object_deallocate_shared_successes++;
			return;
		}

		/*
		 * Someone else updated the ref_count at the same
		 * time and we lost the race.  Fall back to the usual
		 * slow but safe path...
		 */
		vm_object_deallocate_shared_failures++;
	}

	while (object != VM_OBJECT_NULL) {

		vm_object_lock(object);

		assert(object->ref_count > 0);

		/*
		 *	If the object has a named reference, and only
		 *	that reference would remain, inform the pager
		 *	about the last "mapping" reference going away.
		 */
		if ((object->ref_count == 2) && (object->named)) {
			memory_object_t	pager = object->pager;

			/* Notify the Pager that there are no */
			/* more mappers for this object */

			if (pager != MEMORY_OBJECT_NULL) {
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				memory_object_last_unmap(pager);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			assert(object->ref_count > 0);
		}

		/*
		 *	Lose the reference. If other references
		 *	remain, then we are done, unless we need
		 *	to retry a cache trim.
		 *	If it is the last reference, then keep it
		 *	until any pending initialization is completed.
		 */

		/* if the object is terminating, it cannot go into */
		/* the cache and we obviously should not call      */
		/* terminate again.  */

		if ((object->ref_count > 1) || object->terminating) {
			vm_object_lock_assert_exclusive(object);
			object->ref_count--;
			vm_object_res_deallocate(object);

			if (object->ref_count == 1 &&
			    object->shadow != VM_OBJECT_NULL) {
				/*
				 * There's only one reference left on this
				 * VM object.  We can't tell if it's a valid
				 * one (from a mapping for example) or if this
				 * object is just part of a possibly stale and
				 * useless shadow chain.
				 * We would like to try and collapse it into
				 * its parent, but we don't have any pointers
				 * back to this parent object.
				 * But we can try and collapse this object with
				 * its own shadows, in case these are useless
				 * too...
				 * We can't bypass this object though, since we
				 * don't know if this last reference on it is
				 * meaningful or not.
				 */
				vm_object_collapse(object, 0, FALSE);
			}
			vm_object_unlock(object);

			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			     VM_OBJECT_NULL)) {
				continue;
			}
			return;
		}

		/*
		 *	We have to wait for initialization
		 *	before destroying or caching the object.
		 */

		if (object->pager_created && ! object->pager_initialized) {
			assert(! object->can_persist);
			vm_object_assert_wait(object,
					      VM_OBJECT_EVENT_INITIALIZED,
					      THREAD_UNINT);
			vm_object_unlock(object);

			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

		/*
		 *	If this object can persist, then enter it in
		 *	the cache. Otherwise, terminate it.
		 *
		 *	NOTE:  Only permanent objects are cached, and
		 *	permanent objects cannot have shadows.  This
		 *	affects the residence counting logic in a minor
		 *	way (can do it in-line, mostly).
		 */

#if VM_OBJECT_CACHE
		if ((object->can_persist) && (object->alive)) {
			/*
			 *	Now it is safe to decrement reference count,
			 *	and to return if reference count is > 0.
			 */

			vm_object_lock_assert_exclusive(object);
			if (--object->ref_count > 0) {
				vm_object_res_deallocate(object);
				vm_object_unlock(object);

				if (retry_cache_trim &&
				    ((object = vm_object_cache_trim(TRUE)) !=
				     VM_OBJECT_NULL)) {
					continue;
				}
				return;
			}

#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *	Remove shadow now if we don't
			 *	want to cache shadows.
			 */
			if (! cache_shadows) {
				shadow = object->shadow;
				object->shadow = VM_OBJECT_NULL;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *	Enter the object onto the queue of
			 *	cached objects, and deactivate
			 *	its pages.
			 */
			assert(object->shadow == VM_OBJECT_NULL);
			VM_OBJ_RES_DECR(object);
			XPR(XPR_VM_OBJECT,
			    "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
			    object,
			    vm_object_cached_list.next,
			    vm_object_cached_list.prev,0,0);

			vm_object_unlock(object);

			try_failed_count = 0;
			for (;;) {
				vm_object_cache_lock();

				/*
				 * if we try to take a regular lock here
				 * we risk deadlocking against someone
				 * holding a lock on this object while
				 * trying to vm_object_deallocate a different
				 * object
				 */
				if (vm_object_lock_try(object))
					break;
				vm_object_cache_unlock();
				try_failed_count++;

				mutex_pause(try_failed_count);	/* wait a bit */
			}
			vm_object_cached_count++;
			if (vm_object_cached_count > vm_object_cached_high)
				vm_object_cached_high = vm_object_cached_count;
			queue_enter(&vm_object_cached_list, object,
				    vm_object_t, cached_list);
			vm_object_cache_unlock();

			vm_object_deactivate_all_pages(object);
			vm_object_unlock(object);

#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *	If we have a shadow that we need
			 *	to deallocate, do so now, remembering
			 *	to trim the cache later.
			 */
			if (! cache_shadows && shadow != VM_OBJECT_NULL) {
				object = shadow;
				retry_cache_trim = TRUE;
				continue;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *	Trim the cache. If the cache trim
			 *	returns with a shadow for us to deallocate,
			 *	then remember to retry the cache trim
			 *	when we are done deallocating the shadow.
			 *	Otherwise, we are done.
			 */
			object = vm_object_cache_trim(TRUE);
			if (object == VM_OBJECT_NULL) {
				return;
			}
			retry_cache_trim = TRUE;
		} else
#endif	/* VM_OBJECT_CACHE */
		{
			/*
			 *	This object is not cachable; terminate it.
			 */
			XPR(XPR_VM_OBJECT,
			    "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
			    object, object->resident_page_count,
			    object->paging_in_progress,
			    (void *)current_thread(),object->ref_count);

			VM_OBJ_RES_DECR(object);	/* XXX ? */
			/*
			 *	Terminate this object. If it had a shadow,
			 *	then deallocate it; otherwise, if we need
			 *	to retry a cache trim, do so now; otherwise,
			 *	we are done. "pageout" objects have a shadow,
			 *	but maintain a "paging reference" rather than
			 *	a normal reference.
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;

			if (vm_object_terminate(object) != KERN_SUCCESS) {
				return;
			}
			if (shadow != VM_OBJECT_NULL) {
				object = shadow;
				continue;
			}
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			     VM_OBJECT_NULL)) {
				continue;
			}
			return;
		}
	}
	assert(! retry_cache_trim);
}
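/*
 * Illustrative sketch (not part of the original source): the lock-free
 * fast path used above, reduced to its core.  A shared (read) lock keeps
 * exclusive lockers out while the reference count is decremented with a
 * compare-and-swap; if another thread changes ref_count between the read
 * and the swap, OSCompareAndSwap() fails and the caller falls back to the
 * slow path under the exclusive object lock.
 *
 *	UInt32 old_ref = object->ref_count;
 *	if (OSCompareAndSwap(old_ref, old_ref - 1,
 *			     (UInt32 *) &object->ref_count)) {
 *		// reference dropped without taking the exclusive lock
 *	} else {
 *		// lost the race: retry via the locked path
 *	}
 */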
vm_page_t
vm_object_page_grab(
	vm_object_t	object)
{
	vm_page_t	p, next_p;
	int		p_limit = 0;
	int		p_skipped = 0;

	vm_object_lock_assert_exclusive(object);

	next_p = (vm_page_t)queue_first(&object->memq);
	p_limit = MIN(50, object->resident_page_count);

	while (!queue_end(&object->memq, (queue_entry_t)next_p) && --p_limit > 0) {

		p = next_p;
		next_p = (vm_page_t)queue_next(&next_p->listq);

		if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->fictitious)
			goto move_page_in_obj;

		if (p->pmapped || p->dirty || p->precious) {
			vm_page_lockspin_queues();

			if (p->pmapped) {
				int refmod_state;

				vm_object_page_grab_pmapped++;

				if (p->reference == FALSE || p->dirty == FALSE) {

					refmod_state = pmap_get_refmod(p->phys_page);

					if (refmod_state & VM_MEM_REFERENCED)
						p->reference = TRUE;
					if (refmod_state & VM_MEM_MODIFIED)
						p->dirty = TRUE;
				}
				if (p->dirty == FALSE && p->precious == FALSE) {

					refmod_state = pmap_disconnect(p->phys_page);

					if (refmod_state & VM_MEM_REFERENCED)
						p->reference = TRUE;
					if (refmod_state & VM_MEM_MODIFIED)
						p->dirty = TRUE;

					if (p->dirty == FALSE)
						goto take_page;
				}
			}
			if (p->inactive && p->reference == TRUE) {
				vm_page_activate(p);

				VM_STAT_INCR(reactivations);
				vm_object_page_grab_reactivations++;
			}
			vm_page_unlock_queues();
move_page_in_obj:
			queue_remove(&object->memq, p, vm_page_t, listq);
			queue_enter(&object->memq, p, vm_page_t, listq);

			p_skipped++;
			continue;
		}
		vm_page_lockspin_queues();
take_page:
		vm_page_free_prepare_queues(p);
		vm_object_page_grab_returned++;
		vm_object_page_grab_skipped += p_skipped;

		vm_page_unlock_queues();

		vm_page_free_prepare_object(p, TRUE);

		return (p);
	}
	vm_object_page_grab_skipped += p_skipped;
	vm_object_page_grab_failed++;

	return (VM_PAGE_NULL);
}
#define EVICT_PREPARE_LIMIT	64
#define EVICT_AGE		10

static	clock_sec_t	vm_object_cache_aging_ts = 0;
static void
vm_object_cache_remove_locked(
	vm_object_t	object)
{
	queue_remove(&vm_object_cached_list, object, vm_object_t, objq);
	object->objq.next = NULL;
	object->objq.prev = NULL;

	vm_object_cached_count--;
}

void
vm_object_cache_remove(
	vm_object_t	object)
{
	vm_object_cache_lock_spin();

	if (object->objq.next || object->objq.prev)
		vm_object_cache_remove_locked(object);

	vm_object_cache_unlock();
}
void
vm_object_cache_add(
	vm_object_t	object)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (object->resident_page_count == 0)
		return;
	clock_get_system_nanotime(&sec, &nsec);

	vm_object_cache_lock_spin();

	if (object->objq.next == NULL && object->objq.prev == NULL) {
		queue_enter(&vm_object_cached_list, object, vm_object_t, objq);
		object->vo_cache_ts = sec + EVICT_AGE;
		object->vo_cache_pages_to_scan = object->resident_page_count;

		vm_object_cached_count++;
		vm_object_cache_adds++;
	}
	vm_object_cache_unlock();
}
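/*
 * Illustrative sketch (not part of the original source): when an object
 * added by vm_object_cache_add() becomes eligible for eviction.  The object
 * is stamped with "now + EVICT_AGE" seconds, and vm_object_cache_evict()
 * below skips it (and everything behind it on the queue) until that time
 * has passed.
 *
 *	clock_sec_t	sec;
 *	clock_nsec_t	nsec;
 *
 *	clock_get_system_nanotime(&sec, &nsec);
 *	if (sec >= object->vo_cache_ts) {
 *		// aged at least EVICT_AGE seconds: a candidate for eviction
 *	}
 */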
int
vm_object_cache_evict(
	int	num_to_evict,
	int	max_objects_to_examine)
{
	vm_object_t	object = VM_OBJECT_NULL;
	vm_object_t	next_obj = VM_OBJECT_NULL;
	vm_page_t	local_free_q = VM_PAGE_NULL;
	vm_page_t	p;
	vm_page_t	next_p;
	int		object_cnt = 0;
	vm_page_t	ep_array[EVICT_PREPARE_LIMIT];
	int		ep_count;
	int		ep_limit;
	int		ep_index;
	int		ep_freed = 0;
	int		ep_moved = 0;
	uint32_t	ep_skipped = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	KERNEL_DEBUG(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
	/*
	 * do a couple of quick checks to see if it's
	 * worthwhile grabbing the lock
	 */
	if (queue_empty(&vm_object_cached_list)) {
		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (0);
	}
	clock_get_system_nanotime(&sec, &nsec);

	/*
	 * the object on the head of the queue has not
	 * yet sufficiently aged
	 */
	if (sec < vm_object_cache_aging_ts) {
		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (0);
	}
	/*
	 * don't need the queue lock to find
	 * and lock an object on the cached list
	 */
	vm_page_unlock_queues();

	vm_object_cache_lock_spin();

	for (;;) {
		next_obj = (vm_object_t)queue_first(&vm_object_cached_list);

		while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) {

			object = next_obj;
			next_obj = (vm_object_t)queue_next(&next_obj->objq);

			if (sec < object->vo_cache_ts) {
				KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0);

				vm_object_cache_aging_ts = object->vo_cache_ts;
				object = VM_OBJECT_NULL;
				break;
			}
			if (!vm_object_lock_try_scan(object)) {
				/*
				 * just skip over this guy for now... if we find
				 * an object to steal pages from, we'll revisit in a bit...
				 * hopefully, the lock will have cleared
				 */
				KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0);

				object = VM_OBJECT_NULL;
				continue;
			}
			if (queue_empty(&object->memq) || object->vo_cache_pages_to_scan == 0) {
				/*
				 * this case really shouldn't happen, but it's not fatal
				 * so deal with it... if we don't remove the object from
				 * the list, we'll never move past it.
				 */
				KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);

				vm_object_cache_remove_locked(object);
				vm_object_unlock(object);
				object = VM_OBJECT_NULL;
				continue;
			}
			/*
			 * we have a locked object with pages...
			 * time to start harvesting
			 */
			break;
		}
		vm_object_cache_unlock();

		if (object == VM_OBJECT_NULL)
			break;

		/*
		 * object is locked at this point and
		 * has resident pages
		 */
		next_p = (vm_page_t)queue_first(&object->memq);

		/*
		 * break the page scan into 2 pieces to minimize the time spent
		 * behind the page queue lock...
		 * the list of pages on these unused objects is likely to be cold
		 * w/r to the cpu cache which increases the time to scan the list
		 * tenfold...  and we may have a 'run' of pages we can't utilize that
		 * needs to be skipped over...
		 */
		if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT)
			ep_limit = EVICT_PREPARE_LIMIT;
		ep_count = 0;

		while (!queue_end(&object->memq, (queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) {

			p = next_p;
			next_p = (vm_page_t)queue_next(&next_p->listq);

			object->vo_cache_pages_to_scan--;

			if (VM_PAGE_WIRED(p) || p->busy || p->cleaning) {
				queue_remove(&object->memq, p, vm_page_t, listq);
				queue_enter(&object->memq, p, vm_page_t, listq);

				ep_skipped++;
				continue;
			}
			if (p->wpmapped || p->dirty || p->precious) {
				queue_remove(&object->memq, p, vm_page_t, listq);
				queue_enter(&object->memq, p, vm_page_t, listq);

				pmap_clear_reference(p->phys_page);
			}
			ep_array[ep_count++] = p;
		}
		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0);

		vm_page_lockspin_queues();

		for (ep_index = 0; ep_index < ep_count; ep_index++) {

			p = ep_array[ep_index];

			if (p->wpmapped || p->dirty || p->precious) {
				p->reference = FALSE;
				p->no_cache = FALSE;

				VM_PAGE_QUEUES_REMOVE(p);
				VM_PAGE_ENQUEUE_INACTIVE(p, TRUE);

				ep_moved++;
			} else {
				vm_page_free_prepare_queues(p);

				assert(p->pageq.next == NULL && p->pageq.prev == NULL);
				/*
				 * Add this page to our list of reclaimed pages,
				 * to be freed later.
				 */
				p->pageq.next = (queue_entry_t) local_free_q;
				local_free_q = p;

				ep_freed++;
			}
		}
		vm_page_unlock_queues();

		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0);

		if (local_free_q) {
			vm_page_free_list(local_free_q, TRUE);
			local_free_q = VM_PAGE_NULL;
		}
		if (object->vo_cache_pages_to_scan == 0) {
			KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0);

			vm_object_cache_remove(object);

			KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);
		}
		/*
		 * done with this object
		 */
		vm_object_unlock(object);
		object = VM_OBJECT_NULL;

		/*
		 * at this point, we are not holding any locks
		 */
		if ((ep_freed + ep_moved) >= num_to_evict) {
			/*
			 * we've reached our target for the
			 * number of pages to evict
			 */
			break;
		}
		vm_object_cache_lock_spin();
	}
	/*
	 * put the page queues lock back to the caller's
	 * idea of it
	 */
	vm_page_lock_queues();

	vm_object_cache_pages_freed += ep_freed;
	vm_object_cache_pages_moved += ep_moved;
	vm_object_cache_pages_skipped += ep_skipped;

	KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, ep_freed, 0, 0, 0, 0);
	return (ep_freed);
}
/*
 *	Check to see whether we really need to trim
 *	down the cache. If so, remove an object from
 *	the cache, terminate it, and repeat.
 *
 *	Called with, and returns with, cache lock unlocked.
 */
vm_object_t
vm_object_cache_trim(
	boolean_t called_from_vm_object_deallocate)
{
	register vm_object_t	object = VM_OBJECT_NULL;
	vm_object_t		shadow;

	for (;;) {

		/*
		 *	If we no longer need to trim the cache,
		 *	then we are done.
		 */
		if (vm_object_cached_count <= vm_object_cached_max)
			return VM_OBJECT_NULL;

		vm_object_cache_lock();
		if (vm_object_cached_count <= vm_object_cached_max) {
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}

		/*
		 *	We must trim down the cache, so remove
		 *	the first object in the cache.
		 */
		XPR(XPR_VM_OBJECT,
		    "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
		    vm_object_cached_list.next,
		    vm_object_cached_list.prev, 0, 0, 0);

		object = (vm_object_t) queue_first(&vm_object_cached_list);
		if(object == (vm_object_t) &vm_object_cached_list) {
			/* something's wrong with the calling parameter or */
			/* the value of vm_object_cached_count, just fix   */
			/* and return                                      */
			if(vm_object_cached_max < 0)
				vm_object_cached_max = 0;
			vm_object_cached_count = 0;
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}
		vm_object_lock(object);
		queue_remove(&vm_object_cached_list, object, vm_object_t,
			     cached_list);
		vm_object_cached_count--;

		vm_object_cache_unlock();
		/*
		 *	Since this object is in the cache, we know
		 *	that it is initialized and has no references.
		 *	Take a reference to avoid recursive deallocations.
		 */

		assert(object->pager_initialized);
		assert(object->ref_count == 0);
		vm_object_lock_assert_exclusive(object);
		object->ref_count++;

		/*
		 *	Terminate the object.
		 *	If the object had a shadow, we let vm_object_deallocate
		 *	deallocate it. "pageout" objects have a shadow, but
		 *	maintain a "paging reference" rather than a normal
		 *	reference.
		 *	(We are careful here to limit recursion.)
		 */
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;

		if(vm_object_terminate(object) != KERN_SUCCESS)
			continue;

		if (shadow != VM_OBJECT_NULL) {
			if (called_from_vm_object_deallocate) {
				return shadow;
			} else {
				vm_object_deallocate(shadow);
			}
		}
	}
}
/*
 *	Routine:	vm_object_terminate
 *	Purpose:
 *		Free all resources associated with a vm_object.
 *	In/out conditions:
 *		Upon entry, the object must be locked,
 *		and the object must have exactly one reference.
 *
 *		The shadow object reference is left alone.
 *
 *		The object must be unlocked if it's found that pages
 *		must be flushed to a backing object.  If someone
 *		manages to map the object while it is being flushed
 *		the object is returned unlocked and unchanged.  Otherwise,
 *		upon exit, the cache will be unlocked, and the
 *		object will cease to exist.
 */
static kern_return_t
vm_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;

	XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
		object, object->ref_count, 0, 0, 0);

	if (!object->pageout && (!object->temporary || object->can_persist) &&
	    (object->pager != NULL || object->shadow_severed)) {
		/*
		 * Clear pager_trusted bit so that the pages get yanked
		 * out of the object instead of cleaned in place.  This
		 * prevents a deadlock in XMM and makes more sense anyway.
		 */
		object->pager_trusted = FALSE;

		vm_object_reap_pages(object, REAP_TERMINATE);
	}
	/*
	 *	Make sure the object isn't already being terminated
	 */
	if (object->terminating) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Did somebody get a reference to the object while we were
	 *	cleaning it?
	 */
	if (object->ref_count != 1) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_res_deallocate(object);
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Make sure no one can look us up now.
	 */

	object->terminating = TRUE;
	object->alive = FALSE;

	if ( !object->internal && (object->objq.next || object->objq.prev))
		vm_object_cache_remove(object);

	if (object->hashed) {
		lck_mtx_t	*lck;

		lck = vm_object_hash_lock_spin(object->pager);
		vm_object_remove(object);
		vm_object_hash_unlock(lck);
	}
	/*
	 *	Detach the object from its shadow if we are the shadow's
	 *	copy. The reference we hold on the shadow must be dropped
	 *	by our caller.
	 */
	if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
	    !(object->pageout)) {
		vm_object_lock(shadow_object);
		if (shadow_object->copy == object)
			shadow_object->copy = VM_OBJECT_NULL;
		vm_object_unlock(shadow_object);
	}

	if (object->paging_in_progress != 0 ||
	    object->activity_in_progress != 0) {
		/*
		 * There are still some paging_in_progress references
		 * on this object, meaning that there are some paging
		 * or other I/O operations in progress for this VM object.
		 * Such operations take some paging_in_progress references
		 * up front to ensure that the object doesn't go away, but
		 * they may also need to acquire a reference on the VM object,
		 * to map it in kernel space, for example.  That means that
		 * they may end up releasing the last reference on the VM
		 * object, triggering its termination, while still holding
		 * paging_in_progress references.  Waiting for these
		 * pending paging_in_progress references to go away here would
		 * deadlock.
		 *
		 * To avoid deadlocking, we'll let the vm_object_reaper_thread
		 * complete the VM object termination if it still holds
		 * paging_in_progress references at this point.
		 *
		 * No new paging_in_progress should appear now that the
		 * VM object is "terminating" and not "alive".
		 */
		vm_object_reap_async(object);
		vm_object_unlock(object);
		/*
		 * Return KERN_FAILURE to let the caller know that we
		 * haven't completed the termination and it can't drop this
		 * object's reference on its shadow object yet.
		 * The reaper thread will take care of that once it has
		 * completed this object's termination.
		 */
		return KERN_FAILURE;
	}
	/*
	 * complete the VM object termination
	 */
	vm_object_reap(object);
	object = VM_OBJECT_NULL;

	/*
	 * the object lock was released by vm_object_reap()
	 *
	 * KERN_SUCCESS means that this object has been terminated
	 * and no longer needs its shadow object but still holds a
	 * reference on it.
	 * The caller is responsible for dropping that reference.
	 * We can't call vm_object_deallocate() here because that
	 * would create a recursion.
	 */
	return KERN_SUCCESS;
}
/*
 * Complete the termination of a VM object after it's been marked
 * as "terminating" and "!alive" by vm_object_terminate().
 *
 * The VM object must be locked by caller.
 * The lock will be released on return and the VM object is no longer valid.
 */
static void
vm_object_reap(
	vm_object_t	object)
{
	memory_object_t		pager;

	vm_object_lock_assert_exclusive(object);
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);

	vm_object_reap_count++;

	pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;

	if (pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);

	object->ref_count--;
#if	TASK_SWAPPER
	assert(object->res_count == 0);
#endif	/* TASK_SWAPPER */

	assert (object->ref_count == 0);

	/*
	 * remove from purgeable queue if it's on
	 */
	if (object->internal && (object->objq.next || object->objq.prev)) {
		purgeable_q_t queue = vm_purgeable_object_remove(object);

		/* Must take page lock for this - using it to protect token queue */
		vm_page_lock_queues();
		vm_purgeable_token_delete_first(queue);

		assert(queue->debug_count_objects>=0);
		vm_page_unlock_queues();
	}

	/*
	 *	Clean or free the pages, as appropriate.
	 *	It is possible for us to find busy/absent pages,
	 *	if some faults on this object were aborted.
	 */
	if (object->pageout) {
		assert(object->shadow != VM_OBJECT_NULL);

		vm_pageout_object_terminate(object);

	} else if (((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) {

		vm_object_reap_pages(object, REAP_REAP);
	}
	assert(queue_empty(&object->memq));
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);
	assert(object->ref_count == 0);

	/*
	 * If the pager has not already been released by
	 * vm_object_destroy, we need to terminate it and
	 * release our reference to it here.
	 */
	if (pager != MEMORY_OBJECT_NULL) {
		vm_object_unlock(object);
		vm_object_release_pager(pager, object->hashed);
		vm_object_lock(object);
	}

	/* kick off anyone waiting on terminating */
	object->terminating = FALSE;
	vm_object_paging_begin(object);
	vm_object_paging_end(object);
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	vm_external_destroy(object->existence_map, object->vo_size);
#endif	/* MACH_PAGEMAP */

	object->shadow = VM_OBJECT_NULL;

	vm_object_lock_destroy(object);
	/*
	 *	Free the space for the object.
	 */
	zfree(vm_object_zone, object);
	object = VM_OBJECT_NULL;
}
unsigned int vm_max_batch = 256;

#define V_O_R_MAX_BATCH 128

#define BATCH_LIMIT(max)	(vm_max_batch >= max ? max : vm_max_batch)


#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect)		\
	MACRO_BEGIN							\
	if (_local_free_q) {						\
		if (do_disconnect) {					\
			vm_page_t m;					\
			for (m = _local_free_q;				\
			     m != VM_PAGE_NULL;				\
			     m = (vm_page_t) m->pageq.next) {		\
				if (m->pmapped) {			\
					pmap_disconnect(m->phys_page);	\
				}					\
			}						\
		}							\
		vm_page_free_list(_local_free_q, TRUE);			\
		_local_free_q = VM_PAGE_NULL;				\
	}								\
	MACRO_END
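/*
 * Illustrative sketch (not part of the original source): how the reaping
 * code below uses VM_OBJ_REAP_FREELIST().  Pages are taken off the VM
 * queues, chained through their pageq.next fields onto a local list, and
 * then handed to the free list in one batch; the macro optionally
 * pmap_disconnect()s each page first.
 *
 *	vm_page_t local_free_q = VM_PAGE_NULL;
 *
 *	vm_page_free_prepare_queues(p);			// under the page queues lock
 *	p->pageq.next = (queue_entry_t) local_free_q;	// push onto the local list
 *	local_free_q = p;
 *	// ... repeat for each page reclaimed in this batch ...
 *
 *	VM_OBJ_REAP_FREELIST(local_free_q, disconnect_on_release);
 */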
void
vm_object_reap_pages(
	vm_object_t	object,
	int		reap_type)
{
	vm_page_t	p;
	vm_page_t	next;
	vm_page_t	local_free_q = VM_PAGE_NULL;
	int		loop_count;
	boolean_t	disconnect_on_release;

	if (reap_type == REAP_DATA_FLUSH) {
		/*
		 * We need to disconnect pages from all pmaps before
		 * releasing them to the free list
		 */
		disconnect_on_release = TRUE;
	} else {
		/*
		 * Either the caller has already disconnected the pages
		 * from all pmaps, or we disconnect them here as we add
		 * them to our local list of pages to be released.
		 * No need to re-disconnect them when we release the pages
		 * to the free list.
		 */
		disconnect_on_release = FALSE;
	}

restart_after_sleep:
	if (queue_empty(&object->memq))
		return;
	loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH) + 1;

	vm_page_lockspin_queues();

	next = (vm_page_t)queue_first(&object->memq);

	while (!queue_end(&object->memq, (queue_entry_t)next)) {

		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		if (--loop_count == 0) {

			vm_page_unlock_queues();

			/*
			 * Free the pages we reclaimed so far
			 * and take a little break to avoid
			 * hogging the page queue lock too long
			 */
			VM_OBJ_REAP_FREELIST(local_free_q,
					     disconnect_on_release);

			loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH) + 1;

			vm_page_lockspin_queues();
		}
		if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) {

			if (reap_type == REAP_DATA_FLUSH &&
			    ((p->pageout == TRUE || p->cleaning == TRUE) && p->list_req_pending == TRUE)) {
				p->list_req_pending = FALSE;
				p->cleaning = FALSE;
				/*
				 * need to drop the laundry count...
				 * we may also need to remove it
				 * from the I/O paging queue...
				 * vm_pageout_throttle_up handles both cases
				 *
				 * the laundry and pageout_queue flags are cleared...
				 */
				vm_pageout_throttle_up(p);

				if (p->pageout == TRUE) {
					/*
					 * toss the wire count we picked up
					 * when we initially set this page up
					 * to be cleaned and stolen...
					 */
					vm_page_unwire(p, TRUE);
				}
			} else if (p->busy || p->cleaning) {

				vm_page_unlock_queues();
				/*
				 * free the pages reclaimed so far
				 */
				VM_OBJ_REAP_FREELIST(local_free_q,
						     disconnect_on_release);

				PAGE_SLEEP(object, p, THREAD_UNINT);

				goto restart_after_sleep;
			}
		}
		switch (reap_type) {

		case REAP_DATA_FLUSH:
			if (VM_PAGE_WIRED(p)) {
				/*
				 * this is an odd case... perhaps we should
				 * zero-fill this page since we're conceptually
				 * tossing its data at this point, but leaving
				 * it on the object to honor the 'wire' contract
				 */
				continue;
			}
			break;

		case REAP_PURGEABLE:
			if (VM_PAGE_WIRED(p)) {
				/* can't purge a wired page */
				vm_page_purged_wired++;
				continue;
			}
			if (p->busy) {
				/*
				 * We can't reclaim a busy page but we can
				 * make it pageable (it's not wired) to make
				 * sure that it gets considered by
				 * vm_pageout_scan() later.
				 */
				vm_page_deactivate(p);
				vm_page_purged_busy++;
				continue;
			}
			if (p->cleaning || p->laundry || p->list_req_pending) {
				/*
				 * page is being acted upon,
				 * so don't mess with it
				 */
				vm_page_purged_others++;
				continue;
			}
			assert(p->object != kernel_object);

			/*
			 * we can discard this page...
			 */
			if (p->pmapped == TRUE) {
				int refmod_state;

				refmod_state = pmap_disconnect(p->phys_page);
				if (refmod_state & VM_MEM_MODIFIED) {
					p->dirty = TRUE;
				}
			}
			if (p->dirty || p->precious) {
				/*
				 * we saved the cost of cleaning this page !
				 */
				vm_page_purged_count++;
			}
			break;

		case REAP_TERMINATE:
			if (p->absent || p->private) {
				/*
				 *	For private pages, VM_PAGE_FREE just
				 *	leaves the page structure around for
				 *	its owner to clean up.  For absent
				 *	pages, the structure is returned to
				 *	the appropriate pool.
				 */
				break;
			}
			if (p->fictitious) {
				assert (p->phys_page == vm_page_guard_addr);
				break;
			}
			if (!p->dirty && p->wpmapped)
				p->dirty = pmap_is_modified(p->phys_page);

			if ((p->dirty || p->precious) && !p->error && object->alive) {

				VM_PAGE_QUEUES_REMOVE(p);
				/*
				 * flush page... page will be freed
				 * upon completion of I/O
				 */
				vm_pageout_cluster(p);

				vm_page_unlock_queues();
				/*
				 * free the pages reclaimed so far
				 */
				VM_OBJ_REAP_FREELIST(local_free_q,
						     disconnect_on_release);

				vm_object_paging_wait(object, THREAD_UNINT);

				goto restart_after_sleep;
			}
			break;
		}
		vm_page_free_prepare_queues(p);
		assert(p->pageq.next == NULL && p->pageq.prev == NULL);
		/*
		 * Add this page to our list of reclaimed pages,
		 * to be freed later.
		 */
		p->pageq.next = (queue_entry_t) local_free_q;
		local_free_q = p;
	}
	vm_page_unlock_queues();

	/*
	 * Free the remaining reclaimed pages
	 */
	VM_OBJ_REAP_FREELIST(local_free_q,
			     disconnect_on_release);
}
static void
vm_object_reap_async(
	vm_object_t	object)
{
	vm_object_lock_assert_exclusive(object);

	vm_object_reaper_lock_spin();

	vm_object_reap_count_async++;

	/* enqueue the VM object... */
	queue_enter(&vm_object_reaper_queue, object,
		    vm_object_t, cached_list);

	vm_object_reaper_unlock();

	/* ... and wake up the reaper thread */
	thread_wakeup((event_t) &vm_object_reaper_queue);
}
static void
vm_object_reaper_thread(void)
{
	vm_object_t	object, shadow_object;

	vm_object_reaper_lock_spin();

	while (!queue_empty(&vm_object_reaper_queue)) {
		queue_remove_first(&vm_object_reaper_queue,
				   object,
				   vm_object_t,
				   cached_list);

		vm_object_reaper_unlock();
		vm_object_lock(object);

		assert(object->terminating);
		assert(!object->alive);

		/*
		 * The pageout daemon might be playing with our pages.
		 * Now that the object is dead, it won't touch any more
		 * pages, but some pages might already be on their way out.
		 * Hence, we wait until the active paging activities have
		 * ceased before we break the association with the pager
		 * itself.
		 */
		while (object->paging_in_progress != 0 ||
		       object->activity_in_progress != 0) {
			vm_object_wait(object,
				       VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
				       THREAD_UNINT);
			vm_object_lock(object);
		}

		shadow_object =
			object->pageout ? VM_OBJECT_NULL : object->shadow;

		vm_object_reap(object);
		/* cache is unlocked and object is no longer valid */
		object = VM_OBJECT_NULL;

		if (shadow_object != VM_OBJECT_NULL) {
			/*
			 * Drop the reference "object" was holding on
			 * its shadow object.
			 */
			vm_object_deallocate(shadow_object);
			shadow_object = VM_OBJECT_NULL;
		}
		vm_object_reaper_lock_spin();
	}

	/* wait for more work... */
	assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);

	vm_object_reaper_unlock();

	thread_block((thread_continue_t) vm_object_reaper_thread);
}
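/*
 * Illustrative sketch (not part of the original source): the handshake
 * between vm_object_reap_async() and vm_object_reaper_thread() above is a
 * standard enqueue/wakeup vs. assert_wait/thread_block pattern keyed on the
 * address of vm_object_reaper_queue:
 *
 *	// producer (vm_object_reap_async), with the reaper lock held:
 *	queue_enter(&vm_object_reaper_queue, object, vm_object_t, cached_list);
 *	thread_wakeup((event_t) &vm_object_reaper_queue);
 *
 *	// consumer (vm_object_reaper_thread), once the queue is empty:
 *	assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
 *	vm_object_reaper_unlock();
 *	thread_block((thread_continue_t) vm_object_reaper_thread);
 */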
/*
 *	Routine:	vm_object_pager_wakeup
 *	Purpose:	Wake up anyone waiting for termination of a pager.
 */
static void
vm_object_pager_wakeup(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;
	boolean_t		waiting = FALSE;
	lck_mtx_t		*lck;

	/*
	 *	If anyone was waiting for the memory_object_terminate
	 *	to be queued, wake them up now.
	 */
	lck = vm_object_hash_lock_spin(pager);
	entry = vm_object_hash_lookup(pager, TRUE);
	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
		waiting = entry->waiting;
	vm_object_hash_unlock(lck);

	if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
		if (waiting)
			thread_wakeup((event_t) pager);
		vm_object_hash_entry_free(entry);
	}
}
/*
 *	Routine:	vm_object_release_pager
 *	Purpose:	Terminate the pager and, upon completion,
 *			release our last reference to it.
 *			just like memory_object_terminate, except
 *			that we wake up anyone blocked in vm_object_enter
 *			waiting for termination message to be queued
 *			before calling memory_object_init.
 */
static void
vm_object_release_pager(
	memory_object_t	pager,
	boolean_t	hashed)
{
	/*
	 *	Terminate the pager.
	 */

	(void) memory_object_terminate(pager);

	if (hashed == TRUE) {
		/*
		 *	Wakeup anyone waiting for this terminate
		 *	and remove the entry from the hash
		 */
		vm_object_pager_wakeup(pager);
	}
	/*
	 *	Release reference to pager.
	 */
	memory_object_deallocate(pager);
}
/*
 *	Routine:	vm_object_destroy
 *	Purpose:
 *		Shut down a VM object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
vm_object_destroy(
	vm_object_t		object,
	__unused kern_return_t	reason)
{
	memory_object_t		old_pager;

	if (object == VM_OBJECT_NULL)
		return(KERN_SUCCESS);

	/*
	 *	Remove the pager association immediately.
	 *
	 *	This will prevent the memory manager from further
	 *	meddling.  [If it wanted to flush data or make
	 *	other changes, it should have done so before performing
	 *	the destroy call.]
	 */

	vm_object_lock(object);
	object->can_persist = FALSE;
	object->named = FALSE;
	object->alive = FALSE;

	if (object->hashed) {
		lck_mtx_t	*lck;
		/*
		 *	Rip out the pager from the vm_object now...
		 */
		lck = vm_object_hash_lock_spin(object->pager);
		vm_object_remove(object);
		vm_object_hash_unlock(lck);
	}
	old_pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;
	if (old_pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);

	/*
	 * Wait for the existing paging activity (that got
	 * through before we nulled out the pager) to subside.
	 */

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_unlock(object);

	/*
	 *	Terminate the object now.
	 */
	if (old_pager != MEMORY_OBJECT_NULL) {
		vm_object_release_pager(old_pager, object->hashed);

		/*
		 * JMM - Release the caller's reference.  This assumes the
		 * caller had a reference to release, which is a big (but
		 * currently valid) assumption if this is driven from the
		 * vnode pager (it is holding a named reference when making
		 * this call)..
		 */
		vm_object_deallocate(object);
	}
	return(KERN_SUCCESS);
}
#define VM_OBJ_DEACT_ALL_STATS DEBUG
#if VM_OBJ_DEACT_ALL_STATS
uint32_t vm_object_deactivate_all_pages_batches = 0;
uint32_t vm_object_deactivate_all_pages_pages = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
/*
 *	vm_object_deactivate_all_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
static void
vm_object_deactivate_all_pages(
	register vm_object_t	object)
{
	register vm_page_t	p;
	int			loop_count;
#if VM_OBJ_DEACT_ALL_STATS
	int			pages_count;
#endif /* VM_OBJ_DEACT_ALL_STATS */
#define V_O_D_A_P_MAX_BATCH	256

	loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
#if VM_OBJ_DEACT_ALL_STATS
	pages_count = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
	vm_page_lock_queues();
	queue_iterate(&object->memq, p, vm_page_t, listq) {
		if (--loop_count == 0) {
#if VM_OBJ_DEACT_ALL_STATS
			hw_atomic_add(&vm_object_deactivate_all_pages_batches,
				      1);
			hw_atomic_add(&vm_object_deactivate_all_pages_pages,
				      pages_count);
			pages_count = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
			lck_mtx_yield(&vm_page_queue_lock);
			loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
		}
		if (!p->busy && !p->throttled) {
#if VM_OBJ_DEACT_ALL_STATS
			pages_count++;
#endif /* VM_OBJ_DEACT_ALL_STATS */
			vm_page_deactivate(p);
		}
	}
#if VM_OBJ_DEACT_ALL_STATS
	if (pages_count) {
		hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
		hw_atomic_add(&vm_object_deactivate_all_pages_pages,
			      pages_count);
	}
#endif /* VM_OBJ_DEACT_ALL_STATS */
	vm_page_unlock_queues();
}
#endif	/* VM_OBJECT_CACHE */
2321 * The "chunk" macros are used by routines below when looking for pages to deactivate. These
2322 * exist because of the need to handle shadow chains. When deactivating pages, we only
2323 * want to deactive the ones at the top most level in the object chain. In order to do
2324 * this efficiently, the specified address range is divided up into "chunks" and we use
2325 * a bit map to keep track of which pages have already been processed as we descend down
2326 * the shadow chain. These chunk macros hide the details of the bit map implementation
2327 * as much as we can.
2329 * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is
2330 * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest
2331 * order bit represents page 0 in the current range and highest order bit represents
2334 * For further convenience, we also use negative logic for the page state in the bit map.
2335 * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has
2336 * been processed. This way we can simply test the 64-bit long word to see if it's zero
2337 * to easily tell if the whole range has been processed. Therefore, the bit map starts
2338 * out with all the bits set. The macros below hide all these details from the caller.
2341 #define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */
2342 /* be the same as the number of bits in */
2343 /* the chunk_state_t type. We use 64 */
2344 /* just for convenience. */
2346 #define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */
2348 typedef uint64_t chunk_state_t
;
2351 * The bit map uses negative logic, so we start out with all 64 bits set to indicate
2352 * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE,
2353 * then we mark pages beyond the len as having been "processed" so that we don't waste time
2354 * looking at pages in that range. This can save us from unnecessarily chasing down the
2358 #define CHUNK_INIT(c, len) \
2362 (c) = 0xffffffffffffffffLL; \
2364 for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \
2365 MARK_PAGE_HANDLED(c, p); \
2370 * Return true if all pages in the chunk have not yet been processed.
2373 #define CHUNK_NOT_COMPLETE(c) ((c) != 0)
2376 * Return true if the page at offset 'p' in the bit map has already been handled
2377 * while processing a higher level object in the shadow chain.
2380 #define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0)
2383 * Mark the page at offset 'p' in the bit map as having been processed.
2386 #define MARK_PAGE_HANDLED(c, p) \
2388 (c) = (c) & ~(1LL << (p)); \
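
/*
 * A minimal usage sketch of the chunk bit map (illustrative only; the local
 * names below are hypothetical, but the macros and PAGE_SIZE_64 are the ones
 * defined above).  A caller initializes the map for the range it cares about,
 * marks pages as it finds them at each level of the shadow chain, and can stop
 * descending once every page has been accounted for:
 *
 *	chunk_state_t	cs;
 *
 *	CHUNK_INIT(cs, 3 * PAGE_SIZE_64);	// only pages 0..2 of interest;
 *						// pages 3..63 pre-marked handled
 *	if (!PAGE_ALREADY_HANDLED(cs, 0))
 *		MARK_PAGE_HANDLED(cs, 0);	// found page 0 at this level
 *	if (CHUNK_NOT_COMPLETE(cs))
 *		;				// some pages still unseen: look
 *						// in the next shadow object
 */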
/*
 * Return true if the page at the given offset has been paged out.  Object is
 * locked upon entry and returned locked.
 */

static boolean_t
page_is_paged_out(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	kern_return_t	kr;
	memory_object_t	pager;

	/*
	 * Check the existence map for the page if we have one, otherwise
	 * ask the pager about this page.
	 */

#if MACH_PAGEMAP
	if (object->existence_map) {
		if (vm_external_state_get(object->existence_map, offset)
		    == VM_EXTERNAL_STATE_EXISTS) {
			/*
			 * We found the page
			 */
			return TRUE;
		}
	} else
#endif /* MACH_PAGEMAP */
	if (object->internal &&
	    object->alive &&
	    !object->terminating &&
	    object->pager_ready) {

		/*
		 * We're already holding a "paging in progress" reference
		 * so the object can't disappear when we release the lock.
		 */

		assert(object->paging_in_progress);
		pager = object->pager;
		vm_object_unlock(object);

		kr = memory_object_data_request(
			pager,
			offset + object->paging_offset,
			0,	/* just poke the pager */
			VM_PROT_READ,
			NULL);

		vm_object_lock(object);

		if (kr == KERN_SUCCESS) {
			/*
			 * We found the page
			 */
			return TRUE;
		}
	}
	return FALSE;
}
/*
 * Deactivate the pages in the specified object and range.  If kill_page is set, also discard any
 * page modified state from the pmap.  Update the chunk_state as we go along.  The caller must specify
 * a size that is less than or equal to the CHUNK_SIZE.
 */

static void
deactivate_pages_in_object(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		kill_page,
	boolean_t		reusable_page,
	boolean_t		all_reusable,
	chunk_state_t		*chunk_state)
{
	vm_page_t	m;
	int		p;
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp;
	int		dw_count;
	int		dw_limit;
	unsigned int	reusable = 0;

	/*
	 * Examine each page in the chunk.  The variable 'p' is the page number relative to the start of the
	 * chunk.  Since this routine is called once for each level in the shadow chain, the chunk_state may
	 * have pages marked as having been processed already.  We stop the loop early if we find we've handled
	 * all the pages in the chunk.
	 */

	dwp = &dw_array[0];
	dw_count = 0;
	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);

	for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) {

		/*
		 * If this offset has already been found and handled in a higher level object, then don't
		 * do anything with it in the current shadow object.
		 */

		if (PAGE_ALREADY_HANDLED(*chunk_state, p))
			continue;

		/*
		 * See if the page at this offset is around.  First check to see if the page is resident,
		 * then if not, check the existence map or with the pager.
		 */

		if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {

			/*
			 * We found a page we were looking for.  Mark it as "handled" now in the chunk_state
			 * so that we won't bother looking for a page at this offset again if there are more
			 * shadow objects.  Then deactivate the page.
			 */

			MARK_PAGE_HANDLED(*chunk_state, p);

			if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy)) {
				int	clear_refmod;

				assert(!m->laundry);

				clear_refmod = VM_MEM_REFERENCED;
				dwp->dw_mask = DW_clear_reference;

				if ((kill_page) && (object->internal)) {
					m->precious = FALSE;
					m->dirty = FALSE;

					clear_refmod |= VM_MEM_MODIFIED;
					if (m->throttled) {
						/*
						 * This page is now clean and
						 * reclaimable.  Move it out
						 * of the throttled queue, so
						 * that vm_pageout_scan() can
						 * find it.
						 */
						dwp->dw_mask |= DW_move_page;
					}
#if	MACH_PAGEMAP
					vm_external_state_clr(object->existence_map, offset);
#endif	/* MACH_PAGEMAP */

					if (reusable_page && !m->reusable) {
						assert(!all_reusable);
						assert(!object->all_reusable);
						m->reusable = TRUE;
						object->reusable_page_count++;
						assert(object->resident_page_count >= object->reusable_page_count);
						reusable++;
					}
				}
				pmap_clear_refmod(m->phys_page, clear_refmod);

				if (!m->throttled && !(reusable_page || all_reusable))
					dwp->dw_mask |= DW_move_page;

				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);

				if (dw_count >= dw_limit) {
					if (reusable) {
						OSAddAtomic(reusable,
							    &vm_page_stats_reusable.reusable_count);
						vm_page_stats_reusable.reusable += reusable;
						reusable = 0;
					}
					vm_page_do_delayed_work(object, &dw_array[0], dw_count);

					dwp = &dw_array[0];
					dw_count = 0;
				}
			}

		} else {

			/*
			 * The page at this offset isn't memory resident, check to see if it's
			 * been paged out.  If so, mark it as handled so we don't bother looking
			 * for it in the shadow chain.
			 */

			if (page_is_paged_out(object, offset)) {
				MARK_PAGE_HANDLED(*chunk_state, p);

				/*
				 * If we're killing a non-resident page, then clear the page in the existence
				 * map so we don't bother paging it back in if it's touched again in the future.
				 */

				if ((kill_page) && (object->internal)) {
#if	MACH_PAGEMAP
					vm_external_state_clr(object->existence_map, offset);
#endif	/* MACH_PAGEMAP */
				}
			}
		}
	}

	if (reusable) {
		OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reusable += reusable;
		reusable = 0;
	}

	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
}
/*
 * Deactivate a "chunk" of the given range of the object starting at offset.  A "chunk"
 * will always be less than or equal to the given size.  The total range is divided up
 * into chunks for efficiency and performance related to the locks and handling the shadow
 * chain.  This routine returns how much of the given "size" it actually processed.  It's
 * up to the caller to loop and keep calling this routine until the entire range they want
 * to process has been done.
 */

static vm_object_size_t
deactivate_a_chunk(
	vm_object_t		orig_object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		kill_page,
	boolean_t		reusable_page,
	boolean_t		all_reusable)
{
	vm_object_t		object;
	vm_object_t		tmp_object;
	vm_object_size_t	length;
	chunk_state_t		chunk_state;

	/*
	 * Get set to do a chunk.  We'll do up to CHUNK_SIZE, but no more than the
	 * remaining size the caller asked for.
	 */

	length = MIN(size, CHUNK_SIZE);

	/*
	 * The chunk_state keeps track of which pages we've already processed if there's
	 * a shadow chain on this object.  At this point, we haven't done anything with this
	 * range of pages yet, so initialize the state to indicate no pages processed yet.
	 */

	CHUNK_INIT(chunk_state, length);
	object = orig_object;

	/*
	 * Start at the top level object and iterate around the loop once for each object
	 * in the shadow chain.  We stop processing early if we've already found all the pages
	 * in the range.  Otherwise we stop when we run out of shadow objects.
	 */

	while (object && CHUNK_NOT_COMPLETE(chunk_state)) {
		vm_object_paging_begin(object);

		deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state);

		vm_object_paging_end(object);

		/*
		 * We've finished with this object, see if there's a shadow object.  If
		 * there is, update the offset and lock the new object.  We also turn off
		 * kill_page at this point since we only kill pages in the top most object.
		 */

		tmp_object = object->shadow;

		if (tmp_object) {
			kill_page = FALSE;
			reusable_page = FALSE;
			all_reusable = FALSE;
			offset += object->vo_shadow_offset;
			vm_object_lock(tmp_object);
		}

		if (object != orig_object)
			vm_object_unlock(object);

		object = tmp_object;
	}

	if (object && object != orig_object)
		vm_object_unlock(object);

	return length;
}
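
/*
 * As the header comment above notes, a caller is expected to keep calling
 * deactivate_a_chunk() until the whole range has been processed.  A minimal
 * sketch of that pattern (the real caller is vm_object_deactivate_pages()
 * below; "off" and "len" here are hypothetical locals):
 *
 *	while (len) {
 *		vm_object_size_t done;
 *
 *		done = deactivate_a_chunk(object, off, len,
 *					  kill_page, reusable_page, all_reusable);
 *		off += done;
 *		len -= done;
 *	}
 */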
/*
 * Move any resident pages in the specified range to the inactive queue.  If kill_page is set,
 * we also clear the modified status of the page and "forget" any changes that have been made
 * to the page.
 */

__private_extern__ void
vm_object_deactivate_pages(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		kill_page,
	boolean_t		reusable_page)
{
	vm_object_size_t	length;
	boolean_t		all_reusable;

	/*
	 * We break the range up into chunks and do one chunk at a time.  This is for
	 * efficiency and performance while handling the shadow chains and the locks.
	 * The deactivate_a_chunk() function returns how much of the range it processed.
	 * We keep calling this routine until the given size is exhausted.
	 */

	all_reusable = FALSE;
	if (reusable_page &&
	    object->internal &&
	    object->vo_size != 0 &&
	    object->vo_size == size &&
	    object->reusable_page_count == 0) {
		all_reusable = TRUE;
		reusable_page = FALSE;
	}

	if ((reusable_page || all_reusable) && object->all_reusable) {
		/* This means MADV_FREE_REUSABLE has been called twice, which
		 * is probably illegal. */
		return;
	}

	while (size) {
		length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable);

		size -= length;
		offset += length;
	}

	if (all_reusable) {
		if (!object->all_reusable) {
			unsigned int reusable;

			object->all_reusable = TRUE;
			assert(object->reusable_page_count == 0);
			/* update global stats */
			reusable = object->resident_page_count;
			OSAddAtomic(reusable,
				    &vm_page_stats_reusable.reusable_count);
			vm_page_stats_reusable.reusable += reusable;
			vm_page_stats_reusable.all_reusable_calls++;
		}
	} else if (reusable_page) {
		vm_page_stats_reusable.partial_reusable_calls++;
	}
}
void
vm_object_reuse_pages(
	vm_object_t		object,
	vm_object_offset_t	start_offset,
	vm_object_offset_t	end_offset,
	boolean_t		allow_partial_reuse)
{
	vm_object_offset_t	cur_offset;
	vm_page_t		m;
	unsigned int		reused, reusable;

#define VM_OBJECT_REUSE_PAGE(object, m, reused)				\
	MACRO_BEGIN							\
		if ((m) != VM_PAGE_NULL &&				\
		    (m)->reusable) {					\
			assert((object)->reusable_page_count <=		\
			       (object)->resident_page_count);		\
			assert((object)->reusable_page_count > 0);	\
			(object)->reusable_page_count--;		\
			(m)->reusable = FALSE;				\
			(reused)++;					\
		}							\
	MACRO_END

	reused = 0;
	reusable = 0;

	vm_object_lock_assert_exclusive(object);

	if (object->all_reusable) {
		assert(object->reusable_page_count == 0);
		object->all_reusable = FALSE;
		if (end_offset - start_offset == object->vo_size ||
		    !allow_partial_reuse) {
			vm_page_stats_reusable.all_reuse_calls++;
			reused = object->resident_page_count;
		} else {
			vm_page_stats_reusable.partial_reuse_calls++;
			queue_iterate(&object->memq, m, vm_page_t, listq) {
				if (m->offset < start_offset ||
				    m->offset >= end_offset) {
					m->reusable = TRUE;
					object->reusable_page_count++;
					assert(object->resident_page_count >= object->reusable_page_count);
					continue;
				} else {
					assert(!m->reusable);
					reused++;
				}
			}
		}
	} else if (object->resident_page_count >
		   ((end_offset - start_offset) >> PAGE_SHIFT)) {
		vm_page_stats_reusable.partial_reuse_calls++;
		for (cur_offset = start_offset;
		     cur_offset < end_offset;
		     cur_offset += PAGE_SIZE_64) {
			if (object->reusable_page_count == 0) {
				break;
			}
			m = vm_page_lookup(object, cur_offset);
			VM_OBJECT_REUSE_PAGE(object, m, reused);
		}
	} else {
		vm_page_stats_reusable.partial_reuse_calls++;
		queue_iterate(&object->memq, m, vm_page_t, listq) {
			if (object->reusable_page_count == 0) {
				break;
			}
			if (m->offset < start_offset ||
			    m->offset >= end_offset) {
				continue;
			}
			VM_OBJECT_REUSE_PAGE(object, m, reused);
		}
	}

	/* update global stats */
	OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count);
	vm_page_stats_reusable.reused += reused;
	vm_page_stats_reusable.reusable += reusable;
}
/*
 *	Routine:	vm_object_pmap_protect
 *
 *	Purpose:
 *		Reduces the permission for all physical
 *		pages in the specified object range.
 *
 *		If removing write permission only, it is
 *		sufficient to protect only the pages in
 *		the top-level object; only those pages may
 *		have write permission.
 *
 *		If removing all access, we must follow the
 *		shadow chain from the top-level object to
 *		remove access to all pages in shadowed objects.
 *
 *		The object must *not* be locked.  The object must
 *		be temporary/internal.
 *
 *		If pmap is not NULL, this routine assumes that
 *		the only mappings for the pages are in that
 *		pmap.
 */

__private_extern__ void
vm_object_pmap_protect(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	vm_object_size_t		size,
	pmap_t				pmap,
	vm_map_offset_t			pmap_start,
	vm_prot_t			prot)
{
	if (object == VM_OBJECT_NULL)
		return;
	size = vm_object_round_page(size);
	offset = vm_object_trunc_page(offset);

	vm_object_lock(object);

	if (object->phys_contiguous) {
		if (pmap != NULL) {
			vm_object_unlock(object);
			pmap_protect(pmap, pmap_start, pmap_start + size, prot);
		} else {
			vm_object_offset_t phys_start, phys_end, phys_addr;

			phys_start = object->vo_shadow_offset + offset;
			phys_end = phys_start + size;
			assert(phys_start <= phys_end);
			assert(phys_end <= object->vo_shadow_offset + object->vo_size);
			vm_object_unlock(object);

			for (phys_addr = phys_start;
			     phys_addr < phys_end;
			     phys_addr += PAGE_SIZE_64) {
				pmap_page_protect((ppnum_t) (phys_addr >> PAGE_SHIFT), prot);
			}
		}
		return;
	}

	assert(object->internal);

	while (TRUE) {
		if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
			vm_object_unlock(object);
			pmap_protect(pmap, pmap_start, pmap_start + size, prot);
			return;
		}

		/* if we are doing large ranges with respect to resident */
		/* page count then we should iterate over pages otherwise */
		/* inverse page look-up will be faster */
		if (ptoa_64(object->resident_page_count / 4) < size) {
			vm_page_t		p;
			vm_object_offset_t	end;

			end = offset + size;

			if (pmap != PMAP_NULL) {
				queue_iterate(&object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    (offset <= p->offset) && (p->offset < end)) {
						vm_map_offset_t start;

						start = pmap_start + p->offset - offset;
						pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
					}
				}
			} else {
				queue_iterate(&object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    (offset <= p->offset) && (p->offset < end)) {
						pmap_page_protect(p->phys_page, prot);
					}
				}
			}
		} else {
			vm_page_t		p;
			vm_object_offset_t	end;
			vm_object_offset_t	target_off;

			end = offset + size;

			if (pmap != PMAP_NULL) {
				for(target_off = offset;
				    target_off < end;
				    target_off += PAGE_SIZE) {
					p = vm_page_lookup(object, target_off);
					if (p != VM_PAGE_NULL) {
						vm_object_offset_t start;

						start = pmap_start +
							(p->offset - offset);
						pmap_protect(pmap, start,
							     start + PAGE_SIZE, prot);
					}
				}
			} else {
				for(target_off = offset;
				    target_off < end; target_off += PAGE_SIZE) {
					p = vm_page_lookup(object, target_off);
					if (p != VM_PAGE_NULL) {
						pmap_page_protect(p->phys_page, prot);
					}
				}
			}
		}

		if (prot == VM_PROT_NONE) {
			/*
			 * Must follow shadow chain to remove access
			 * to pages in shadowed objects.
			 */
			register vm_object_t	next_object;

			next_object = object->shadow;
			if (next_object != VM_OBJECT_NULL) {
				offset += object->vo_shadow_offset;
				vm_object_lock(next_object);
				vm_object_unlock(object);
				object = next_object;
			} else {
				/*
				 * End of chain - we are done.
				 */
				break;
			}
		} else {
			/*
			 * Pages in shadowed objects may never have
			 * write permission - we may stop here.
			 */
			break;
		}
	}

	vm_object_unlock(object);
}
/*
 *	Routine:	vm_object_copy_slowly
 *
 *	Description:
 *		Copy the specified range of the source
 *		virtual memory object without using
 *		protection-based optimizations (such
 *		as copy-on-write).  The pages in the
 *		region are actually copied.
 *
 *	In/out conditions:
 *		The caller must hold a reference and a lock
 *		for the source virtual memory object.  The source
 *		object will be returned *unlocked*.
 *
 *	Results:
 *		If the copy is completed successfully, KERN_SUCCESS is
 *		returned.  If the caller asserted the interruptible
 *		argument, and an interruption occurred while waiting
 *		for a user-generated event, MACH_SEND_INTERRUPTED is
 *		returned.  Other values may be returned to indicate
 *		hard errors during the copy operation.
 *
 *		A new virtual memory object is returned in a
 *		parameter (_result_object).  The contents of this
 *		new object, starting at a zero offset, are a copy
 *		of the source memory region.  In the event of
 *		an error, this parameter will contain the value
 *		VM_OBJECT_NULL.
 */
__private_extern__ kern_return_t
vm_object_copy_slowly(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	boolean_t		interruptible,
	vm_object_t		*_result_object)	/* OUT */
{
	vm_object_t		new_object;
	vm_object_offset_t	new_offset;

	struct vm_object_fault_info fault_info;

	XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
	    src_object, src_offset, size, 0, 0);

	if (size == 0) {
		vm_object_unlock(src_object);
		*_result_object = VM_OBJECT_NULL;
		return(KERN_INVALID_ARGUMENT);
	}

	/*
	 *	Prevent destruction of the source object while we copy.
	 */

	vm_object_reference_locked(src_object);
	vm_object_unlock(src_object);

	/*
	 *	Create a new object to hold the copied pages.
	 *	A few notes:
	 *		We fill the new object starting at offset 0,
	 *		 regardless of the input offset.
	 *		We don't bother to lock the new object within
	 *		 this routine, since we have the only reference.
	 */

	new_object = vm_object_allocate(size);
	new_offset = 0;

	assert(size == trunc_page_64(size));	/* Will the loop terminate? */

	fault_info.interruptible = interruptible;
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.user_tag = 0;
	fault_info.lo_offset = src_offset;
	fault_info.hi_offset = src_offset + size;
	fault_info.no_cache = FALSE;
	fault_info.stealth = TRUE;
	fault_info.io_sync = FALSE;
	fault_info.cs_bypass = FALSE;
	fault_info.mark_zf_absent = FALSE;

	for ( ;
	    size != 0 ;
	    src_offset += PAGE_SIZE_64,
	    new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
	    ) {
		vm_page_t	new_page;
		vm_fault_return_t result;

		vm_object_lock(new_object);

		while ((new_page = vm_page_alloc(new_object, new_offset))
				== VM_PAGE_NULL) {

			vm_object_unlock(new_object);

			if (!vm_page_wait(interruptible)) {
				vm_object_deallocate(new_object);
				vm_object_deallocate(src_object);
				*_result_object = VM_OBJECT_NULL;
				return(MACH_SEND_INTERRUPTED);
			}
			vm_object_lock(new_object);
		}
		vm_object_unlock(new_object);

		do {
			vm_prot_t	prot = VM_PROT_READ;
			vm_page_t	_result_page;
			vm_page_t	top_page;
			vm_page_t	result_page;
			kern_return_t	error_code;

			vm_object_lock(src_object);
			vm_object_paging_begin(src_object);

			if (size > (vm_size_t) -1) {
				/* 32-bit overflow */
				fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE);
			} else {
				fault_info.cluster_size = (vm_size_t) size;
				assert(fault_info.cluster_size == size);
			}

			XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
			result = vm_fault_page(src_object, src_offset,
				VM_PROT_READ, FALSE,
				&prot, &_result_page, &top_page,
				(int *)0,
				&error_code, FALSE, FALSE, &fault_info);

			switch(result) {
			case VM_FAULT_SUCCESS:
				result_page = _result_page;

				/*
				 *	We don't need to hold the object
				 *	lock -- the busy page will be enough.
				 *	[We don't care about picking up any
				 *	new modifications.]
				 *
				 *	Copy the page to the new object.
				 *
				 *	POLICY DECISION:
				 *		If result_page is clean,
				 *		we could steal it instead
				 *		of copying.
				 */

				vm_object_unlock(result_page->object);
				vm_page_copy(result_page, new_page);

				/*
				 *	Let go of both pages (make them
				 *	not busy, perform wakeup, activate).
				 */
				vm_object_lock(new_object);
				new_page->dirty = TRUE;
				PAGE_WAKEUP_DONE(new_page);
				vm_object_unlock(new_object);

				vm_object_lock(result_page->object);
				PAGE_WAKEUP_DONE(result_page);

				vm_page_lockspin_queues();
				if (!result_page->active &&
				    !result_page->inactive &&
				    !result_page->throttled)
					vm_page_activate(result_page);
				vm_page_activate(new_page);
				vm_page_unlock_queues();

				/*
				 *	Release paging references and
				 *	top-level placeholder page, if any.
				 */

				vm_fault_cleanup(result_page->object,
						 top_page);
				break;

			case VM_FAULT_RETRY:
				break;

			case VM_FAULT_MEMORY_SHORTAGE:
				if (vm_page_wait(interruptible))
					break;
				/* fall thru */

			case VM_FAULT_INTERRUPTED:
				vm_object_lock(new_object);
				VM_PAGE_FREE(new_page);
				vm_object_unlock(new_object);

				vm_object_deallocate(new_object);
				vm_object_deallocate(src_object);
				*_result_object = VM_OBJECT_NULL;
				return(MACH_SEND_INTERRUPTED);

			case VM_FAULT_SUCCESS_NO_VM_PAGE:
				/* success but no VM page: fail */
				vm_object_paging_end(src_object);
				vm_object_unlock(src_object);
				/*FALLTHROUGH*/
			case VM_FAULT_MEMORY_ERROR:
				/*
				 * A policy choice:
				 *	(a) ignore pages that we can't
				 *	    copy
				 *	(b) return the null object if
				 *	    any page fails [chosen]
				 */

				vm_object_lock(new_object);
				VM_PAGE_FREE(new_page);
				vm_object_unlock(new_object);

				vm_object_deallocate(new_object);
				vm_object_deallocate(src_object);
				*_result_object = VM_OBJECT_NULL;
				return(error_code ? error_code :
				       KERN_MEMORY_ERROR);

			default:
				panic("vm_object_copy_slowly: unexpected error"
				      " 0x%x from vm_fault_page()\n", result);
			}
		} while (result != VM_FAULT_SUCCESS);
	}

	/*
	 *	Lose the extra reference, and return our object.
	 */
	vm_object_deallocate(src_object);
	*_result_object = new_object;
	return(KERN_SUCCESS);
}
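
/*
 * A minimal caller-side sketch of the conventions documented above (the
 * locals "copy" and "kr" are hypothetical): the source object is locked and
 * referenced by the caller, and comes back unlocked whatever the result.
 *
 *	vm_object_t	copy;
 *	kern_return_t	kr;
 *
 *	vm_object_lock(src_object);
 *	kr = vm_object_copy_slowly(src_object, src_offset, size,
 *				   TRUE,		// interruptible
 *				   &copy);
 *	// src_object is now unlocked; on success, "copy" holds private copies
 *	// of the pages starting at offset 0; on failure, copy == VM_OBJECT_NULL.
 */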
/*
 *	Routine:	vm_object_copy_quickly
 *
 *	Purpose:
 *		Copy the specified range of the source virtual
 *		memory object, if it can be done without waiting
 *		for user-generated events.
 *
 *	Results:
 *		If the copy is successful, the copy is returned in
 *		the arguments; otherwise, the arguments are not
 *		affected.
 *
 *	In/out conditions:
 *		The object should be unlocked on entry and exit.
 */

__private_extern__ boolean_t
vm_object_copy_quickly(
	vm_object_t		*_object,		/* INOUT */
	__unused vm_object_offset_t	offset,		/* IN */
	__unused vm_object_size_t	size,		/* IN */
	boolean_t		*_src_needs_copy,	/* OUT */
	boolean_t		*_dst_needs_copy)	/* OUT */
{
	vm_object_t	object = *_object;
	memory_object_copy_strategy_t copy_strategy;

	XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
	    *_object, offset, size, 0, 0);
	if (object == VM_OBJECT_NULL) {
		*_src_needs_copy = FALSE;
		*_dst_needs_copy = FALSE;
		return(TRUE);
	}

	vm_object_lock(object);

	copy_strategy = object->copy_strategy;

	switch (copy_strategy) {
	case MEMORY_OBJECT_COPY_SYMMETRIC:

		/*
		 *	Symmetric copy strategy.
		 *	Make another reference to the object.
		 *	Leave object/offset unchanged.
		 */

		vm_object_reference_locked(object);
		object->shadowed = TRUE;
		vm_object_unlock(object);

		/*
		 *	Both source and destination must make
		 *	shadows, and the source must be made
		 *	read-only if not already.
		 */

		*_src_needs_copy = TRUE;
		*_dst_needs_copy = TRUE;

		break;

	case MEMORY_OBJECT_COPY_DELAY:
		vm_object_unlock(object);
		return(FALSE);

	default:
		vm_object_unlock(object);
		return(FALSE);
	}
	return(TRUE);
}
static int copy_call_count = 0;
static int copy_call_sleep_count = 0;
static int copy_call_restart_count = 0;

/*
 *	Routine:	vm_object_copy_call [internal]
 *
 *	Description:
 *		Copy the source object (src_object), using the
 *		user-managed copy algorithm.
 *
 *	In/out conditions:
 *		The source object must be locked on entry.  It
 *		will be *unlocked* on exit.
 *
 *	Results:
 *		If the copy is successful, KERN_SUCCESS is returned.
 *		A new object that represents the copied virtual
 *		memory is returned in a parameter (*_result_object).
 *		If the return value indicates an error, this parameter
 *		is not valid.
 */
static kern_return_t
vm_object_copy_call(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*_result_object)	/* OUT */
{
	kern_return_t	kr;
	vm_object_t	copy;
	boolean_t	check_ready = FALSE;
	uint32_t	try_failed_count = 0;

	/*
	 *	If a copy is already in progress, wait and retry.
	 *
	 *	XXX
	 *	Consider making this call interruptable, as Mike
	 *	intended it to be.
	 *
	 *	XXXO
	 *	Need a counter or version or something to allow
	 *	us to use the copy that the currently requesting
	 *	thread is obtaining -- is it worth adding to the
	 *	vm object structure? Depends how common this case is.
	 */
	copy_call_count++;
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_restart_count++;
	}

	/*
	 *	Indicate (for the benefit of memory_object_create_copy)
	 *	that we want a copy for src_object. (Note that we cannot
	 *	do a real assert_wait before calling memory_object_copy,
	 *	so we simply set the flag.)
	 */

	vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
	vm_object_unlock(src_object);

	/*
	 *	Ask the memory manager to give us a memory object
	 *	which represents a copy of the src object.
	 *	The memory manager may give us a memory object
	 *	which we already have, or it may give us a
	 *	new memory object. This memory object will arrive
	 *	via memory_object_create_copy.
	 */

	kr = KERN_FAILURE;	/* XXX need to change memory_object.defs */
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 *	Wait for the copy to arrive.
	 */
	vm_object_lock(src_object);
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_sleep_count++;
	}
Retry:
	assert(src_object->copy != VM_OBJECT_NULL);
	copy = src_object->copy;
	if (!vm_object_lock_try(copy)) {
		vm_object_unlock(src_object);

		try_failed_count++;
		mutex_pause(try_failed_count);	/* wait a bit */

		vm_object_lock(src_object);
		goto Retry;
	}
	if (copy->vo_size < src_offset+size)
		copy->vo_size = src_offset+size;

	if (!copy->pager_ready)
		check_ready = TRUE;

	/*
	 *	Return the copy.
	 */
	*_result_object = copy;
	vm_object_unlock(copy);
	vm_object_unlock(src_object);

	/* Wait for the copy to be ready. */
	if (check_ready == TRUE) {
		vm_object_lock(copy);
		while (!copy->pager_ready) {
			vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
		}
		vm_object_unlock(copy);
	}

	return KERN_SUCCESS;
}
static int copy_delayed_lock_collisions = 0;
static int copy_delayed_max_collisions = 0;
static int copy_delayed_lock_contention = 0;
static int copy_delayed_protect_iterate = 0;

/*
 *	Routine:	vm_object_copy_delayed [internal]
 *
 *	Description:
 *		Copy the specified virtual memory object, using
 *		the asymmetric copy-on-write algorithm.
 *
 *	In/out conditions:
 *		The src_object must be locked on entry.  It will be unlocked
 *		on exit - so the caller must also hold a reference to it.
 *
 *		This routine will not block waiting for user-generated
 *		events.  It is not interruptible.
 */
__private_extern__ vm_object_t
vm_object_copy_delayed(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	boolean_t		src_object_shared)
{
	vm_object_t		new_copy = VM_OBJECT_NULL;
	vm_object_t		old_copy;
	vm_page_t		p;
	vm_object_size_t	copy_size = src_offset + size;

	int collisions = 0;

	/*
	 *	The user-level memory manager wants to see all of the changes
	 *	to this object, but it has promised not to make any changes on
	 *	its own.
	 *
	 *	Perform an asymmetric copy-on-write, as follows:
	 *		Create a new object, called a "copy object" to hold
	 *		pages modified by the new mapping (i.e., the copy,
	 *		not the original mapping).
	 *		Record the original object as the backing object for
	 *		the copy object.  If the original mapping does not
	 *		change a page, it may be used read-only by the copy.
	 *		Record the copy object in the original object.
	 *		When the original mapping causes a page to be modified,
	 *		it must be copied to a new page that is "pushed" to
	 *		the copy object.
	 *		Mark the new mapping (the copy object) copy-on-write.
	 *		This makes the copy object itself read-only, allowing
	 *		it to be reused if the original mapping makes no
	 *		changes, and simplifying the synchronization required
	 *		in the "push" operation described above.
	 *
	 *	The copy-on-write is said to be asymmetric because the original
	 *	object is *not* marked copy-on-write. A copied page is pushed
	 *	to the copy object, regardless of which party attempted to modify
	 *	the page.
	 *
	 *	Repeated asymmetric copy operations may be done. If the
	 *	original object has not been changed since the last copy, its
	 *	copy object can be reused. Otherwise, a new copy object can be
	 *	inserted between the original object and its previous copy
	 *	object.  Since any copy object is read-only, this cannot
	 *	affect the contents of the previous copy object.
	 *
	 *	Note that a copy object is higher in the object tree than the
	 *	original object; therefore, use of the copy object recorded in
	 *	the original object must be done carefully, to avoid deadlock.
	 */
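
	/*
	 * A sketch of the object graph this comment describes, stated in terms
	 * of the fields that are actually set further down in this routine
	 * (illustrative only):
	 *
	 *	new_copy = vm_object_allocate(copy_size);
	 *	...
	 *	new_copy->shadow = src_object;	// copy object backed by original
	 *	new_copy->vo_shadow_offset = 0;
	 *	new_copy->shadowed = TRUE;	// caller must set needs_copy
	 *	src_object->copy = new_copy;	// original remembers its copy
	 *
	 * So the copy object sits above the original in the shadow tree, and a
	 * page modified through the original mapping gets pushed up into
	 * new_copy before the original page is changed.
	 */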
 Retry:

	/*
	 * Wait for paging in progress.
	 */
	if (!src_object->true_share &&
	    (src_object->paging_in_progress != 0 ||
	     src_object->activity_in_progress != 0)) {
		if (src_object_shared == TRUE) {
			vm_object_unlock(src_object);
			vm_object_lock(src_object);
			src_object_shared = FALSE;
			goto Retry;
		}
		vm_object_paging_wait(src_object, THREAD_UNINT);
	}

	/*
	 *	See whether we can reuse the result of a previous
	 *	copy operation.
	 */

	old_copy = src_object->copy;
	if (old_copy != VM_OBJECT_NULL) {
		int	lock_granted;

		/*
		 *	Try to get the locks (out of order)
		 */
		if (src_object_shared == TRUE)
			lock_granted = vm_object_lock_try_shared(old_copy);
		else
			lock_granted = vm_object_lock_try(old_copy);

		if (!lock_granted) {
			vm_object_unlock(src_object);

			if (collisions++ == 0)
				copy_delayed_lock_contention++;
			mutex_pause(collisions);

			/* Heisenberg Rules */
			copy_delayed_lock_collisions++;

			if (collisions > copy_delayed_max_collisions)
				copy_delayed_max_collisions = collisions;

			if (src_object_shared == TRUE)
				vm_object_lock_shared(src_object);
			else
				vm_object_lock(src_object);

			goto Retry;
		}

		/*
		 *	Determine whether the old copy object has
		 *	been modified.
		 */

		if (old_copy->resident_page_count == 0 &&
		    !old_copy->pager_created) {
			/*
			 *	It has not been modified.
			 *
			 *	Return another reference to
			 *	the existing copy-object if
			 *	we can safely grow it (if
			 *	needed).
			 */

			if (old_copy->vo_size < copy_size) {
				if (src_object_shared == TRUE) {
					vm_object_unlock(old_copy);
					vm_object_unlock(src_object);

					vm_object_lock(src_object);
					src_object_shared = FALSE;
					goto Retry;
				}
				/*
				 * We can't perform a delayed copy if any of the
				 * pages in the extended range are wired (because
				 * we can't safely take write permission away from
				 * wired pages).  If the pages aren't wired, then
				 * go ahead and protect them.
				 */
				copy_delayed_protect_iterate++;

				queue_iterate(&src_object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    p->offset >= old_copy->vo_size &&
					    p->offset < copy_size) {
						if (VM_PAGE_WIRED(p)) {
							vm_object_unlock(old_copy);
							vm_object_unlock(src_object);

							if (new_copy != VM_OBJECT_NULL) {
								vm_object_unlock(new_copy);
								vm_object_deallocate(new_copy);
							}

							return VM_OBJECT_NULL;
						} else {
							pmap_page_protect(p->phys_page,
									  (VM_PROT_ALL & ~VM_PROT_WRITE));
						}
					}
				}
				old_copy->vo_size = copy_size;
			}

			if (src_object_shared == TRUE)
				vm_object_reference_shared(old_copy);
			else
				vm_object_reference_locked(old_copy);
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);

			if (new_copy != VM_OBJECT_NULL) {
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
			}
			return(old_copy);
		}

		/*
		 * Adjust the size argument so that the newly-created
		 * copy object will be large enough to back either the
		 * old copy object or the new mapping.
		 */
		if (old_copy->vo_size > copy_size)
			copy_size = old_copy->vo_size;

		if (new_copy == VM_OBJECT_NULL) {
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);
			new_copy = vm_object_allocate(copy_size);
			vm_object_lock(src_object);
			vm_object_lock(new_copy);

			src_object_shared = FALSE;
			goto Retry;
		}
		new_copy->vo_size = copy_size;

		/*
		 *	The copy-object is always made large enough to
		 *	completely shadow the original object, since
		 *	it may have several users who want to shadow
		 *	the original object at different points.
		 */

		assert((old_copy->shadow == src_object) &&
		       (old_copy->vo_shadow_offset == (vm_object_offset_t) 0));

	} else if (new_copy == VM_OBJECT_NULL) {
		vm_object_unlock(src_object);
		new_copy = vm_object_allocate(copy_size);
		vm_object_lock(src_object);
		vm_object_lock(new_copy);

		src_object_shared = FALSE;
		goto Retry;
	}

	/*
	 * We now have the src object locked, and the new copy object
	 * allocated and locked (and potentially the old copy locked).
	 * Before we go any further, make sure we can still perform
	 * a delayed copy, as the situation may have changed.
	 *
	 * Specifically, we can't perform a delayed copy if any of the
	 * pages in the range are wired (because we can't safely take
	 * write permission away from wired pages).  If the pages aren't
	 * wired, then go ahead and protect them.
	 */
	copy_delayed_protect_iterate++;

	queue_iterate(&src_object->memq, p, vm_page_t, listq) {
		if (!p->fictitious && p->offset < copy_size) {
			if (VM_PAGE_WIRED(p)) {
				if (old_copy)
					vm_object_unlock(old_copy);
				vm_object_unlock(src_object);
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
				return VM_OBJECT_NULL;
			} else {
				pmap_page_protect(p->phys_page,
						  (VM_PROT_ALL & ~VM_PROT_WRITE));
			}
		}
	}

	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Make the old copy-object shadow the new one.
		 *	It will receive no more pages from the original
		 *	object.
		 */

		/* remove ref. from old_copy */
		vm_object_lock_assert_exclusive(src_object);
		src_object->ref_count--;
		assert(src_object->ref_count > 0);
		vm_object_lock_assert_exclusive(old_copy);
		old_copy->shadow = new_copy;
		vm_object_lock_assert_exclusive(new_copy);
		assert(new_copy->ref_count > 0);
		new_copy->ref_count++;		/* for old_copy->shadow ref. */

#if TASK_SWAPPER
		if (old_copy->res_count) {
			VM_OBJ_RES_INCR(new_copy);
			VM_OBJ_RES_DECR(src_object);
		}
#endif

		vm_object_unlock(old_copy);	/* done with old_copy */
	}

	/*
	 *	Point the new copy at the existing object.
	 */
	vm_object_lock_assert_exclusive(new_copy);
	new_copy->shadow = src_object;
	new_copy->vo_shadow_offset = 0;
	new_copy->shadowed = TRUE;	/* caller must set needs_copy */

	vm_object_lock_assert_exclusive(src_object);
	vm_object_reference_locked(src_object);
	src_object->copy = new_copy;
	vm_object_unlock(src_object);
	vm_object_unlock(new_copy);

	XPR(XPR_VM_OBJECT,
	    "vm_object_copy_delayed: used copy object %X for source %X\n",
	    new_copy, src_object, 0, 0, 0);

	return new_copy;
}
/*
 *	Routine:	vm_object_copy_strategically
 *
 *	Purpose:
 *		Perform a copy according to the source object's
 *		declared strategy.  This operation may block,
 *		and may be interrupted.
 */
__private_extern__ kern_return_t
vm_object_copy_strategically(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*dst_object,	/* OUT */
	vm_object_offset_t	*dst_offset,	/* OUT */
	boolean_t		*dst_needs_copy) /* OUT */
{
	kern_return_t	result;
	boolean_t	interruptible = THREAD_ABORTSAFE; /* XXX */
	boolean_t	object_lock_shared = FALSE;
	memory_object_copy_strategy_t copy_strategy;

	assert(src_object != VM_OBJECT_NULL);

	copy_strategy = src_object->copy_strategy;

	if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
		vm_object_lock_shared(src_object);
		object_lock_shared = TRUE;
	} else
		vm_object_lock(src_object);

	/*
	 *	The copy strategy is only valid if the memory manager
	 *	is "ready". Internal objects are always ready.
	 */

	while (!src_object->internal && !src_object->pager_ready) {
		wait_result_t wait_result;

		if (object_lock_shared == TRUE) {
			vm_object_unlock(src_object);
			vm_object_lock(src_object);
			object_lock_shared = FALSE;
			continue;
		}
		wait_result = vm_object_sleep(	src_object,
						VM_OBJECT_EVENT_PAGER_READY,
						interruptible);
		if (wait_result != THREAD_AWAKENED) {
			vm_object_unlock(src_object);
			*dst_object = VM_OBJECT_NULL;
			*dst_offset = 0;
			*dst_needs_copy = FALSE;
			return(MACH_SEND_INTERRUPTED);
		}
	}

	/*
	 *	Use the appropriate copy strategy.
	 */

	switch (copy_strategy) {
	case MEMORY_OBJECT_COPY_DELAY:
		*dst_object = vm_object_copy_delayed(src_object,
						     src_offset, size, object_lock_shared);
		if (*dst_object != VM_OBJECT_NULL) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;
			result = KERN_SUCCESS;
			break;
		}
		vm_object_lock(src_object);
		/* fall thru when delayed copy not allowed */

	case MEMORY_OBJECT_COPY_NONE:
		result = vm_object_copy_slowly(src_object, src_offset, size,
					       interruptible, dst_object);
		if (result == KERN_SUCCESS) {
			*dst_offset = 0;
			*dst_needs_copy = FALSE;
		}
		break;

	case MEMORY_OBJECT_COPY_CALL:
		result = vm_object_copy_call(src_object, src_offset, size,
					     dst_object);
		if (result == KERN_SUCCESS) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;
		}
		break;

	case MEMORY_OBJECT_COPY_SYMMETRIC:
		XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0);
		vm_object_unlock(src_object);
		result = KERN_MEMORY_RESTART_COPY;
		break;

	default:
		panic("copy_strategically: bad strategy");
		result = KERN_INVALID_ARGUMENT;
	}
	return(result);
}
/*
 *	Routine:	vm_object_shadow
 *
 *	Purpose:
 *		Create a new object which is backed by the
 *		specified existing object range.  The source
 *		object reference is deallocated.
 *
 *		The new object and offset into that object
 *		are returned in the source parameters.
 */
boolean_t vm_object_shadow_check = TRUE;

__private_extern__ boolean_t
vm_object_shadow(
	vm_object_t		*object,	/* IN/OUT */
	vm_object_offset_t	*offset,	/* IN/OUT */
	vm_object_size_t	length)
{
	register vm_object_t	source;
	register vm_object_t	result;

	source = *object;
	assert(source != VM_OBJECT_NULL);
	if (source == VM_OBJECT_NULL)
		return FALSE;

#if 0
	/*
	 * This assertion is valid but it gets triggered by Rosetta for example
	 * due to a combination of vm_remap() that changes a VM object's
	 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
	 * that then sets "needs_copy" on its map entry.  This creates a
	 * mapping situation that VM should never see and doesn't know how to
	 * handle.
	 * It's not clear if this can create any real problem but we should
	 * look into fixing this, probably by having vm_protect(VM_PROT_COPY)
	 * do more than just set "needs_copy" to handle the copy-on-write...
	 * In the meantime, let's disable the assertion.
	 */
	assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
#endif

	/*
	 *	Determine if we really need a shadow.
	 *
	 *	If the source object is larger than what we are trying
	 *	to create, then force the shadow creation even if the
	 *	ref count is 1.  This will allow us to [potentially]
	 *	collapse the underlying object away in the future
	 *	(freeing up the extra data it might contain and that
	 *	we don't need).
	 */
	if (vm_object_shadow_check &&
	    source->vo_size == length &&
	    source->ref_count == 1 &&
	    (source->shadow == VM_OBJECT_NULL ||
	     source->shadow->copy == VM_OBJECT_NULL) )
	{
		source->shadowed = FALSE;
		return FALSE;
	}

	/*
	 *	Allocate a new object with the given length
	 */

	if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 *	The new object shadows the source object, adding
	 *	a reference to it.  Our caller changes his reference
	 *	to point to the new object, removing a reference to
	 *	the source object.  Net result: no change of reference
	 *	count.
	 */
	result->shadow = source;

	/*
	 *	Store the offset into the source object,
	 *	and fix up the offset into the new object.
	 */

	result->vo_shadow_offset = *offset;

	/*
	 *	Return the new things
	 */

	*offset = 0;
	*object = result;
	return TRUE;
}
/*
 *	The relationship between vm_object structures and
 *	the memory_object requires careful synchronization.
 *
 *	All associations are created by memory_object_create_named
 *	for external pagers and vm_object_pager_create for internal
 *	objects as follows:
 *
 *		pager:	the memory_object itself, supplied by
 *			the user requesting a mapping (or the kernel,
 *			when initializing internal objects); the
 *			kernel simulates holding send rights by keeping
 *			a port reference;
 *
 *		pager_request:
 *			the memory object control port,
 *			created by the kernel; the kernel holds
 *			receive (and ownership) rights to this
 *			port, but no other references.
 *
 *	When initialization is complete, the "initialized" field
 *	is asserted.  Other mappings using a particular memory object,
 *	and any references to the vm_object gained through the
 *	port association must wait for this initialization to occur.
 *
 *	In order to allow the memory manager to set attributes before
 *	requests (notably virtual copy operations, but also data or
 *	unlock requests) are made, a "ready" attribute is made available.
 *	Only the memory manager may affect the value of this attribute.
 *	Its value does not affect critical kernel functions, such as
 *	internal object initialization or destruction.  [Furthermore,
 *	memory objects created by the kernel are assumed to be ready
 *	immediately; the default memory manager need not explicitly
 *	set the "ready" attribute.]
 *
 *	[Both the "initialized" and "ready" attribute wait conditions
 *	use the "pager" field as the wait event.]
 *
 *	The port associations can be broken down by any of the
 *	following routines:
 *		vm_object_terminate:
 *			No references to the vm_object remain, and
 *			the object cannot (or will not) be cached.
 *			This is the normal case, and is done even
 *			though one of the other cases has already been
 *			done.
 *		memory_object_destroy:
 *			The memory manager has requested that the
 *			kernel relinquish references to the memory
 *			object. [The memory manager may not want to
 *			destroy the memory object, but may wish to
 *			refuse or tear down existing memory mappings.]
 *
 *	Each routine that breaks an association must break all of
 *	them at once.  At some later time, that routine must clear
 *	the pager field and release the memory object references.
 *	[Furthermore, each routine must cope with the simultaneous
 *	or previous operations of the others.]
 *
 *	In addition to the lock on the object, the vm_object_hash_lock
 *	governs the associations.  References gained through the
 *	association require use of the hash lock.
 *
 *	Because the pager field may be cleared spontaneously, it
 *	cannot be used to determine whether a memory object has
 *	ever been associated with a particular vm_object.  [This
 *	knowledge is important to the shadow object mechanism.]
 *	For this reason, an additional "created" attribute is
 *	provided.
 *
 *	During various paging operations, the pager reference found in the
 *	vm_object must be valid.  To prevent this from being released,
 *	(other than being removed, i.e., made null), routines may use
 *	the vm_object_paging_begin/end routines [actually, macros].
 *	The implementation uses the "paging_in_progress" and "wanted" fields.
 *	[Operations that alter the validity of the pager values include the
 *	termination routines and vm_object_collapse.]
 */
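
/*
 * The "initialized"/"ready" wait protocol described above is what the code
 * below actually implements; a condensed sketch, taken from vm_object_enter()
 * and vm_object_pager_create() further down (illustrative only):
 *
 *	vm_object_lock(object);
 *	while (!object->pager_initialized) {
 *		vm_object_sleep(object,
 *				VM_OBJECT_EVENT_INITIALIZED,
 *				THREAD_UNINT);
 *	}
 *	...
 *	object->pager_ready = TRUE;
 *	vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
 */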
/*
 *	Routine:	vm_object_enter
 *	Purpose:
 *		Find a VM object corresponding to the given
 *		pager; if no such object exists, create one,
 *		and initialize the pager.
 */
vm_object_t
vm_object_enter(
	memory_object_t		pager,
	vm_object_size_t	size,
	boolean_t		internal,
	boolean_t		init,
	boolean_t		named)
{
	register vm_object_t	object;
	vm_object_t		new_object;
	boolean_t		must_init;
	vm_object_hash_entry_t	entry, new_entry;
	uint32_t		try_failed_count = 0;
	lck_mtx_t		*lck;

	if (pager == MEMORY_OBJECT_NULL)
		return(vm_object_allocate(size));

	new_object = VM_OBJECT_NULL;
	new_entry = VM_OBJECT_HASH_ENTRY_NULL;
	must_init = init;

	/*
	 *	Look for an object associated with this port.
	 */
Retry:
	lck = vm_object_hash_lock_spin(pager);
	do {
		entry = vm_object_hash_lookup(pager, FALSE);

		if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
			if (new_object == VM_OBJECT_NULL) {
				/*
				 *	We must unlock to create a new object;
				 *	if we do so, we must try the lookup again.
				 */
				vm_object_hash_unlock(lck);
				assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
				new_entry = vm_object_hash_entry_alloc(pager);
				new_object = vm_object_allocate(size);
				lck = vm_object_hash_lock_spin(pager);
			} else {
				/*
				 *	Lookup failed twice, and we have something
				 *	to insert; set the object.
				 */
				vm_object_hash_insert(new_entry, new_object);
				entry = new_entry;
				new_entry = VM_OBJECT_HASH_ENTRY_NULL;
				new_object = VM_OBJECT_NULL;
				must_init = TRUE;
			}
		} else if (entry->object == VM_OBJECT_NULL) {
			/*
			 *	If a previous object is being terminated,
			 *	we must wait for the termination message
			 *	to be queued (and lookup the entry again).
			 */
			entry->waiting = TRUE;
			entry = VM_OBJECT_HASH_ENTRY_NULL;
			assert_wait((event_t) pager, THREAD_UNINT);
			vm_object_hash_unlock(lck);

			thread_block(THREAD_CONTINUE_NULL);
			lck = vm_object_hash_lock_spin(pager);
		}
	} while (entry == VM_OBJECT_HASH_ENTRY_NULL);

	object = entry->object;
	assert(object != VM_OBJECT_NULL);

	if (!must_init) {
		if ( !vm_object_lock_try(object)) {

			vm_object_hash_unlock(lck);

			try_failed_count++;
			mutex_pause(try_failed_count);	/* wait a bit */
			goto Retry;
		}
		assert(!internal || object->internal);
#if VM_OBJECT_CACHE
		if (object->ref_count == 0) {
			if ( !vm_object_cache_lock_try()) {

				vm_object_hash_unlock(lck);
				vm_object_unlock(object);

				try_failed_count++;
				mutex_pause(try_failed_count);	/* wait a bit */
				goto Retry;
			}
			XPR(XPR_VM_OBJECT_CACHE,
			    "vm_object_enter: removing %x from cache, head (%x, %x)\n",
			    object,
			    vm_object_cached_list.next,
			    vm_object_cached_list.prev, 0,0);
			queue_remove(&vm_object_cached_list, object,
				     vm_object_t, cached_list);
			vm_object_cached_count--;

			vm_object_cache_unlock();
		}
#endif
		if (named) {
			assert(!object->named);
			object->named = TRUE;
		}
		vm_object_lock_assert_exclusive(object);
		object->ref_count++;
		vm_object_res_reference(object);

		vm_object_hash_unlock(lck);
		vm_object_unlock(object);

		VM_STAT_INCR(hits);
	} else
		vm_object_hash_unlock(lck);

	assert(object->ref_count > 0);

	VM_STAT_INCR(lookups);

	XPR(XPR_VM_OBJECT,
	    "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
	    pager, object, must_init, 0, 0);

	/*
	 *	If we raced to create a vm_object but lost, let's
	 *	throw away ours.
	 */

	if (new_object != VM_OBJECT_NULL)
		vm_object_deallocate(new_object);

	if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
		vm_object_hash_entry_free(new_entry);

	if (must_init) {
		memory_object_control_t control;

		/*
		 *	Allocate request port.
		 */

		control = memory_object_control_allocate(object);
		assert (control != MEMORY_OBJECT_CONTROL_NULL);

		vm_object_lock(object);
		assert(object != kernel_object);

		/*
		 *	Copy the reference we were given.
		 */

		memory_object_reference(pager);
		object->pager_created = TRUE;
		object->pager = pager;
		object->internal = internal;
		object->pager_trusted = internal;
		if (!internal) {
			/* copy strategy invalid until set by memory manager */
			object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
		}
		object->pager_control = control;
		object->pager_ready = FALSE;

		vm_object_unlock(object);

		/*
		 *	Let the pager know we're using it.
		 */

		(void) memory_object_init(pager,
			object->pager_control,
			PAGE_SIZE);

		vm_object_lock(object);
		if (named)
			object->named = TRUE;
		if (internal) {
			object->pager_ready = TRUE;
			vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
		}

		object->pager_initialized = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
	} else {
		vm_object_lock(object);
	}

	/*
	 *	[At this point, the object must be locked]
	 */

	/*
	 *	Wait for the work above to be done by the first
	 *	thread to map this object.
	 */

	while (!object->pager_initialized) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_INITIALIZED,
				THREAD_UNINT);
	}
	vm_object_unlock(object);

	XPR(XPR_VM_OBJECT,
	    "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
	    object, object->pager, internal, 0,0);
	return(object);
}
/*
 *	Routine:	vm_object_pager_create
 *	Purpose:
 *		Create a memory object for an internal object.
 *	In/out conditions:
 *		The object is locked on entry and exit;
 *		it may be unlocked within this call.
 *	Limitations:
 *		Only one thread may be performing a
 *		vm_object_pager_create on an object at
 *		a time.  Presumably, only the pageout
 *		daemon will be using this routine.
 */

void
vm_object_pager_create(
	register vm_object_t	object)
{
	memory_object_t		pager;
	vm_object_hash_entry_t	entry;
	lck_mtx_t		*lck;
#if	MACH_PAGEMAP
	vm_object_size_t	size;
	vm_external_map_t	map;
#endif	/* MACH_PAGEMAP */

	XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
	    object, 0,0,0,0);

	assert(object != kernel_object);

	if (memory_manager_default_check() != KERN_SUCCESS)
		return;

	/*
	 *	Prevent collapse or termination by holding a paging reference
	 */

	vm_object_paging_begin(object);
	if (object->pager_created) {
		/*
		 *	Someone else got to it first...
		 *	wait for them to finish initializing the ports
		 */
		while (!object->pager_initialized) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_INITIALIZED,
					THREAD_UNINT);
		}
		vm_object_paging_end(object);
		return;
	}

	/*
	 *	Indicate that a memory object has been assigned
	 *	before dropping the lock, to prevent a race.
	 */

	object->pager_created = TRUE;
	object->paging_offset = 0;

#if	MACH_PAGEMAP
	size = object->vo_size;
#endif	/* MACH_PAGEMAP */
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	map = vm_external_create(size);
	vm_object_lock(object);
	assert(object->vo_size == size);
	object->existence_map = map;
	vm_object_unlock(object);
#endif	/* MACH_PAGEMAP */

	if ((uint32_t) object->vo_size != object->vo_size) {
		panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n",
		      (uint64_t) object->vo_size);
	}

	/*
	 *	Create the [internal] pager, and associate it with this object.
	 *
	 *	We make the association here so that vm_object_enter()
	 *	can look up the object to complete initializing it.  No
	 *	user will ever map this object.
	 */
	{
		memory_object_default_t dmm;

		/* acquire a reference for the default memory manager */
		dmm = memory_manager_default_reference();

		assert(object->temporary);

		/* create our new memory object */
		assert((vm_size_t) object->vo_size == object->vo_size);
		(void) memory_object_create(dmm, (vm_size_t) object->vo_size,
					    &pager);

		memory_object_default_deallocate(dmm);
	}

	entry = vm_object_hash_entry_alloc(pager);

	lck = vm_object_hash_lock_spin(pager);
	vm_object_hash_insert(entry, object);
	vm_object_hash_unlock(lck);

	/*
	 *	A reference was returned by
	 *	memory_object_create(), and it is
	 *	copied by vm_object_enter().
	 */

	if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object)
		panic("vm_object_pager_create: mismatch");

	/*
	 *	Drop the reference we were passed.
	 */
	memory_object_deallocate(pager);

	vm_object_lock(object);

	/*
	 *	Release the paging reference
	 */
	vm_object_paging_end(object);
}
/*
 *	Routine:	vm_object_remove
 *	Purpose:
 *		Eliminate the pager/object association
 *		for this pager.
 *	Conditions:
 *		The object cache must be locked.
 */
__private_extern__ void
vm_object_remove(
	vm_object_t	object)
{
	memory_object_t pager;

	if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		entry = vm_object_hash_lookup(pager, FALSE);
		if (entry != VM_OBJECT_HASH_ENTRY_NULL)
			entry->object = VM_OBJECT_NULL;
	}
}
/*
 *	Global variables for vm_object_collapse():
 *
 *		Counts for normal collapses and bypasses.
 *		Debugging variables, to watch or disable collapse.
 */
static long	object_collapses = 0;
static long	object_bypasses = 0;

static boolean_t	vm_object_collapse_allowed = TRUE;
static boolean_t	vm_object_bypass_allowed = TRUE;

static int	vm_external_discarded;
static int	vm_external_collapsed;

unsigned long vm_object_collapse_encrypted = 0;
/*
 *	Routine:	vm_object_do_collapse
 *	Purpose:
 *		Collapse an object with the object backing it.
 *		Pages in the backing object are moved into the
 *		parent, and the backing object is deallocated.
 *	Conditions:
 *		Both objects and the cache are locked; the page
 *		queues are unlocked.
 */
static void
vm_object_do_collapse(
	vm_object_t object,
	vm_object_t backing_object)
{
	vm_page_t p, pp;
	vm_object_offset_t new_offset, backing_offset;
	vm_object_size_t size;

	vm_object_lock_assert_exclusive(object);
	vm_object_lock_assert_exclusive(backing_object);

	backing_offset = object->vo_shadow_offset;
	size = object->vo_size;

	/*
	 *	Move all in-memory pages from backing_object
	 *	to the parent.  Pages that have been paged out
	 *	will be overwritten by any of the parent's
	 *	pages that shadow them.
	 */

	while (!queue_empty(&backing_object->memq)) {

		p = (vm_page_t) queue_first(&backing_object->memq);

		new_offset = (p->offset - backing_offset);

		assert(!p->busy || p->absent);

		/*
		 *	If the parent has a page here, or if
		 *	this page falls outside the parent,
		 *	dispose of it.
		 *
		 *	Otherwise, move it as planned.
		 */

		if (p->offset < backing_offset || new_offset >= size) {
			VM_PAGE_FREE(p);
		} else {
			/*
			 * The encryption key includes the "pager" and the
			 * "paging_offset".  These will not change during the
			 * object collapse, so we can just move an encrypted
			 * page from one object to the other in this case.
			 * We can't decrypt the page here, since we can't drop
			 * the object lock.
			 */
			if (p->encrypted) {
				vm_object_collapse_encrypted++;
			}
			pp = vm_page_lookup(object, new_offset);
			if (pp == VM_PAGE_NULL) {

				/*
				 *	Parent now has no page.
				 *	Move the backing object's page up.
				 */

				vm_page_rename(p, object, new_offset, TRUE);
#if	MACH_PAGEMAP
			} else if (pp->absent) {

				/*
				 *	Parent has an absent page...
				 *	it's not being paged in, so
				 *	it must really be missing from
				 *	the parent.
				 *
				 *	Throw out the absent page...
				 *	any faults looking for that
				 *	page will restart with the new
				 *	one.
				 */

				VM_PAGE_FREE(pp);
				vm_page_rename(p, object, new_offset, TRUE);
#endif	/* MACH_PAGEMAP */
			} else {
				assert(! pp->absent);

				/*
				 *	Parent object has a real page.
				 *	Throw away the backing object's
				 *	page.
				 */
				VM_PAGE_FREE(p);
			}
		}
	}

#if	!MACH_PAGEMAP
	assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL))
	       || (!backing_object->pager_created
	       &&  (backing_object->pager == MEMORY_OBJECT_NULL)));
#else
	assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
#endif	/* !MACH_PAGEMAP */

	if (backing_object->pager != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		/*
		 *	Move the pager from backing_object to object.
		 *
		 *	XXX We're only using part of the paging space
		 *	for keeps now... we ought to discard the
		 *	unused portion.
		 */

		assert(!object->paging_in_progress);
		assert(!object->activity_in_progress);
		object->pager = backing_object->pager;

		if (backing_object->hashed) {
			lck_mtx_t	*lck;

			lck = vm_object_hash_lock_spin(backing_object->pager);
			entry = vm_object_hash_lookup(object->pager, FALSE);
			assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
			entry->object = object;
			vm_object_hash_unlock(lck);

			object->hashed = TRUE;
		}
		object->pager_created = backing_object->pager_created;
		object->pager_control = backing_object->pager_control;
		object->pager_ready = backing_object->pager_ready;
		object->pager_initialized = backing_object->pager_initialized;
		object->paging_offset =
		    backing_object->paging_offset + backing_offset;
		if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_collapse(object->pager_control,
						       object);
		}
	}

#if	MACH_PAGEMAP
	/*
	 *	If the shadow offset is 0, then use the existence map from
	 *	the backing object if there is one. If the shadow offset is
	 *	not zero, toss it.
	 *
	 *	XXX - If the shadow offset is not 0 then a bit copy is needed
	 *	if the map is to be salvaged.  For now, we just toss the
	 *	old map, giving the collapsed object no map. This means that
	 *	the pager is invoked for zero fill pages.  If analysis shows
	 *	that this happens frequently and is a performance hit, then
	 *	this code should be fixed to salvage the map.
	 */
	assert(object->existence_map == VM_EXTERNAL_NULL);
	if (backing_offset || (size != backing_object->vo_size)) {
		vm_external_discarded++;
		vm_external_destroy(backing_object->existence_map,
			backing_object->vo_size);
	} else {
		vm_external_collapsed++;
		object->existence_map = backing_object->existence_map;
	}
	backing_object->existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */

	/*
	 *	Object now shadows whatever backing_object did.
	 *	Note that the reference to backing_object->shadow
	 *	moves from within backing_object to within object.
	 */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->vo_shadow_offset += backing_object->vo_shadow_offset;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->vo_shadow_offset = 0;
	}
	assert((object->shadow == VM_OBJECT_NULL) ||
	       (object->shadow->copy != backing_object));

	/*
	 *	Discard backing_object.
	 *
	 *	Since the backing object has no pages, no
	 *	pager left, and no object references within it,
	 *	all that is necessary is to dispose of it.
	 */

	assert((backing_object->ref_count == 1) &&
	       (backing_object->resident_page_count == 0) &&
	       (backing_object->paging_in_progress == 0) &&
	       (backing_object->activity_in_progress == 0));

	backing_object->alive = FALSE;
	vm_object_unlock(backing_object);

	XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
	    backing_object, 0, 0, 0, 0);

	vm_object_lock_destroy(backing_object);

	zfree(vm_object_zone, backing_object);

	object_collapses++;
}
static void
vm_object_do_bypass(
	vm_object_t object,
	vm_object_t backing_object)
{
	/*
	 *	Make the parent shadow the next object
	 *	in the chain.
	 */

	vm_object_lock_assert_exclusive(object);
	vm_object_lock_assert_exclusive(backing_object);

#if	TASK_SWAPPER
	/*
	 *	Do object reference in-line to
	 *	conditionally increment shadow's
	 *	residence count.  If object is not
	 *	resident, leave residence count
	 *	on shadow alone.
	 */
	if (backing_object->shadow != VM_OBJECT_NULL) {
		vm_object_lock(backing_object->shadow);
		vm_object_lock_assert_exclusive(backing_object->shadow);
		backing_object->shadow->ref_count++;
		if (object->res_count != 0)
			vm_object_res_reference(backing_object->shadow);
		vm_object_unlock(backing_object->shadow);
	}
#else	/* TASK_SWAPPER */
	vm_object_reference(backing_object->shadow);
#endif	/* TASK_SWAPPER */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->vo_shadow_offset += backing_object->vo_shadow_offset;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->vo_shadow_offset = 0;
	}

	/*
	 *	Backing object might have had a copy pointer
	 *	to us.  If it did, clear it.
	 */
	if (backing_object->copy == object) {
		backing_object->copy = VM_OBJECT_NULL;
	}

	/*
	 *	Drop the reference count on backing_object.
	 *
	 *	With TASK_SWAPPER:
	 *	Since its ref_count was at least 2, it
	 *	will not vanish; so we don't need to call
	 *	vm_object_deallocate.
	 *	[with a caveat for "named" objects]
	 *
	 *	The res_count on the backing object is
	 *	conditionally decremented.  It's possible
	 *	(via vm_pageout_scan) to get here with
	 *	a "swapped" object, which has a 0 res_count,
	 *	in which case, the backing object res_count
	 *	is already down by one.
	 *
	 *	Without TASK_SWAPPER:
	 *	Don't call vm_object_deallocate unless
	 *	ref_count drops to zero.
	 *
	 *	The ref_count can drop to zero here if the
	 *	backing object could be bypassed but not
	 *	collapsed, such as when the backing object
	 *	is temporary and cacheable.
	 */
	if (backing_object->ref_count > 2 ||
	    (!backing_object->named && backing_object->ref_count > 1)) {
		vm_object_lock_assert_exclusive(backing_object);
		backing_object->ref_count--;
#if	TASK_SWAPPER
		if (object->res_count != 0)
			vm_object_res_deallocate(backing_object);
		assert(backing_object->ref_count > 0);
#endif	/* TASK_SWAPPER */
		vm_object_unlock(backing_object);
	} else {
		/*
		 *	Drop locks so that we can deallocate
		 *	the backing object.
		 */

#if	TASK_SWAPPER
		if (object->res_count == 0) {
			/* XXX get a reference for the deallocate below */
			vm_object_res_reference(backing_object);
		}
#endif	/* TASK_SWAPPER */
		vm_object_unlock(object);
		vm_object_unlock(backing_object);
		vm_object_deallocate(backing_object);

		/*
		 *	Relock object. We don't have to reverify
		 *	its state since vm_object_collapse will
		 *	do that for us as it starts at the
		 *	top of the chain.
		 */

		vm_object_lock(object);
	}

	object_bypasses++;
}
/*
 *	vm_object_collapse:
 *
 *	Perform an object collapse or an object bypass if appropriate.
 *	The real work of collapsing and bypassing is performed in
 *	the routines vm_object_do_collapse and vm_object_do_bypass.
 *
 *	Requires that the object be locked and the page queues be unlocked.
 */
static unsigned long vm_object_collapse_calls = 0;
static unsigned long vm_object_collapse_objects = 0;
static unsigned long vm_object_collapse_do_collapse = 0;
static unsigned long vm_object_collapse_do_bypass = 0;
static unsigned long vm_object_collapse_delays = 0;

__private_extern__ void
vm_object_collapse(
	register vm_object_t			object,
	register vm_object_offset_t		hint_offset,
	boolean_t				can_bypass)
{
	register vm_object_t			backing_object;
	register unsigned int			rcount;
	register unsigned int			size;
	vm_object_t				original_object;
	int					object_lock_type;
	int					backing_object_lock_type;

	vm_object_collapse_calls++;

	if (! vm_object_collapse_allowed &&
	    ! (can_bypass && vm_object_bypass_allowed)) {
		return;
	}

	XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
	    object, 0, 0, 0, 0);

	if (object == VM_OBJECT_NULL)
		return;

	original_object = object;

	/*
	 * The top object was locked "exclusive" by the caller.
	 * In the first pass, to determine if we can collapse the shadow chain,
	 * take a "shared" lock on the shadow objects.  If we can collapse,
	 * we'll have to go down the chain again with exclusive locks.
	 */
	object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	backing_object_lock_type = OBJECT_LOCK_SHARED;

retry:
	object = original_object;
	vm_object_lock_assert_exclusive(object);

	while (TRUE) {
		vm_object_collapse_objects++;
		/*
		 *	Verify that the conditions are right for either
		 *	collapse or bypass:
		 */

		/*
		 *	There is a backing object, and
		 */

		backing_object = object->shadow;
		if (backing_object == VM_OBJECT_NULL) {
			if (object != original_object) {
				vm_object_unlock(object);
			}
			return;
		}
		if (backing_object_lock_type == OBJECT_LOCK_SHARED) {
			vm_object_lock_shared(backing_object);
		} else {
			vm_object_lock(backing_object);
		}

		/*
		 *	No pages in the object are currently
		 *	being paged out, and
		 */
		if (object->paging_in_progress != 0 ||
		    object->activity_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	The backing object is not read_only,
		 *	and no pages in the backing object are
		 *	currently being paged out.
		 *	The backing object is internal.
		 */

		if (!backing_object->internal ||
		    backing_object->paging_in_progress != 0 ||
		    backing_object->activity_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	The backing object can't be a copy-object:
		 *	the shadow_offset for the copy-object must stay
		 *	as 0.  Furthermore (for the 'we have all the
		 *	pages' case), if we bypass backing_object and
		 *	just shadow the next object in the chain, old
		 *	pages from that object would then have to be copied
		 *	BOTH into the (former) backing_object and into the
		 *	parent object.
		 */
		if (backing_object->shadow != VM_OBJECT_NULL &&
		    backing_object->shadow->copy == backing_object) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	We can now try to either collapse the backing
		 *	object (if the parent is the only reference to
		 *	it) or (perhaps) remove the parent's reference
		 *	to it.
		 *
		 *	If there is exactly one reference to the backing
		 *	object, we may be able to collapse it into the
		 *	parent.
		 *
		 *	If MACH_PAGEMAP is defined:
		 *	The parent must not have a pager created for it,
		 *	since collapsing a backing_object dumps new pages
		 *	into the parent that its pager doesn't know about
		 *	(and the collapse code can't merge the existence
		 *	maps).
		 *	Otherwise:
		 *	As long as one of the objects is still not known
		 *	to the pager, we can collapse them.
		 */
		if (backing_object->ref_count == 1 &&
		    (!object->pager_created
#if	!MACH_PAGEMAP
		     || !backing_object->pager_created
#endif	/*!MACH_PAGEMAP */
		    ) && vm_object_collapse_allowed) {

			/*
			 * We need the exclusive lock on the VM objects.
			 */
			if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
				/*
				 * We have an object and its shadow locked
				 * "shared".  We can't just upgrade the locks
				 * to "exclusive", as some other thread might
				 * also have these objects locked "shared" and
				 * attempt to upgrade one or the other to
				 * "exclusive".  The upgrades would block
				 * forever waiting for the other "shared" locks
				 * to get released.
				 * So we have to release the locks and go
				 * down the shadow chain again (since it could
				 * have changed) with "exclusive" locking.
				 */
				vm_object_unlock(backing_object);
				if (object != original_object)
					vm_object_unlock(object);
				object_lock_type = OBJECT_LOCK_EXCLUSIVE;
				backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
				goto retry;
			}

			XPR(XPR_VM_OBJECT,
			    "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
			    backing_object, object,
			    backing_object->pager,
			    backing_object->pager_control, 0);

			/*
			 *	Collapse the object with its backing
			 *	object, and try again with the object's
			 *	new backing object.
			 */

			vm_object_do_collapse(object, backing_object);
			vm_object_collapse_do_collapse++;
			continue;
		}

		/*
		 *	Collapsing the backing object was not possible
		 *	or permitted, so let's try bypassing it.
		 */

		if (! (can_bypass && vm_object_bypass_allowed)) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	If the object doesn't have all its pages present,
		 *	we have to make sure no pages in the backing object
		 *	"show through" before bypassing it.
		 */
		size = atop(object->vo_size);
		rcount = object->resident_page_count;

		if (rcount != size) {
			vm_object_offset_t	offset;
			vm_object_offset_t	backing_offset;
			unsigned int		backing_rcount;
			unsigned int		lookups = 0;

			/*
			 *	If the backing object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (backing_object->pager_created
#if	MACH_PAGEMAP
			    && (backing_object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				object_lock_type = backing_object_lock_type;
				continue;
			}

			/*
			 *	If the object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (object->pager_created
#if	MACH_PAGEMAP
			    && (object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				object_lock_type = backing_object_lock_type;
				continue;
			}

			/*
			 *	If all of the pages in the backing object are
			 *	shadowed by the parent object, the parent
			 *	object no longer has to shadow the backing
			 *	object; it can shadow the next one in the
			 *	chain.
			 *
			 *	If the backing object has existence info,
			 *	we must examine its existence info
			 *	as well.
			 */

			backing_offset = object->vo_shadow_offset;
			backing_rcount = backing_object->resident_page_count;

#if	MACH_PAGEMAP
#define EXISTS_IN_OBJECT(obj, off, rc) \
	(vm_external_state_get((obj)->existence_map, \
	 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
	 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
#else
#define EXISTS_IN_OBJECT(obj, off, rc) \
	(((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
#endif	/* MACH_PAGEMAP */

			/*
			 * Check the hint location first
			 * (since it is often the quickest way out of here).
			 */
			if (object->cow_hint != ~(vm_offset_t)0)
				hint_offset = (vm_object_offset_t)object->cow_hint;
			else
				hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
					(hint_offset - 8 * PAGE_SIZE_64) : 0;

			if (EXISTS_IN_OBJECT(backing_object, hint_offset +
					     backing_offset, backing_rcount) &&
			    !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
				/* dependency right at the hint */
				object->cow_hint = (vm_offset_t) hint_offset; /* atomic */
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				object_lock_type = backing_object_lock_type;
				continue;
			}

			/*
			 * If the object's window onto the backing_object
			 * is large compared to the number of resident
			 * pages in the backing object, it makes sense to
			 * walk the backing_object's resident pages first.
			 *
			 * NOTE: Pages may be in both the existence map and
			 * resident.  So, we can't permanently decrement
			 * the rcount here because the second loop may
			 * find the same pages in the backing object's
			 * existence map that we found here and we would
			 * double-decrement the rcount.  We also may or
			 * may not have found the
			 */
			if (backing_rcount &&
#if	MACH_PAGEMAP
			    size > ((backing_object->existence_map) ?
				    backing_rcount : (backing_rcount >> 1))
#else
			    size > (backing_rcount >> 1)
#endif	/* MACH_PAGEMAP */
				) {
				unsigned int rc = rcount;
				vm_page_t p;

				backing_rcount = backing_object->resident_page_count;
				p = (vm_page_t)queue_first(&backing_object->memq);
				do {
					/* Until we get more than one lookup lock */
					if (lookups > 256) {
						vm_object_collapse_delays++;
						lookups = 0;
						mutex_pause(0);
					}

					offset = (p->offset - backing_offset);

					if (offset < object->vo_size &&
					    offset != hint_offset &&
					    !EXISTS_IN_OBJECT(object, offset, rc)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t) offset; /* atomic */
						break;
					}
					p = (vm_page_t) queue_next(&p->listq);

				} while (--backing_rcount);
				if (backing_rcount != 0 ) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					object_lock_type = backing_object_lock_type;
					continue;
				}
			}

			/*
			 * Walk through the offsets looking for pages in the
			 * backing object that show through to the object.
			 */
			if (backing_rcount
#if	MACH_PAGEMAP
			    || backing_object->existence_map
#endif	/* MACH_PAGEMAP */
				) {
				offset = hint_offset;

				while ((offset =
					(offset + PAGE_SIZE_64 < object->vo_size) ?
					(offset + PAGE_SIZE_64) : 0) != hint_offset) {

					/* Until we get more than one lookup lock */
					if (lookups > 256) {
						vm_object_collapse_delays++;
						lookups = 0;
						mutex_pause(0);
					}

					if (EXISTS_IN_OBJECT(backing_object, offset +
							     backing_offset, backing_rcount) &&
					    !EXISTS_IN_OBJECT(object, offset, rcount)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t) offset; /* atomic */
						break;
					}
				}
				if (offset != hint_offset) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					object_lock_type = backing_object_lock_type;
					continue;
				}
			}
		}

		/*
		 * We need "exclusive" locks on the 2 VM objects.
		 */
		if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
			vm_object_unlock(backing_object);
			if (object != original_object)
				vm_object_unlock(object);
			object_lock_type = OBJECT_LOCK_EXCLUSIVE;
			backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
			goto retry;
		}

		/* reset the offset hint for any objects deeper in the chain */
		object->cow_hint = (vm_offset_t)0;

		/*
		 *	All interesting pages in the backing object
		 *	already live in the parent or its pager.
		 *	Thus we can bypass the backing object.
		 */

		vm_object_do_bypass(object, backing_object);
		vm_object_collapse_do_bypass++;

		/*
		 *	Try again with this object's new backing object.
		 */

		continue;
	}

	if (object != original_object) {
		vm_object_unlock(object);
	}
}
/*
 *	Routine:	vm_object_page_remove: [internal]
 *	Purpose:
 *		Removes all physical pages in the specified
 *		object range from the object's list of pages.
 *
 *	In/out conditions:
 *		The object must be locked.
 *		The object must not have paging_in_progress, usually
 *		guaranteed by not having a pager.
 */
unsigned int vm_object_page_remove_lookup = 0;
unsigned int vm_object_page_remove_iterate = 0;

__private_extern__ void
vm_object_page_remove(
	register vm_object_t		object,
	register vm_object_offset_t	start,
	register vm_object_offset_t	end)
{
	register vm_page_t	p, next;

	/*
	 *	One and two page removals are most popular.
	 *	The factor of 16 here is somewhat arbitrary.
	 *	It balances vm_object_lookup vs iteration.
	 */

	if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
		vm_object_page_remove_lookup++;

		for (; start < end; start += PAGE_SIZE_64) {
			p = vm_page_lookup(object, start);
			if (p != VM_PAGE_NULL) {
				assert(!p->cleaning && !p->pageout);
				if (!p->fictitious && p->pmapped)
					pmap_disconnect(p->phys_page);
				VM_PAGE_FREE(p);
			}
		}
	} else {
		vm_object_page_remove_iterate++;

		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
			next = (vm_page_t) queue_next(&p->listq);
			if ((start <= p->offset) && (p->offset < end)) {
				assert(!p->cleaning && !p->pageout);
				if (!p->fictitious && p->pmapped)
					pmap_disconnect(p->phys_page);
				VM_PAGE_FREE(p);
			}
			p = next;
		}
	}
}
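
/*
 * Illustrative sketch (not part of the build): removing the resident pages
 * backing a range, as vm_object_coalesce() below does for a region that is
 * about to be reused.  "obj", "start" and "end" are hypothetical; per the
 * conditions above, the caller holds the object lock and the object has no
 * paging in progress.
 */
#if 0
	vm_object_lock(obj);
	vm_object_page_remove(obj, start, end);	/* frees resident pages in [start, end) */
	vm_object_unlock(obj);
#endif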
/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object(s) must *not* be locked. The map must be locked
 *	to preserve the reference to the object(s).
 */
static int vm_object_coalesce_count = 0;

__private_extern__ boolean_t
vm_object_coalesce(
	register vm_object_t		prev_object,
	vm_object_t			next_object,
	vm_object_offset_t		prev_offset,
	__unused vm_object_offset_t	next_offset,
	vm_object_size_t		prev_size,
	vm_object_size_t		next_size)
{
	vm_object_size_t	newsize;

	if (next_object != VM_OBJECT_NULL) {
		return(FALSE);
	}

	if (prev_object == VM_OBJECT_NULL) {
		return(TRUE);
	}

	XPR(XPR_VM_OBJECT,
	    "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
	    prev_object, prev_offset, prev_size, next_size, 0);

	vm_object_lock(prev_object);

	/*
	 *	Try to collapse the object first
	 */
	vm_object_collapse(prev_object, prev_offset, TRUE);

	/*
	 *	Can't coalesce if pages not mapped to
	 *	prev_entry may be in use any way:
	 *	. more than one reference
	 *	. paged out
	 *	. shadows another object
	 *	. has a copy elsewhere
	 *	. is purgeable
	 *	. paging references (pages might be in page-list)
	 */

	if ((prev_object->ref_count > 1) ||
	    prev_object->pager_created ||
	    (prev_object->shadow != VM_OBJECT_NULL) ||
	    (prev_object->copy != VM_OBJECT_NULL) ||
	    (prev_object->true_share != FALSE) ||
	    (prev_object->purgable != VM_PURGABLE_DENY) ||
	    (prev_object->paging_in_progress != 0) ||
	    (prev_object->activity_in_progress != 0)) {
		vm_object_unlock(prev_object);
		return(FALSE);
	}

	vm_object_coalesce_count++;

	/*
	 *	Remove any pages that may still be in the object from
	 *	a previous deallocation.
	 */
	vm_object_page_remove(prev_object,
		prev_offset + prev_size,
		prev_offset + prev_size + next_size);

	/*
	 *	Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->vo_size) {
#if	MACH_PAGEMAP
		/*
		 *	We cannot extend an object that has existence info,
		 *	since the existence info might then fail to cover
		 *	the entire object.
		 *
		 *	This assertion must be true because the object
		 *	has no pager, and we only create existence info
		 *	for objects with pagers.
		 */
		assert(prev_object->existence_map == VM_EXTERNAL_NULL);
#endif	/* MACH_PAGEMAP */
		prev_object->vo_size = newsize;
	}

	vm_object_unlock(prev_object);
	return(TRUE);
}
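
/*
 * Illustrative sketch (not part of the build): the intended calling pattern
 * for vm_object_coalesce() when growing an existing mapping in place.  As
 * noted above, the second object must currently be VM_OBJECT_NULL and the
 * map (not the object) must be locked by the caller.  The names below are
 * hypothetical.
 */
#if 0
	if (vm_object_coalesce(prev_object, VM_OBJECT_NULL,
			       prev_offset, (vm_object_offset_t) 0,
			       prev_size, next_size)) {
		/* prev_object now covers the combined range */
	}
#endif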
/*
 *	Attach a set of physical pages to an object, so that they can
 *	be mapped by mapping the object.  Typically used to map IO memory.
 *
 *	The mapping function and its private data are used to obtain the
 *	physical addresses for each page to be mapped.
 */
void
vm_object_page_map(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_object_offset_t	(*map_fn)(void *map_fn_data,
		vm_object_offset_t offset),
	void			*map_fn_data)	/* private to map_fn */
{
	int64_t			num_pages;
	int			i;
	vm_page_t		m;
	vm_page_t		old_page;
	vm_object_offset_t	addr;

	num_pages = atop_64(size);

	for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {

		addr = (*map_fn)(map_fn_data, offset);

		while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
			vm_page_more_fictitious();

		vm_object_lock(object);
		if ((old_page = vm_page_lookup(object, offset))
		    != VM_PAGE_NULL) {
			VM_PAGE_FREE(old_page);
		}

		assert((ppnum_t) addr == addr);
		vm_page_init(m, (ppnum_t) addr, FALSE);
		/*
		 * private normally requires lock_queues but since we
		 * are initializing the page, it's not necessary here
		 */
		m->private = TRUE;		/* don't free page */
		m->wire_count = 1;
		vm_page_insert(m, object, offset);

		PAGE_WAKEUP_DONE(m);
		vm_object_unlock(object);
	}
}
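
/*
 * Illustrative sketch (not part of the build): a map_fn callback of the shape
 * vm_object_page_map() expects.  The returned value is used as a physical
 * page number (ppnum_t) for vm_page_init(), so this hypothetical callback
 * describes a physically contiguous range whose first page number is carried
 * in map_fn_data.
 */
#if 0
static vm_object_offset_t
example_map_fn(void *map_fn_data, vm_object_offset_t offset)
{
	ppnum_t	first_page = *(ppnum_t *) map_fn_data;

	/* page number of the frame backing this page-aligned offset */
	return (vm_object_offset_t) (first_page + atop_64(offset));
}
#endif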
#include <mach_kdb.h>
#if	MACH_KDB
#include <ddb/db_output.h>
#include <vm/vm_print.h>

#define printf	kdbprintf

extern boolean_t	vm_object_cached(
				vm_object_t object);

extern void		print_bitstring(
				char byte);

boolean_t	vm_object_print_pages = FALSE;

void
print_bitstring(
	char byte)
{
	printf("%c%c%c%c%c%c%c%c",
	       ((byte & (1 << 0)) ? '1' : '0'),
	       ((byte & (1 << 1)) ? '1' : '0'),
	       ((byte & (1 << 2)) ? '1' : '0'),
	       ((byte & (1 << 3)) ? '1' : '0'),
	       ((byte & (1 << 4)) ? '1' : '0'),
	       ((byte & (1 << 5)) ? '1' : '0'),
	       ((byte & (1 << 6)) ? '1' : '0'),
	       ((byte & (1 << 7)) ? '1' : '0'));
}

boolean_t
vm_object_cached(
	__unused register vm_object_t object)
{
#if VM_OBJECT_CACHE
	register vm_object_t o;

	queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
		if (o == object) {
			return TRUE;
		}
	}
#endif
	return FALSE;
}

#if	MACH_PAGEMAP
/*
 *	vm_external_print:	[ debug ]
 */
void
vm_external_print(
	vm_external_map_t	emap,
	vm_object_size_t	size)
{
	if (emap == VM_EXTERNAL_NULL) {
		printf("0  ");
	} else {
		vm_object_size_t existence_size = stob(size);
		printf("{ size=%lld, map=[", (uint64_t) existence_size);
		if (existence_size > 0) {
			print_bitstring(emap[0]);
		}
		if (existence_size > 1) {
			print_bitstring(emap[1]);
		}
		if (existence_size > 2) {
			printf("...");
			print_bitstring(emap[existence_size-1]);
		}
		printf("] }\n");
	}
	return;
}
#endif	/* MACH_PAGEMAP */
int
vm_follow_object(
	vm_object_t object)
{
	int count = 0;
	int orig_db_indent = db_indent;

	while (TRUE) {

		if (object == VM_OBJECT_NULL) {
			db_indent = orig_db_indent;
			return count;
		}

		count += 1;

		iprintf("object 0x%x", object);
		printf(", shadow=0x%x", object->shadow);
		printf(", copy=0x%x", object->copy);
		printf(", pager=0x%x", object->pager);
		printf(", ref=%d\n", object->ref_count);

		db_indent += 2;
		object = object->shadow;
	}
}
/*
 *	vm_object_print:	[ debug ]
 */
void
vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
		__unused db_expr_t arg_count, __unused char *modif)
{
	vm_object_t		object;
	register vm_page_t	p;
	const char		*s;
	register int		count;

	object = (vm_object_t) (long) db_addr;
	if (object == VM_OBJECT_NULL)
		return;

	iprintf("object 0x%x\n", object);

	db_indent += 2;

	iprintf("size=0x%x", object->vo_size);
	printf(", memq_hint=%p", object->memq_hint);
	printf(", ref_count=%d\n", object->ref_count);
	iprintf("");
#if	TASK_SWAPPER
	printf("res_count=%d, ", object->res_count);
#endif	/* TASK_SWAPPER */
	printf("resident_page_count=%d\n", object->resident_page_count);

	iprintf("shadow=0x%x", object->shadow);
	if (object->shadow) {
		register int i = 0;
		vm_object_t shadow = object;
		while ((shadow = shadow->shadow))
			i++;
		printf(" (depth %d)", i);
	}
	printf(", copy=0x%x", object->copy);
	printf(", shadow_offset=0x%x", object->vo_shadow_offset);
	printf(", last_alloc=0x%x\n", object->last_alloc);

	iprintf("pager=0x%x", object->pager);
	printf(", paging_offset=0x%x", object->paging_offset);
	printf(", pager_control=0x%x\n", object->pager_control);

	iprintf("copy_strategy=%d[", object->copy_strategy);
	switch (object->copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
		printf("copy_none");
		break;

	case MEMORY_OBJECT_COPY_CALL:
		printf("copy_call");
		break;

	case MEMORY_OBJECT_COPY_DELAY:
		printf("copy_delay");
		break;

	case MEMORY_OBJECT_COPY_SYMMETRIC:
		printf("copy_symmetric");
		break;

	case MEMORY_OBJECT_COPY_INVALID:
		printf("copy_invalid");
		break;

	default:
		printf("?");
	}
	printf("]");

	iprintf("all_wanted=0x%x<", object->all_wanted);
	s = "";
	if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
		printf("%sinit", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
		printf("%sready", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
		printf("%spaging", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
		printf("%slock", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
		printf("%suncaching", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
		printf("%scopy_call", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
		printf("%scaching", s);
		s = ",";
	}
	printf(">");
	printf(", paging_in_progress=%d\n", object->paging_in_progress);
	printf(", activity_in_progress=%d\n", object->activity_in_progress);

	iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
		(object->pager_created ? "" : "!"),
		(object->pager_initialized ? "" : "!"),
		(object->pager_ready ? "" : "!"),
		(object->can_persist ? "" : "!"),
		(object->pager_trusted ? "" : "!"),
		(object->pageout ? "" : "!"),
		(object->internal ? "internal" : "external"),
		(object->temporary ? "temporary" : "permanent"));
	iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n",
		(object->alive ? "" : "!"),
		((object->purgable != VM_PURGABLE_DENY) ? "" : "!"),
		((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"),
		((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"),
		(object->shadowed ? "" : "!"),
		(vm_object_cached(object) ? "" : "!"),
		(object->private ? "" : "!"));
	iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
		(object->advisory_pageout ? "" : "!"),
		(object->silent_overwrite ? "" : "!"));

#if	MACH_PAGEMAP
	iprintf("existence_map=");
	vm_external_print(object->existence_map, object->vo_size);
#endif	/* MACH_PAGEMAP */
#if	MACH_ASSERT
	iprintf("paging_object=0x%x\n", object->paging_object);
#endif	/* MACH_ASSERT */

	if (vm_object_print_pages) {
		count = 0;
		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
			if (count == 0) {
				iprintf("memory:=");
			} else if (count == 2) {
				printf("\n");
				count = 0;
			} else {
				printf(",");
			}
			count++;

			printf("(off=0x%llX,page=%p)", p->offset, p);
			p = (vm_page_t) queue_next(&p->listq);
		}
		if (count != 0) {
			printf("\n");
		}
	}
	db_indent -= 2;
}
/*
 *	vm_object_find		[ debug ]
 *
 *	Find all tasks which reference the given vm_object.
 */

boolean_t	vm_object_find(vm_object_t object);
boolean_t	vm_object_print_verbose = FALSE;

boolean_t
vm_object_find(
	vm_object_t	object)
{
	task_t		task;
	vm_map_t	map;
	vm_map_entry_t	entry;
	boolean_t	found = FALSE;

	queue_iterate(&tasks, task, task_t, tasks) {
		map = task->map;
		for (entry = vm_map_first_entry(map);
		     entry && entry != vm_map_to_entry(map);
		     entry = entry->vme_next) {

			vm_object_t obj;

			/*
			 * For the time being skip submaps,
			 * only the kernel can have submaps,
			 * and unless we are interested in
			 * kernel objects, we can simply skip
			 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
			 * for a full solution.
			 */
			if (entry->is_sub_map)
				continue;

			obj = entry->object.vm_object;

			while (obj != VM_OBJECT_NULL) {
				if (obj == object) {
					if (!found) {
						printf("TASK\t\tMAP\t\tENTRY\n");
						found = TRUE;
					}
					printf("0x%x\t0x%x\t0x%x\n",
					       task, map, entry);
				}
				obj = obj->shadow;
			}
		}
	}

	return(found);
}

#endif	/* MACH_KDB */
kern_return_t
vm_object_populate_with_private(
	vm_object_t		object,
	vm_object_offset_t	offset,
	ppnum_t			phys_page,
	vm_size_t		size)
{
	ppnum_t			base_page;
	vm_object_offset_t	base_offset;

	if (!object->private)
		return KERN_FAILURE;

	base_page = phys_page;

	vm_object_lock(object);

	if (!object->phys_contiguous) {
		vm_page_t	m;

		if ((base_offset = trunc_page_64(offset)) != offset) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		base_offset += object->paging_offset;

		while (size) {
			m = vm_page_lookup(object, base_offset);

			if (m != VM_PAGE_NULL) {
				if (m->fictitious) {
					if (m->phys_page != vm_page_guard_addr) {
						vm_page_lockspin_queues();
						m->private = TRUE;
						vm_page_unlock_queues();

						m->fictitious = FALSE;
						m->phys_page = base_page;
						m->list_req_pending = TRUE;
					}
				} else if (m->phys_page != base_page) {
					if (m->pmapped) {
						/*
						 * pmap call to clear old mapping
						 */
						pmap_disconnect(m->phys_page);
					}
					m->phys_page = base_page;
				}

				/*
				 * We're not pointing to the same
				 * physical page any longer and the
				 * contents of the new one are not
				 * supposed to be encrypted.
				 * XXX What happens to the original
				 * physical page.  Is it lost ?
				 */
				m->encrypted = FALSE;

			} else {
				while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
					vm_page_more_fictitious();

				/*
				 * private normally requires lock_queues but since we
				 * are initializing the page, it's not necessary here
				 */
				m->private = TRUE;
				m->fictitious = FALSE;
				m->phys_page = base_page;
				m->list_req_pending = TRUE;

				vm_page_insert(m, object, base_offset);
			}
			base_page++;			/* Go to the next physical page */
			base_offset += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	} else {
		/* NOTE: we should check the original settings here */
		/* if we have a size > zero a pmap call should be made */
		/* to disable the range */

		/* shadows on contiguous memory are not allowed */
		/* we therefore can use the offset field */
		object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
		object->vo_size = size;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
/*
 *	memory_object_free_from_cache:
 *
 *	Walk the vm_object cache list, removing and freeing vm_objects
 *	which are backed by the pager identified by the caller, (pager_ops).
 *	Remove up to "count" objects, if there are that many available
 *	in the cache.
 *
 *	Walk the list at most once, return the number of vm_objects
 *	actually freed.
 */

__private_extern__ kern_return_t
memory_object_free_from_cache(
	__unused host_t		host,
	__unused memory_object_pager_ops_t pager_ops,
	int			*count)
{
	int	object_released = 0;

	register vm_object_t object = VM_OBJECT_NULL;
	vm_object_t shadow;

/*
	if(host == HOST_NULL)
		return(KERN_INVALID_ARGUMENT);
*/

 try_again:
	vm_object_cache_lock();

	queue_iterate(&vm_object_cached_list, object,
		      vm_object_t, cached_list) {
		if (object->pager &&
		    (pager_ops == object->pager->mo_pager_ops)) {
			vm_object_lock(object);
			queue_remove(&vm_object_cached_list, object,
				     vm_object_t, cached_list);
			vm_object_cached_count--;

			vm_object_cache_unlock();
			/*
			 *	Since this object is in the cache, we know
			 *	that it is initialized and has only a pager's
			 *	(implicit) reference. Take a reference to avoid
			 *	recursive deallocations.
			 */

			assert(object->pager_initialized);
			assert(object->ref_count == 0);
			vm_object_lock_assert_exclusive(object);
			object->ref_count++;

			/*
			 *	Terminate the object.
			 *	If the object had a shadow, we let
			 *	vm_object_deallocate deallocate it.
			 *	"pageout" objects have a shadow, but
			 *	maintain a "paging reference" rather
			 *	than a normal reference.
			 *	(We are careful here to limit recursion.)
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;

			if ((vm_object_terminate(object) == KERN_SUCCESS)
			    && (shadow != VM_OBJECT_NULL)) {
				vm_object_deallocate(shadow);
			}

			if (object_released++ == *count)
				return KERN_SUCCESS;
			goto try_again;
		}
	}
	vm_object_cache_unlock();
	*count = object_released;

	return KERN_SUCCESS;
}
kern_return_t
memory_object_create_named(
	memory_object_t		pager,
	memory_object_offset_t	size,
	memory_object_control_t	*control)
{
	vm_object_t		object;
	vm_object_hash_entry_t	entry;
	lck_mtx_t		*lck;

	*control = MEMORY_OBJECT_CONTROL_NULL;
	if (pager == MEMORY_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	lck = vm_object_hash_lock_spin(pager);
	entry = vm_object_hash_lookup(pager, FALSE);

	if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
	    (entry->object != VM_OBJECT_NULL)) {
		if (entry->object->named == TRUE)
			panic("memory_object_create_named: caller already holds the right");
	}
	vm_object_hash_unlock(lck);

	if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) {
		return(KERN_INVALID_OBJECT);
	}

	/* wait for object (if any) to be ready */
	if (object != VM_OBJECT_NULL) {
		vm_object_lock(object);
		object->named = TRUE;
		while (!object->pager_ready) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_PAGER_READY,
					THREAD_UNINT);
		}
		*control = object->pager_control;
		vm_object_unlock(object);
	}
	return (KERN_SUCCESS);
}
/*
 *	Routine:	memory_object_recover_named [user interface]
 *	Purpose:
 *		Attempt to recover a named reference for a VM object.
 *		VM will verify that the object has not already started
 *		down the termination path, and if it has, will optionally
 *		wait for that to finish.
 *	Returns:
 *		KERN_SUCCESS - we recovered a named reference on the object
 *		KERN_FAILURE - we could not recover a reference (object dead)
 *		KERN_INVALID_ARGUMENT - bad memory object control
 */
kern_return_t
memory_object_recover_named(
	memory_object_control_t	control,
	boolean_t		wait_on_terminating)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (KERN_INVALID_ARGUMENT);
	}
restart:
	vm_object_lock(object);

	if (object->terminating && wait_on_terminating) {
		vm_object_wait(object,
			VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
			THREAD_UNINT);
		goto restart;
	}

	if (!object->alive) {
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	if (object->named == TRUE) {
		vm_object_unlock(object);
		return KERN_SUCCESS;
	}

	if ((object->ref_count == 0) && (!object->terminating)) {
		if (!vm_object_cache_lock_try()) {
			vm_object_unlock(object);
			goto restart;
		}
		queue_remove(&vm_object_cached_list, object,
			     vm_object_t, cached_list);
		vm_object_cached_count--;
		XPR(XPR_VM_OBJECT_CACHE,
		    "memory_object_recover_named: removing %X, head (%X, %X)\n",
		    object,
		    vm_object_cached_list.next,
		    vm_object_cached_list.prev, 0, 0);

		vm_object_cache_unlock();
	}

	object->named = TRUE;
	vm_object_lock_assert_exclusive(object);
	object->ref_count++;
	vm_object_res_reference(object);
	while (!object->pager_ready) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_PAGER_READY,
				THREAD_UNINT);
	}
	vm_object_unlock(object);
	return (KERN_SUCCESS);
}
/*
 *	vm_object_release_name:
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_create_named.
 *
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */

__private_extern__ kern_return_t
vm_object_release_name(
	vm_object_t	object,
	int		flags)
{
	vm_object_t	shadow;
	boolean_t	original_object = TRUE;

	while (object != VM_OBJECT_NULL) {

		vm_object_lock(object);

		assert(object->alive);
		if (original_object)
			assert(object->named);
		assert(object->ref_count > 0);

		/*
		 *	We have to wait for initialization before
		 *	destroying or caching the object.
		 */

		if (object->pager_created && !object->pager_initialized) {
			assert(!object->can_persist);
			vm_object_assert_wait(object,
					VM_OBJECT_EVENT_INITIALIZED,
					THREAD_UNINT);
			vm_object_unlock(object);
			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

		if (((object->ref_count > 1)
		     && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
		    || (object->terminating)) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		} else {
			if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
				vm_object_unlock(object);
				return KERN_SUCCESS;
			}
		}

		if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
		    (object->ref_count == 1)) {
			if (original_object)
				object->named = FALSE;
			vm_object_unlock(object);
			/* let vm_object_deallocate push this thing into */
			/* the cache, if that it is where it is bound */
			vm_object_deallocate(object);
			return KERN_SUCCESS;
		}
		VM_OBJ_RES_DECR(object);
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;

		if (object->ref_count == 1) {
			if (vm_object_terminate(object) != KERN_SUCCESS) {
				if (original_object) {
					return KERN_FAILURE;
				} else {
					return KERN_SUCCESS;
				}
			}
			if (shadow != VM_OBJECT_NULL) {
				original_object = FALSE;
				object = shadow;
				continue;
			}
			return KERN_SUCCESS;
		} else {
			vm_object_lock_assert_exclusive(object);
			object->ref_count--;
			assert(object->ref_count > 0);
			if (original_object)
				object->named = FALSE;
			vm_object_unlock(object);
			return KERN_SUCCESS;
		}
	}
	/*NOTREACHED*/
	assert(0);
	return KERN_FAILURE;
}
__private_extern__ kern_return_t
vm_object_lock_request(
	vm_object_t			object,
	vm_object_offset_t		offset,
	vm_object_size_t		size,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	__unused boolean_t	should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	XPR(XPR_MEMORY_OBJECT,
	    "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    object, offset, size,
	    (((should_return&1)<<1)|should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	(void)vm_object_update(object,
		offset, size, NULL, NULL, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
/*
 * Empty a purgeable object by grabbing the physical pages assigned to it and
 * putting them on the free queue without writing them to backing store, etc.
 * When the pages are next touched they will be demand zero-fill pages.  We
 * skip pages which are busy, being paged in/out, wired, etc.  We do _not_
 * skip referenced/dirty pages, pages on the active queue, etc.  We're more
 * than happy to grab these since this is a purgeable object.  We mark the
 * object as "empty" after reaping its pages.
 *
 * On entry the object must be locked and it must be
 * purgeable with no delayed copies pending.
 */
void
vm_object_purge(vm_object_t object)
{
	vm_object_lock_assert_exclusive(object);

	if (object->purgable == VM_PURGABLE_DENY)
		return;

	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		unsigned int delta;

		assert(object->resident_page_count >=
		       object->wired_page_count);
		delta = (object->resident_page_count -
			 object->wired_page_count);

		if (delta != 0) {
			assert(vm_page_purgeable_count >= delta);
			OSAddAtomic(-delta,
				    (SInt32 *)&vm_page_purgeable_count);
		}
		if (object->wired_page_count != 0) {
			assert(vm_page_purgeable_wired_count >=
			       object->wired_page_count);
			OSAddAtomic(-object->wired_page_count,
				    (SInt32 *)&vm_page_purgeable_wired_count);
		}
	}
	object->purgable = VM_PURGABLE_EMPTY;

	vm_object_reap_pages(object, REAP_PURGEABLE);
}
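
/*
 * Illustrative sketch (not part of the build): the expected way to drive
 * vm_object_purge(), per the conditions above -- the object is locked
 * exclusive and already purgeable with no delayed copies.  "object" here is
 * a hypothetical local.
 */
#if 0
	vm_object_lock(object);
	if (object->purgable != VM_PURGABLE_DENY &&
	    object->copy == VM_OBJECT_NULL)
		vm_object_purge(object);	/* pages reaped, object marked VM_PURGABLE_EMPTY */
	vm_object_unlock(object);
#endif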
/*
 * vm_object_purgeable_control() allows the caller to control and investigate the
 * state of a purgeable object.  A purgeable object is created via a call to
 * vm_allocate() with VM_FLAGS_PURGABLE specified.  A purgeable object will
 * never be coalesced with any other object -- even other purgeable objects --
 * and will thus always remain a distinct object.  A purgeable object has
 * special semantics when its reference count is exactly 1.  If its reference
 * count is greater than 1, then a purgeable object will behave like a normal
 * object and attempts to use this interface will result in an error return
 * of KERN_INVALID_ARGUMENT.
 *
 * A purgeable object may be put into a "volatile" state which will make the
 * object's pages eligible for being reclaimed without paging to backing
 * store if the system runs low on memory.  If the pages in a volatile
 * purgeable object are reclaimed, the purgeable object is said to have been
 * "emptied."  When a purgeable object is emptied the system will reclaim as
 * many pages from the object as it can in a convenient manner (pages already
 * en route to backing store or busy for other reasons are left as is).  When
 * a purgeable object is made volatile, its pages will generally be reclaimed
 * before other pages in the application's working set.  This semantic is
 * generally used by applications which can recreate the data in the object
 * faster than it can be paged in.  One such example might be media assets
 * which can be reread from a much faster RAID volume.
 *
 * A purgeable object may be designated as "non-volatile" which means it will
 * behave like all other objects in the system with pages being written to and
 * read from backing store as needed to satisfy system memory needs.  If the
 * object was emptied before the object was made non-volatile, that fact will
 * be returned as the old state of the purgeable object (see
 * VM_PURGABLE_SET_STATE below).  In this case, any pages of the object which
 * were reclaimed as part of emptying the object will be refaulted in as
 * zero-fill on demand.  It is up to the application to note that an object
 * was emptied and recreate the object's contents if necessary.  When a
 * purgeable object is made non-volatile, its pages will generally not be paged
 * out to backing store in the immediate future.  A purgeable object may also
 * be manually emptied.
 *
 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
 * volatile purgeable object may be queried at any time.  This information may
 * be used as a control input to let the application know when the system is
 * experiencing memory pressure and is reclaiming memory.
 *
 * The specified address may be any address within the purgeable object.  If
 * the specified address does not represent any object in the target task's
 * virtual address space, then KERN_INVALID_ADDRESS will be returned.  If the
 * object containing the specified address is not a purgeable object, then
 * KERN_INVALID_ARGUMENT will be returned.  Otherwise, KERN_SUCCESS will be
 * returned.
 *
 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
 * VM_PURGABLE_GET_STATE.  For VM_PURGABLE_SET_STATE, the in/out parameter
 * state is used to set the new state of the purgeable object and return its
 * old state.  For VM_PURGABLE_GET_STATE, the current state of the purgeable
 * object is returned in the parameter state.
 *
 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY.  These, respectively, represent
 * the non-volatile, volatile and volatile/empty states described above.
 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
 * immediately reclaim as many pages in the object as can be conveniently
 * collected (some may have already been written to backing store or be
 * otherwise busy).
 *
 * The process of making a purgeable object non-volatile and determining its
 * previous state is atomic.  Thus, if a purgeable object is made
 * VM_PURGABLE_NONVOLATILE and the old state is returned as
 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
 * completely intact and will remain so until the object is made volatile
 * again.  If the old state is returned as VM_PURGABLE_EMPTY then the object
 * was reclaimed while it was in a volatile state and its previous contents
 * have been lost.
 */
/*
 * The object must be locked.
 */
kern_return_t
vm_object_purgable_control(
	vm_object_t	object,
	vm_purgable_t	control,
	int		*state)
{
	int		old_state;
	int		new_state;

	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or it can't be purgeable.
		 */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Get current state of the purgeable object.
	 */
	old_state = object->purgable;
	if (old_state == VM_PURGABLE_DENY)
		return KERN_INVALID_ARGUMENT;

	/* purgeable can't have delayed copies - now or in the future */
	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	/*
	 * Execute the desired operation.
	 */
	if (control == VM_PURGABLE_GET_STATE) {
		*state = old_state;
		return KERN_SUCCESS;
	}

	if ((*state) & VM_PURGABLE_DEBUG_EMPTY) {
		object->volatile_empty = TRUE;
	}
	if ((*state) & VM_PURGABLE_DEBUG_FAULT) {
		object->volatile_fault = TRUE;
	}

	new_state = *state & VM_PURGABLE_STATE_MASK;
	if (new_state == VM_PURGABLE_VOLATILE &&
	    object->volatile_empty) {
		new_state = VM_PURGABLE_EMPTY;
	}

	switch (new_state) {
	case VM_PURGABLE_DENY:
	case VM_PURGABLE_NONVOLATILE:
		object->purgable = new_state;

		if (old_state == VM_PURGABLE_VOLATILE) {
			unsigned int delta;

			assert(object->resident_page_count >=
			       object->wired_page_count);
			delta = (object->resident_page_count -
				 object->wired_page_count);

			assert(vm_page_purgeable_count >= delta);

			if (delta != 0) {
				OSAddAtomic(-delta,
					    (SInt32 *)&vm_page_purgeable_count);
			}
			if (object->wired_page_count != 0) {
				assert(vm_page_purgeable_wired_count >=
				       object->wired_page_count);
				OSAddAtomic(-object->wired_page_count,
					    (SInt32 *)&vm_page_purgeable_wired_count);
			}

			vm_page_lock_queues();

			assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
			purgeable_q_t queue = vm_purgeable_object_remove(object);

			vm_purgeable_token_delete_first(queue);
			assert(queue->debug_count_objects >= 0);

			vm_page_unlock_queues();
		}
		break;

	case VM_PURGABLE_VOLATILE:
		if (object->volatile_fault) {
			vm_page_t	p;
			int		refmod;

			queue_iterate(&object->memq, p, vm_page_t, listq) {
				refmod = pmap_disconnect(p->phys_page);
				if ((refmod & VM_MEM_MODIFIED) &&
				    !p->dirty) {
					p->dirty = TRUE;
				}
			}
		}

		if (old_state == VM_PURGABLE_EMPTY &&
		    object->resident_page_count == 0)
			break;

		purgeable_q_t queue;

		/* find the correct queue */
		if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
			queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
		else {
			if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
				queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
			else
				queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
		}

		if (old_state == VM_PURGABLE_NONVOLATILE ||
		    old_state == VM_PURGABLE_EMPTY) {
			unsigned int delta;

			/* try to add token... this can fail */
			vm_page_lock_queues();

			kern_return_t result = vm_purgeable_token_add(queue);
			if (result != KERN_SUCCESS) {
				vm_page_unlock_queues();
				return result;
			}
			vm_page_unlock_queues();

			assert(object->resident_page_count >=
			       object->wired_page_count);
			delta = (object->resident_page_count -
				 object->wired_page_count);

			if (delta != 0) {
				OSAddAtomic(delta,
					    &vm_page_purgeable_count);
			}
			if (object->wired_page_count != 0) {
				OSAddAtomic(object->wired_page_count,
					    &vm_page_purgeable_wired_count);
			}

			object->purgable = new_state;

			/* object should not be on a queue */
			assert(object->objq.next == NULL && object->objq.prev == NULL);
		}
		else if (old_state == VM_PURGABLE_VOLATILE) {
			/*
			 * if reassigning priorities / purgeable groups, we don't change the
			 * token queue. So moving priorities will not make pages stay around longer.
			 * Reasoning is that the algorithm gives most priority to the most important
			 * object. If a new token is added, the most important object's priority is boosted.
			 * This biases the system already for purgeable queues that move a lot.
			 * It doesn't seem more biasing is necessary in this case, where no new object is added.
			 */
			assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */

			purgeable_q_t old_queue = vm_purgeable_object_remove(object);

			if (old_queue != queue) {
				kern_return_t result;

				/* Changing queue. Have to move token. */
				vm_page_lock_queues();
				vm_purgeable_token_delete_first(old_queue);
				result = vm_purgeable_token_add(queue);
				vm_page_unlock_queues();

				assert(result == KERN_SUCCESS);	/* this should never fail since we just freed a token */
			}
		}
		vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT);

		assert(queue->debug_count_objects >= 0);

		break;

	case VM_PURGABLE_EMPTY:
		if (object->volatile_fault) {
			vm_page_t	p;
			int		refmod;

			queue_iterate(&object->memq, p, vm_page_t, listq) {
				refmod = pmap_disconnect(p->phys_page);
				if ((refmod & VM_MEM_MODIFIED) &&
				    !p->dirty) {
					p->dirty = TRUE;
				}
			}
		}

		if (old_state != new_state) {
			assert(old_state == VM_PURGABLE_NONVOLATILE ||
			       old_state == VM_PURGABLE_VOLATILE);
			if (old_state == VM_PURGABLE_VOLATILE) {
				purgeable_q_t old_queue;

				/* object should be on a queue */
				assert(object->objq.next != NULL &&
				       object->objq.prev != NULL);
				old_queue = vm_purgeable_object_remove(object);

				vm_page_lock_queues();
				vm_purgeable_token_delete_first(old_queue);
				vm_page_unlock_queues();
			}
			(void) vm_object_purge(object);
		}
		break;
	}

	*state = old_state;

	return KERN_SUCCESS;
}
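
/*
 * Illustrative sketch (not part of the build): how the purgeable semantics
 * described above look from user space, via the public vm_allocate() and
 * vm_purgable_control() interfaces.  Error handling is omitted and the
 * 1 MB buffer size is an arbitrary assumption.
 */
#if 0
	vm_address_t	addr = 0;
	int		state;

	/* create a purgeable region */
	vm_allocate(mach_task_self(), &addr, 1024 * 1024,
		    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);

	/* ...fill the cache... then let the kernel reclaim it under pressure */
	state = VM_PURGABLE_VOLATILE;
	vm_purgable_control(mach_task_self(), addr, VM_PURGABLE_SET_STATE, &state);

	/* before reuse: make it non-volatile and check whether it was emptied */
	state = VM_PURGABLE_NONVOLATILE;
	vm_purgable_control(mach_task_self(), addr, VM_PURGABLE_SET_STATE, &state);
	if (state == VM_PURGABLE_EMPTY) {
		/* contents were reclaimed; regenerate them */
	}
#endif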
#if	TASK_SWAPPER
/*
 *	vm_object_res_deallocate
 *
 *	(recursively) decrement residence counts on vm objects and their shadows.
 *	Called from vm_object_deallocate and when swapping out an object.
 *
 *	The object is locked, and remains locked throughout the function,
 *	even as we iterate down the shadow chain.  Locks on intermediate objects
 *	will be dropped, but not the original object.
 *
 *	NOTE: this function used to use recursion, rather than iteration.
 */

__private_extern__ void
vm_object_res_deallocate(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked so it can be called directly
	 * from vm_object_deallocate.  Original object is never
	 * unlocked.
	 */
	assert(object->res_count > 0);
	while (--object->res_count == 0) {
		assert(object->ref_count >= object->res_count);
		vm_object_deactivate_all_pages(object);
		/* iterate on shadow, if present */
		if (object->shadow != VM_OBJECT_NULL) {
			vm_object_t tmp_object = object->shadow;
			vm_object_lock(tmp_object);
			if (object != orig_object)
				vm_object_unlock(object);
			object = tmp_object;
			assert(object->res_count > 0);
		} else
			break;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
/*
 *	vm_object_res_reference
 *
 *	Internal function to increment residence count on a vm object
 *	and its shadows.  It is called only from vm_object_reference, and
 *	when swapping in a vm object, via vm_map_swap.
 *
 *	The object is locked, and remains locked throughout the function,
 *	even as we iterate down the shadow chain.  Locks on intermediate objects
 *	will be dropped, but not the original object.
 *
 *	NOTE: this function used to use recursion, rather than iteration.
 */
__private_extern__ void
vm_object_res_reference(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked, so this can be called directly
	 * from vm_object_reference.  This lock is never released.
	 */
	while ((++object->res_count == 1) &&
	       (object->shadow != VM_OBJECT_NULL)) {
		vm_object_t tmp_object = object->shadow;

		assert(object->ref_count >= object->res_count);
		vm_object_lock(tmp_object);
		if (object != orig_object)
			vm_object_unlock(object);
		object = tmp_object;
	}
	if (object != orig_object)
		vm_object_unlock(object);
	assert(orig_object->ref_count >= orig_object->res_count);
}
#endif	/* TASK_SWAPPER */
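
/*
 * Illustrative sketch, not part of the original source: the hand-over-hand
 * locking discipline used by vm_object_res_reference() and
 * vm_object_res_deallocate() when walking a shadow chain -- the original
 * object stays locked for the whole walk, while each intermediate object is
 * locked on entry and unlocked once we have stepped past it.  The helper
 * name "example_walk_shadow_chain" is hypothetical.
 */
#if 0	/* example only */
static void
example_walk_shadow_chain(vm_object_t object)	/* "object" arrives locked */
{
	vm_object_t orig_object = object;

	while (object->shadow != VM_OBJECT_NULL) {
		vm_object_t next = object->shadow;

		vm_object_lock(next);		/* lock the next link first */
		if (object != orig_object)
			vm_object_unlock(object); /* then drop the previous one */
		object = next;
	}
	if (object != orig_object)
		vm_object_unlock(object);	/* orig_object remains locked */
}
#endif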
/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
#ifdef vm_object_reference
#undef vm_object_reference
#endif
__private_extern__ void
vm_object_reference(
	register vm_object_t	object)
{
	if (object == VM_OBJECT_NULL)
		return;

	vm_object_lock(object);
	assert(object->ref_count > 0);
	vm_object_reference_locked(object);
	vm_object_unlock(object);
}
#ifdef MACH_BSD
/*
 * Scale the vm_object_cache
 * This is required to make sure that the vm_object_cache is big
 * enough to effectively cache the mapped file.
 * This is really important with UBC as all the regular file vnodes
 * have a memory object associated with them.  Having this cache too
 * small results in rapid reclaim of vnodes and hurts performance a LOT!
 *
 * This is also needed as the number of vnodes can be dynamically scaled.
 */
kern_return_t
adjust_vm_object_cache(
	__unused vm_size_t oval,
	__unused vm_size_t nval)
{
	vm_object_cached_max = nval;
	vm_object_cache_trim(FALSE);
	return (KERN_SUCCESS);
}
#endif /* MACH_BSD */
/*
 * vm_object_transpose
 *
 * This routine takes two VM objects of the same size and exchanges
 * their backing store.
 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
 *
 * The VM objects must not be locked by caller.
 * (A short caller sketch follows this routine.)
 */
unsigned int vm_object_transpose_count = 0;
kern_return_t
vm_object_transpose(
	vm_object_t		object1,
	vm_object_t		object2,
	vm_object_size_t	transpose_size)
{
	vm_object_t		tmp_object;
	kern_return_t		retval;
	boolean_t		object1_locked, object2_locked;
	vm_page_t		page;
	vm_object_offset_t	page_offset;
	lck_mtx_t		*hash_lck;
	vm_object_hash_entry_t	hash_entry;

	tmp_object = VM_OBJECT_NULL;
	object1_locked = FALSE; object2_locked = FALSE;
	if (object1 == object2 ||
	    object1 == VM_OBJECT_NULL ||
	    object2 == VM_OBJECT_NULL) {
		/*
		 * If the 2 VM objects are the same, there's
		 * no point in exchanging their backing store.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Since we need to lock both objects at the same time,
	 * make sure we always lock them in the same order to
	 * avoid deadlocks.
	 */
	if (object1 > object2) {
		tmp_object = object1;
		object1 = object2;
		object2 = tmp_object;
	}

	/*
	 * Allocate a temporary VM object to hold object1's contents
	 * while we copy object2 to object1.
	 */
	tmp_object = vm_object_allocate(transpose_size);
	vm_object_lock(tmp_object);
	tmp_object->can_persist = FALSE;
	/*
	 * Grab control of the 1st VM object.
	 */
	vm_object_lock(object1);
	object1_locked = TRUE;
	if (!object1->alive || object1->terminating ||
	    object1->copy || object1->shadow || object1->shadowed ||
	    object1->purgable != VM_PURGABLE_DENY) {
		/*
		 * We don't deal with copy or shadow objects (yet).
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	/*
	 * We're about to mess with the object's backing store and
	 * taking a "paging_in_progress" reference wouldn't be enough
	 * to prevent any paging activity on this object, so the caller should
	 * have "quiesced" the objects beforehand, via a UPL operation with
	 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
	 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
	 *
	 * Wait for any paging operation to complete (but only paging, not
	 * other kinds of activity not linked to the pager).  After we're
	 * satisfied that there's no more paging in progress, we keep the
	 * object locked, to guarantee that no one tries to access its pager.
	 */
	vm_object_paging_only_wait(object1, THREAD_UNINT);

	/*
	 * Same as above for the 2nd object...
	 */
	vm_object_lock(object2);
	object2_locked = TRUE;
	if (! object2->alive || object2->terminating ||
	    object2->copy || object2->shadow || object2->shadowed ||
	    object2->purgable != VM_PURGABLE_DENY) {
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	vm_object_paging_only_wait(object2, THREAD_UNINT);

	if (object1->vo_size != object2->vo_size ||
	    object1->vo_size != transpose_size) {
		/*
		 * If the 2 objects don't have the same size, we can't
		 * exchange their backing stores or one would overflow.
		 * If their size doesn't match the caller's
		 * "transpose_size", we can't do it either because the
		 * transpose operation will affect the entire span of
		 * the 2 objects.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	/*
	 * Transpose the lists of resident pages.
	 * This also updates the resident_page_count and the memq_hint.
	 */
	if (object1->phys_contiguous || queue_empty(&object1->memq)) {
		/*
		 * No pages in object1, just transfer pages
		 * from object2 to object1.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset, FALSE);
		}
		assert(queue_empty(&object2->memq));
	} else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
		/*
		 * No pages in object2, just transfer pages
		 * from object1 to object2.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			vm_page_rename(page, object2, page->offset, FALSE);
		}
		assert(queue_empty(&object1->memq));
	} else {
		/* transfer object1's pages to tmp_object */
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			page_offset = page->offset;
			vm_page_remove(page, TRUE);
			page->offset = page_offset;
			queue_enter(&tmp_object->memq, page, vm_page_t, listq);
		}
		assert(queue_empty(&object1->memq));
		/* transfer object2's pages to object1 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset, FALSE);
		}
		assert(queue_empty(&object2->memq));
		/* transfer tmp_object's pages (object1's originals) to object2 */
		while (!queue_empty(&tmp_object->memq)) {
			page = (vm_page_t) queue_first(&tmp_object->memq);
			queue_remove(&tmp_object->memq, page,
				     vm_page_t, listq);
			vm_page_insert(page, object2, page->offset);
		}
		assert(queue_empty(&tmp_object->memq));
	}
#define __TRANSPOSE_FIELD(field)				\
MACRO_BEGIN							\
	tmp_object->field = object1->field;			\
	object1->field = object2->field;			\
	object2->field = tmp_object->field;			\
MACRO_END

	/* "Lock" refers to the object not its contents */
	/* "size" should be identical */
	assert(object1->vo_size == object2->vo_size);
	/* "memq_hint" was updated above when transposing pages */
	/* "ref_count" refers to the object not its contents */
	/* "res_count" refers to the object not its contents */
	/* "resident_page_count" was updated above when transposing pages */
	/* "wired_page_count" was updated above when transposing pages */
	/* "reusable_page_count" was updated above when transposing pages */
	/* there should be no "copy" */
	assert(!object1->copy);
	assert(!object2->copy);
	/* there should be no "shadow" */
	assert(!object1->shadow);
	assert(!object2->shadow);
	__TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */
	__TRANSPOSE_FIELD(pager);
	__TRANSPOSE_FIELD(paging_offset);
	__TRANSPOSE_FIELD(pager_control);
	/* update the memory_objects' pointers back to the VM objects */
	if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object1->pager_control,
					       object1);
	}
	if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object2->pager_control,
					       object2);
	}
	__TRANSPOSE_FIELD(copy_strategy);
	/* "paging_in_progress" refers to the object not its contents */
	assert(!object1->paging_in_progress);
	assert(!object2->paging_in_progress);
	assert(object1->activity_in_progress);
	assert(object2->activity_in_progress);
	/* "all_wanted" refers to the object not its contents */
	__TRANSPOSE_FIELD(pager_created);
	__TRANSPOSE_FIELD(pager_initialized);
	__TRANSPOSE_FIELD(pager_ready);
	__TRANSPOSE_FIELD(pager_trusted);
	__TRANSPOSE_FIELD(can_persist);
	__TRANSPOSE_FIELD(internal);
	__TRANSPOSE_FIELD(temporary);
	__TRANSPOSE_FIELD(private);
	__TRANSPOSE_FIELD(pageout);
	/* "alive" should be set */
	assert(object1->alive);
	assert(object2->alive);
	/* "purgeable" should be non-purgeable */
	assert(object1->purgable == VM_PURGABLE_DENY);
	assert(object2->purgable == VM_PURGABLE_DENY);
	/* "shadowed" refers to the object not its contents */
	__TRANSPOSE_FIELD(silent_overwrite);
	__TRANSPOSE_FIELD(advisory_pageout);
	__TRANSPOSE_FIELD(true_share);
	/* "terminating" should not be set */
	assert(!object1->terminating);
	assert(!object2->terminating);
	__TRANSPOSE_FIELD(named);
	/* "shadow_severed" refers to the object not its contents */
	__TRANSPOSE_FIELD(phys_contiguous);
	__TRANSPOSE_FIELD(nophyscache);
	/* "cached_list.next" points to transposed object */
	object1->cached_list.next = (queue_entry_t) object2;
	object2->cached_list.next = (queue_entry_t) object1;
	/* "cached_list.prev" should be NULL */
	assert(object1->cached_list.prev == NULL);
	assert(object2->cached_list.prev == NULL);
	/* "msr_q" is linked to the object not its contents */
	assert(queue_empty(&object1->msr_q));
	assert(queue_empty(&object2->msr_q));
	__TRANSPOSE_FIELD(last_alloc);
	__TRANSPOSE_FIELD(sequential);
	__TRANSPOSE_FIELD(pages_created);
	__TRANSPOSE_FIELD(pages_used);
	__TRANSPOSE_FIELD(scan_collisions);
	__TRANSPOSE_FIELD(existence_map);
	__TRANSPOSE_FIELD(cow_hint);
	__TRANSPOSE_FIELD(paging_object);
	__TRANSPOSE_FIELD(wimg_bits);
	__TRANSPOSE_FIELD(set_cache_attr);
	__TRANSPOSE_FIELD(code_signed);
	if (object1->hashed) {
		hash_lck = vm_object_hash_lock_spin(object2->pager);
		hash_entry = vm_object_hash_lookup(object2->pager, FALSE);
		assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
		hash_entry->object = object2;
		vm_object_hash_unlock(hash_lck);
	}
	if (object2->hashed) {
		hash_lck = vm_object_hash_lock_spin(object1->pager);
		hash_entry = vm_object_hash_lookup(object1->pager, FALSE);
		assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
		hash_entry->object = object1;
		vm_object_hash_unlock(hash_lck);
	}
	__TRANSPOSE_FIELD(hashed);
	object1->transposed = TRUE;
	object2->transposed = TRUE;
	__TRANSPOSE_FIELD(mapping_in_progress);
	__TRANSPOSE_FIELD(volatile_empty);
	__TRANSPOSE_FIELD(volatile_fault);
	__TRANSPOSE_FIELD(all_reusable);
	assert(object1->blocked_access);
	assert(object2->blocked_access);
	assert(object1->__object2_unused_bits == 0);
	assert(object2->__object2_unused_bits == 0);
	/* "uplq" refers to the object not its contents (see upl_transpose()) */
	assert(object1->objq.next == NULL);
	assert(object1->objq.prev == NULL);
	assert(object2->objq.next == NULL);
	assert(object2->objq.prev == NULL);

#undef __TRANSPOSE_FIELD

	retval = KERN_SUCCESS;
done:
	/*
	 * Cleanup.
	 */
	if (tmp_object != VM_OBJECT_NULL) {
		vm_object_unlock(tmp_object);
		/*
		 * Re-initialize the temporary object to avoid
		 * deallocating a real pager.
		 */
		_vm_object_allocate(transpose_size, tmp_object);
		vm_object_deallocate(tmp_object);
		tmp_object = VM_OBJECT_NULL;
	}

	if (object1_locked) {
		vm_object_unlock(object1);
		object1_locked = FALSE;
	}
	if (object2_locked) {
		vm_object_unlock(object2);
		object2_locked = FALSE;
	}

	vm_object_transpose_count++;

	return retval;
}
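
/*
 * Illustrative sketch, not part of the original source: by the time
 * vm_object_transpose() runs, the caller is expected to have "quiesced" both
 * objects -- e.g. via UPLs created with UPL_SET_IO_WIRE and UPL_BLOCK_ACCESS
 * -- so no pages can move underneath us.  The helper name
 * "example_swap_backing_store" is hypothetical; upl_transpose() is the
 * in-tree user of this routine.
 */
#if 0	/* example only */
static kern_return_t
example_swap_backing_store(vm_object_t a, vm_object_t b, vm_object_size_t size)
{
	kern_return_t kr;

	/* caller has already wired and blocked access to both objects */
	kr = vm_object_transpose(a, b, size);
	if (kr != KERN_SUCCESS) {
		/* sizes mismatched, or one object had a copy/shadow, etc. */
		return kr;
	}
	/* the two objects' backing store is now exchanged */
	return KERN_SUCCESS;
}
#endif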
/*
 * vm_object_cluster_size
 *
 * Determine how big a cluster we should issue an I/O for...
 *
 * Inputs:   *start == offset of page needed
 *           *length == maximum cluster pager can handle
 * Outputs:  *start == beginning offset of cluster
 *           *length == length of cluster to try
 *
 * The original *start will be encompassed by the cluster.
 * (A worked example of the sizing math follows this routine.)
 */
extern int speculative_reads_disabled;
extern int ignore_is_ssd;

unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
unsigned int preheat_pages_min = 8;

uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];
uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1];
__private_extern__ void
vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
		       vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming)
{
	vm_size_t		pre_heat_size;
	vm_size_t		tail_size;
	vm_size_t		head_size;
	vm_size_t		max_length;
	vm_size_t		cluster_size;
	vm_object_offset_t	object_size;
	vm_object_offset_t	orig_start;
	vm_object_offset_t	target_start;
	vm_object_offset_t	offset;
	vm_behavior_t		behavior;
	boolean_t		look_behind = TRUE;
	boolean_t		look_ahead  = TRUE;
	boolean_t		isSSD = FALSE;
	uint32_t		throttle_limit;
	int			sequential_run;
	int			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
	unsigned int		max_ph_size;
	unsigned int		min_ph_size;
	unsigned int		min_ph_size_in_bytes;

	assert( !(*length & PAGE_MASK));
	assert( !(*start & PAGE_MASK_64));
	/*
	 * remember maximum length of run requested
	 */
	max_length = *length;
	/*
	 * we'll always return a cluster size of at least
	 * 1 page, since the original fault must always
	 * be handled
	 */
	*length = PAGE_SIZE;

	if (speculative_reads_disabled || fault_info == NULL) {
		/*
		 * no cluster... just fault the page in
		 */
		return;
	}
	orig_start = *start;
	target_start = orig_start;
	cluster_size = round_page(fault_info->cluster_size);
	behavior = fault_info->behavior;

	vm_object_lock(object);

	if (object->pager == MEMORY_OBJECT_NULL)
		goto out;	/* pager is gone for this object, nothing more to do */
	if (!ignore_is_ssd)
		vnode_pager_get_isSSD(object->pager, &isSSD);

	min_ph_size = preheat_pages_min;
	max_ph_size = preheat_pages_max;

	if (min_ph_size < 1)
		min_ph_size = 1;

	if (max_ph_size < 1)
		max_ph_size = 1;
	else if (max_ph_size > MAX_UPL_TRANSFER)
		max_ph_size = MAX_UPL_TRANSFER;

	if (max_length > (max_ph_size * PAGE_SIZE))
		max_length = max_ph_size * PAGE_SIZE;

	if (max_length <= PAGE_SIZE)
		goto out;

	min_ph_size_in_bytes = min_ph_size * PAGE_SIZE;
	if (object->internal)
		object_size = object->vo_size;
	else
		vnode_pager_get_object_size(object->pager, &object_size);

	object_size = round_page_64(object_size);

	if (orig_start >= object_size) {
		/*
		 * fault occurred beyond the EOF...
		 * we need to punt w/o changing the
		 * starting offset
		 */
		goto out;
	}
	if (object->pages_used > object->pages_created) {
		/*
		 * must have wrapped our 32 bit counters
		 * so reset
		 */
		object->pages_used = object->pages_created = 0;
	}
	if ((sequential_run = object->sequential)) {
		if (sequential_run < 0) {
			sequential_behavior = VM_BEHAVIOR_RSEQNTL;
			sequential_run = 0 - sequential_run;
		} else {
			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
		}
	}
	switch (behavior) {

	default:
		behavior = VM_BEHAVIOR_DEFAULT;
		/* fall through */

	case VM_BEHAVIOR_DEFAULT:
		if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
			goto out;

		if (sequential_run >= (3 * PAGE_SIZE)) {
			pre_heat_size = sequential_run + PAGE_SIZE;

			if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL)
				look_behind = FALSE;
			else
				look_ahead = FALSE;

			*io_streaming = 1;
		} else {
			if (object->pages_created < (20 * min_ph_size)) {
				/*
				 * prime the pump
				 */
				pre_heat_size = min_ph_size_in_bytes;
			} else {
				/*
				 * Linear growth in PH size: The maximum size is max_length...
				 * this calculation will result in a size that is neither a
				 * power of 2 nor a multiple of PAGE_SIZE... so round
				 * it up to the nearest PAGE_SIZE boundary
				 */
				pre_heat_size = (max_length * object->pages_used) / object->pages_created;

				if (pre_heat_size < min_ph_size_in_bytes)
					pre_heat_size = min_ph_size_in_bytes;
				else
					pre_heat_size = round_page(pre_heat_size);
			}
		}
		break;

	case VM_BEHAVIOR_RANDOM:
		if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
			goto out;
		break;

	case VM_BEHAVIOR_SEQUENTIAL:
		if ((pre_heat_size = cluster_size) == 0)
			pre_heat_size = sequential_run + PAGE_SIZE;
		look_behind = FALSE;
		*io_streaming = 1;

		break;

	case VM_BEHAVIOR_RSEQNTL:
		if ((pre_heat_size = cluster_size) == 0)
			pre_heat_size = sequential_run + PAGE_SIZE;
		look_ahead = FALSE;
		*io_streaming = 1;

		break;
	}
	throttle_limit = (uint32_t) max_length;
	assert(throttle_limit == max_length);

	if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) {
		if (max_length > throttle_limit)
			max_length = throttle_limit;
	}
	if (pre_heat_size > max_length)
		pre_heat_size = max_length;

	if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) {
		if (vm_page_free_count < vm_page_throttle_limit)
			pre_heat_size = trunc_page(pre_heat_size / 16);
		else if (vm_page_free_count < vm_page_free_target)
			pre_heat_size = trunc_page(pre_heat_size / 4);

		if (pre_heat_size < min_ph_size_in_bytes)
			pre_heat_size = min_ph_size_in_bytes;
	}
	if (look_ahead == TRUE) {
		if (look_behind == TRUE) {
			/*
			 * if we get here it's due to a random access...
			 * so we want to center the original fault address
			 * within the cluster we will issue... make sure
			 * to calculate 'head_size' as a multiple of PAGE_SIZE...
			 * 'pre_heat_size' is a multiple of PAGE_SIZE but not
			 * necessarily an even number of pages so we need to truncate
			 * the result to a PAGE_SIZE boundary
			 */
			head_size = trunc_page(pre_heat_size / 2);

			if (target_start > head_size)
				target_start -= head_size;
			else
				target_start = 0;

			/*
			 * 'target_start' at this point represents the beginning offset
			 * of the cluster we are considering... 'orig_start' will be in
			 * the center of this cluster if we didn't have to clip the start
			 * due to running into the start of the file
			 */
		}
		if ((target_start + pre_heat_size) > object_size)
			pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start));
		/*
		 * at this point calculate the number of pages beyond the original fault
		 * address that we want to consider... this is guaranteed not to extend beyond
		 * the current EOF...
		 */
		assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start));
		tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE;
	} else {
		if (pre_heat_size > target_start) {
			/*
			 * since pre_heat_size is always smaller than 2^32,
			 * if it is larger than target_start (a 64 bit value)
			 * it is safe to clip target_start to 32 bits
			 */
			pre_heat_size = (vm_size_t) target_start;
		}
		tail_size = 0;
	}
	assert( !(target_start & PAGE_MASK_64));
	assert( !(pre_heat_size & PAGE_MASK));

	pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;

	if (pre_heat_size <= PAGE_SIZE)
		goto out;
	if (look_behind == TRUE) {
		/*
		 * take a look at the pages before the original
		 * faulting offset... recalculate this in case
		 * we had to clip 'pre_heat_size' above to keep
		 * from running past the EOF.
		 */
		head_size = pre_heat_size - tail_size - PAGE_SIZE;

		for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
			/*
			 * don't poke below the lowest offset
			 */
			if (offset < fault_info->lo_offset)
				break;
			/*
			 * for external objects and internal objects w/o an existence map
			 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
			 */
			if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
				/*
				 * we know for a fact that the pager can't provide the page
				 * so don't include it or any pages beyond it in this cluster
				 */
				break;
			}
			if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
				/*
				 * don't bridge resident pages
				 */
				break;
			}
			*start -= PAGE_SIZE_64;
			*length += PAGE_SIZE;
		}
	}
	if (look_ahead == TRUE) {
		for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
			/*
			 * don't poke above the highest offset
			 */
			if (offset >= fault_info->hi_offset)
				break;
			assert(offset < object_size);

			/*
			 * for external objects and internal objects w/o an existence map
			 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
			 */
			if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
				/*
				 * we know for a fact that the pager can't provide the page
				 * so don't include it or any pages beyond it in this cluster
				 */
				break;
			}
			if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
				/*
				 * don't bridge resident pages
				 */
				break;
			}
			*length += PAGE_SIZE;
		}
	}
out:
	if (*length > max_length)
		*length = max_length;

	pre_heat_cluster[*length / PAGE_SIZE]++;

	vm_object_unlock(object);
}
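
/*
 * Worked example, not part of the original source (hypothetical numbers):
 * for a random access the cluster is centered on the faulting page by
 * splitting pre_heat_size into a head (behind the fault) and a tail (ahead
 * of it), each a multiple of PAGE_SIZE, with one page reserved for the
 * faulting page itself.  The helper name "example_center_cluster" is
 * hypothetical.
 */
#if 0	/* example only */
static void
example_center_cluster(vm_object_offset_t orig_start, vm_size_t pre_heat_size)
{
	vm_object_offset_t	target_start = orig_start;
	vm_size_t		head_size, tail_size;

	/* e.g. pre_heat_size == 8 * PAGE_SIZE */
	head_size = trunc_page(pre_heat_size / 2);	/* 4 pages behind the fault */
	if (target_start > head_size)
		target_start -= head_size;
	else
		target_start = 0;

	/* pages ahead of the fault: total minus head minus the fault page */
	tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE;

	/* with no clipping: head_size == 4 pages, tail_size == 3 pages */
	(void)tail_size;
}
#endif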
/*
 * Allow manipulation of individual page state.  This is actually part of
 * the UPL regimen but takes place on the VM object rather than on a UPL.
 * (A caller sketch follows this routine.)
 */

kern_return_t
vm_object_page_op(
	vm_object_t		object,
	vm_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_page_t		dst_page;

	vm_object_lock(object);

	if(ops & UPL_POP_PHYSICAL) {
		if(object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
					(object->vo_shadow_offset >> PAGE_SHIFT);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if(object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while(TRUE) {
		if((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* Sync up on getting the busy bit */
		if((dst_page->busy || dst_page->cleaning) &&
			   (((ops & UPL_POP_SET) &&
			   (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			if (dst_page->pmapped == TRUE)
				pmap_disconnect(dst_page->phys_page);

			VM_PAGE_FREE(dst_page);
			break;
		}

		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
			if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
			if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
			if(dst_page->absent) *flags |= UPL_POP_ABSENT;
			if(dst_page->busy) *flags |= UPL_POP_BUSY;
		}

		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if(ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
		}
		if(ops & UPL_POP_CLR) {
			assert(dst_page->busy);
			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
			if (ops & UPL_POP_BUSY) {
				dst_page->busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}

		if (dst_page->encrypted) {
			/*
			 * ENCRYPTED SWAP:
			 * We need to decrypt this encrypted page before the
			 * caller can access its contents.
			 * But if the caller really wants to access the page's
			 * contents, they have to keep the page "busy".
			 * Otherwise, the page could get recycled or re-encrypted
			 * at any time.
			 */
			if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
			    dst_page->busy) {
				/*
				 * The page is stable enough to be accessed by
				 * the caller, so make sure its contents are
				 * not encrypted.
				 */
				vm_page_decrypt(dst_page, 0);
			} else {
				/*
				 * The page is not busy, so don't bother
				 * decrypting it, since anything could
				 * happen to it between now and when the
				 * caller wants to access it.
				 * We should not give the caller access
				 * to this page.
				 */
				assert(!phys_entry);
			}
		}

		if (phys_entry) {
			/*
			 * The physical page number will remain valid
			 * only if the page is kept busy.
			 * ENCRYPTED SWAP: make sure we don't let the
			 * caller access an encrypted page.
			 */
			assert(dst_page->busy);
			assert(!dst_page->encrypted);
			*phys_entry = dst_page->phys_page;
		}

		break;
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
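
/*
 * Illustrative sketch, not part of the original source: per the comments
 * above, a caller that changes page state should set UPL_POP_BUSY in the
 * same call (or have set it previously) and clear the busy bit when done so
 * that waiters get a PAGE_WAKEUP().  The helper name "example_dirty_one_page"
 * is hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_dirty_one_page(vm_object_t object, vm_object_offset_t offset)
{
	kern_return_t	kr;
	ppnum_t		phys;
	int		flags;

	/* mark the page busy and dirty in one operation */
	kr = vm_object_page_op(object, offset,
			       UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DIRTY,
			       &phys, &flags);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... operate on the physical page "phys" while it stays busy ... */

	/* drop the busy bit; the routine issues the wakeup for us */
	return vm_object_page_op(object, offset,
				 UPL_POP_CLR | UPL_POP_BUSY, NULL, NULL);
}
#endif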
/*
 * vm_object_range_op offers performance enhancement over
 * vm_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 * (A caller sketch follows this routine.)
 */

kern_return_t
vm_object_range_op(
	vm_object_t		object,
	vm_object_offset_t	offset_beg,
	vm_object_offset_t	offset_end,
	int			ops,
	uint32_t		*range)
{
	vm_object_offset_t	offset;
	vm_page_t		dst_page;

	if (offset_end - offset_beg > (uint32_t) -1) {
		/* range is too big and would overflow "*range" */
		return KERN_INVALID_ARGUMENT;
	}
	if (object->resident_page_count == 0) {
		if (ops & UPL_ROP_PRESENT) {
			*range = 0;
		} else {
			*range = (uint32_t) (offset_end - offset_beg);
			assert(*range == (offset_end - offset_beg));
		}
		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg & ~PAGE_MASK_64;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->list_req_pending) {
					/*
					 * This page isn't on a UPL yet.
					 * So it's safe to steal it here and dump it.
					 */
				} else if (dst_page->busy || dst_page->cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object, dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since its
					 * state may have changed while we slept
					 * it might even belong to a different object
					 * at this point
					 */
					continue;
				}
				if (dst_page->pmapped == TRUE)
					pmap_disconnect(dst_page->phys_page);

				VM_PAGE_FREE(dst_page);

			} else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent)
				break;
		} else if (ops & UPL_ROP_PRESENT)
			break;

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range) {
		if (offset > offset_end)
			offset = offset_end;
		if(offset > offset_beg) {
			*range = (uint32_t) (offset - offset_beg);
			assert(*range == (offset - offset_beg));
		} else {
			*range = 0;
		}
	}
	return KERN_SUCCESS;
}
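
/*
 * Illustrative sketch, not part of the original source: dumping every
 * resident page in a range with a single call instead of one
 * vm_object_page_op() per page.  The helper name "example_dump_range" and
 * the variable "covered" are hypothetical.
 */
#if 0	/* example only */
static void
example_dump_range(vm_object_t object,
		   vm_object_offset_t beg,
		   vm_object_offset_t end)
{
	uint32_t	covered = 0;
	kern_return_t	kr;

	kr = vm_object_range_op(object, beg, end, UPL_ROP_DUMP, &covered);
	if (kr == KERN_SUCCESS) {
		/* covered == number of bytes examined starting at "beg" */
	}
}
#endif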
uint32_t scan_object_collision = 0;

void
vm_object_lock(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	lck_rw_lock_exclusive(&object->Lock);
}

boolean_t
vm_object_lock_avoid(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		return TRUE;
	}
	return FALSE;
}

boolean_t
_vm_object_lock_try(vm_object_t object)
{
	return (lck_rw_try_lock_exclusive(&object->Lock));
}

boolean_t
vm_object_lock_try(vm_object_t object)
{
	/*
	 * Called from hibernate path so check before blocking.
	 */
	if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level()==0) {
		mutex_pause(2);
	}
	return _vm_object_lock_try(object);
}

void
vm_object_lock_shared(vm_object_t object)
{
	if (vm_object_lock_avoid(object)) {
		mutex_pause(2);
	}
	lck_rw_lock_shared(&object->Lock);
}

boolean_t
vm_object_lock_try_shared(vm_object_t object)
{
	if (vm_object_lock_avoid(object)) {
		mutex_pause(2);
	}
	return (lck_rw_try_lock_shared(&object->Lock));
}
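
/*
 * Illustrative sketch, not part of the original source: a path that must not
 * block -- such as the hibernate path mentioned above -- uses the try-lock
 * variants and simply skips the object on contention.  The helper name
 * "example_try_touch_object" is hypothetical.
 */
#if 0	/* example only */
static boolean_t
example_try_touch_object(vm_object_t object)
{
	if (!vm_object_lock_try(object))
		return FALSE;		/* contended: caller will retry later */

	/* ... brief, non-blocking work on the locked object ... */

	vm_object_unlock(object);
	return TRUE;
}
#endif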
unsigned int vm_object_change_wimg_mode_count = 0;

/*
 * The object must be locked
 */
void
vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
{
	vm_page_t p;

	vm_object_lock_assert_exclusive(object);

	vm_object_paging_wait(object, THREAD_UNINT);

	queue_iterate(&object->memq, p, vm_page_t, listq) {

		if (!p->fictitious)
			pmap_set_cache_attributes(p->phys_page, wimg_mode);
	}
	if (wimg_mode == VM_WIMG_USE_DEFAULT)
		object->set_cache_attr = FALSE;
	else
		object->set_cache_attr = TRUE;

	object->wimg_bits = wimg_mode;

	vm_object_change_wimg_mode_count++;
}
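
/*
 * Illustrative sketch, not part of the original source: the routine above
 * requires the object lock to be held exclusively, so a caller changing an
 * object's cache mode looks roughly like this.  The helper name
 * "example_set_io_mode" is hypothetical and the specific wimg value is
 * only an illustration.
 */
#if 0	/* example only */
static void
example_set_io_mode(vm_object_t object)
{
	vm_object_lock(object);
	vm_object_change_wimg_mode(object, VM_WIMG_IO);
	vm_object_unlock(object);
}
#endif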
#if CONFIG_FREEZE

__private_extern__ void	default_freezer_pack_page(vm_page_t, vm_object_t, vm_object_offset_t, void**);
__private_extern__ void	default_freezer_unpack(vm_object_t, void**);

kern_return_t vm_object_pack(
	unsigned int		*purgeable_count,
	unsigned int		*wired_count,
	unsigned int		*clean_count,
	unsigned int		*dirty_count,
	boolean_t		*shared,
	vm_object_t		src_object,
	vm_object_t		compact_object,
	void			**table,
	vm_object_offset_t	*offset)
{
	kern_return_t	kr = KERN_SUCCESS;

	vm_object_lock(src_object);

	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
	*shared = FALSE;

	if (!src_object->alive || src_object->terminating){
		kr = KERN_FAILURE;
		goto done;
	}

	if (src_object->purgable == VM_PURGABLE_VOLATILE) {
		*purgeable_count = src_object->resident_page_count;

		/* If the destination object is null, we're just walking the pages to discover
		 * how many can be hibernated (see the sketch after this routine) */
		if (VM_OBJECT_NULL != compact_object) {
			purgeable_q_t queue;
			/* object should be on a queue */
			assert(src_object->objq.next != NULL &&
			       src_object->objq.prev != NULL);
			queue = vm_purgeable_object_remove(src_object);

			vm_page_lock_queues();
			vm_purgeable_token_delete_first(queue);
			vm_page_unlock_queues();
			vm_object_purge(src_object);
		}
		goto done;
	}

	if (src_object->ref_count == 1) {
		vm_object_pack_pages(wired_count, clean_count, dirty_count, src_object, compact_object, table, offset);
	} else {
		if (src_object->internal) {
			*shared = TRUE;
		}
	}
done:
	vm_object_unlock(src_object);

	return kr;
}
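
/*
 * Illustrative sketch, not part of the original source: vm_object_pack() is
 * dual purpose.  A first pass with a null compact object (and null
 * table/offset) only counts what could be frozen; a second pass with a real
 * compact object actually packs the dirty pages.  The helper name
 * "example_freeze_object" and its parameters are hypothetical.
 */
#if 0	/* example only */
static void
example_freeze_object(vm_object_t src, vm_object_t compact,
		      void **freezer_table, vm_object_offset_t *compact_offset)
{
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;

	/* pass 1: count only (compact object, table and offset are NULL) */
	vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
		       src, VM_OBJECT_NULL, NULL, NULL);

	/* pass 2: actually pack the dirty pages into the compact object */
	vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
		       src, compact, freezer_table, compact_offset);
}
#endif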
void
vm_object_pack_pages(
	unsigned int		*wired_count,
	unsigned int		*clean_count,
	unsigned int		*dirty_count,
	vm_object_t		src_object,
	vm_object_t		compact_object,
	void			**table,
	vm_object_offset_t	*offset)
{
	vm_page_t p, next;

	next = (vm_page_t)queue_first(&src_object->memq);

	/* Since this function is dual purpose in order that we can count
	 * the freezable pages as well as prepare them, assert that our
	 * arguments are sane. Gnarly, but avoids code duplication.
	 */
	if (VM_OBJECT_NULL == compact_object){
		assert(!table);
		assert(!offset);
	} else {
		assert(table);
		assert(offset);
	}

	while (!queue_end(&src_object->memq, (queue_entry_t)next)) {
		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		if (p->fictitious || p->busy)
			continue;

		if (p->absent || p->unusual || p->error)
			continue;

		if (VM_PAGE_WIRED(p)) {
			(*wired_count)++;
			continue;
		}

		if (VM_OBJECT_NULL == compact_object) {
			/* counting pass only */
			if (p->dirty || pmap_is_modified(p->phys_page)) {
				(*dirty_count)++;
			} else {
				(*clean_count)++;
			}
			continue;
		}

		if (p->cleaning) {
			p->busy = TRUE;
			p->pageout = TRUE;
			p->dump_cleaning = TRUE;

			vm_page_lockspin_queues();
			vm_page_wire(p);
			vm_page_unlock_queues();

			continue;
		}

		if (p->pmapped == TRUE) {
			int refmod_state;
			refmod_state = pmap_disconnect(p->phys_page);
			if (refmod_state & VM_MEM_MODIFIED) {
				p->dirty = TRUE;
			}
		}

		if (p->dirty) {
			p->busy = TRUE;

			default_freezer_pack_page(p, compact_object, *offset, table);
			*offset += PAGE_SIZE;

			(*dirty_count)++;
		} else {
			VM_PAGE_FREE(p);
			(*clean_count)++;
		}
	}
}
void
vm_object_pageout(
	vm_object_t object)
{
	vm_page_t	p, next;

	assert(object != VM_OBJECT_NULL);

	vm_object_lock(object);

	next = (vm_page_t)queue_first(&object->memq);

	while (!queue_end(&object->memq, (queue_entry_t)next)) {
		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		/* Throw to the pageout queue */
		vm_page_lockspin_queues();

		VM_PAGE_QUEUES_REMOVE(p);
		vm_pageout_cluster(p);

		vm_page_unlock_queues();
	}

	vm_object_unlock(object);
}
kern_return_t
vm_object_pagein(
	vm_object_t object)
{
	memory_object_t	pager;
	kern_return_t	kr;

	vm_object_lock(object);

	pager = object->pager;

	if (!object->pager_ready || pager == MEMORY_OBJECT_NULL) {
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_paging_begin(object);

	object->blocked_access = TRUE;
	vm_object_unlock(object);

	kr = memory_object_data_reclaim(pager, TRUE);

	vm_object_lock(object);

	object->blocked_access = FALSE;
	vm_object_paging_end(object);

	vm_object_unlock(object);

	return kr;
}
void
vm_object_unpack(
	vm_object_t	compact_object,
	void		**table)
{
	/*
	 * Right now we treat the default freezer much like
	 * the default pager with respect to when it is
	 * created and terminated.
	 * But, in the future, we may want to terminate the
	 * default freezer at the very instant that an object
	 * has been completely re-filled with all its previously
	 * paged-out pages.
	 * At that time we'll need to reset the object fields like
	 * "pager" and the associated "pager_{created,initialized,trusted}"
	 * fields right here.
	 */
	default_freezer_unpack(compact_object, table);
}

#endif /* CONFIG_FREEZE */
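
/*
 * Illustrative sketch, not part of the original source (assumes
 * CONFIG_FREEZE): the freeze/thaw lifecycle built from the routines above --
 * pack dirty pages into the compact object, push the compact object to the
 * pageout path, and later pull everything back and unpack it.  The helper
 * name "example_freeze_thaw" and its parameters are hypothetical.
 */
#if 0	/* example only */
static void
example_freeze_thaw(vm_object_t src, vm_object_t compact,
		    void **freezer_table)
{
	vm_object_offset_t	offset = 0;
	unsigned int		purgeable, wired, clean, dirty;
	boolean_t		shared;

	/* freeze */
	vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
		       src, compact, freezer_table, &offset);
	vm_object_pageout(compact);

	/* thaw */
	if (vm_object_pagein(compact) == KERN_SUCCESS)
		vm_object_unpack(compact, freezer_table);
}
#endif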