1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_object.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Virtual memory object module.
63 */
64
65 #include <debug.h>
66 #include <mach_pagemap.h>
67 #include <task_swapper.h>
68
69 #include <mach/mach_types.h>
70 #include <mach/memory_object.h>
71 #include <mach/memory_object_default.h>
72 #include <mach/memory_object_control_server.h>
73 #include <mach/vm_param.h>
74
75 #include <ipc/ipc_types.h>
76 #include <ipc/ipc_port.h>
77
78 #include <kern/kern_types.h>
79 #include <kern/assert.h>
80 #include <kern/lock.h>
81 #include <kern/queue.h>
82 #include <kern/xpr.h>
83 #include <kern/zalloc.h>
84 #include <kern/host.h>
85 #include <kern/host_statistics.h>
86 #include <kern/processor.h>
87 #include <kern/misc_protos.h>
88
89 #include <vm/memory_object.h>
90 #include <vm/vm_fault.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_object.h>
93 #include <vm/vm_page.h>
94 #include <vm/vm_pageout.h>
95 #include <vm/vm_protos.h>
96 #include <vm/vm_purgeable_internal.h>
97
98 /*
99 * Virtual memory objects maintain the actual data
100 * associated with allocated virtual memory. A given
101 * page of memory exists within exactly one object.
102 *
103 * An object is only deallocated when all "references"
104 * are given up.
105 *
106 * Associated with each object is a list of all resident
107 * memory pages belonging to that object; this list is
108 * maintained by the "vm_page" module, but locked by the object's
109 * lock.
110 *
111 * Each object also records the memory object reference
112 * that is used by the kernel to request and write
113 * back data (the memory object, field "pager"), etc...
114 *
115 * Virtual memory objects are allocated to provide
116 * zero-filled memory (vm_allocate) or map a user-defined
117 * memory object into a virtual address space (vm_map).
118 *
119 * Virtual memory objects that refer to a user-defined
120 * memory object are called "permanent", because all changes
121 * made in virtual memory are reflected back to the
122 * memory manager, which may then store them permanently.
123 * Other virtual memory objects are called "temporary",
124 * meaning that changes need be written back only when
125 * necessary to reclaim pages, and that storage associated
126 * with the object can be discarded once it is no longer
127 * mapped.
128 *
129 * A permanent memory object may be mapped into more
130 * than one virtual address space. Moreover, two threads
131 * may attempt to make the first mapping of a memory
132 * object concurrently. Only one thread is allowed to
133 * complete this mapping; all others wait until the
134 * "pager_initialized" field is asserted, indicating
135 * that the first thread has initialized all of the
136 * necessary fields in the virtual memory object structure.
137 *
138 * The kernel relies on a *default memory manager* to
139 * provide backing storage for the zero-filled virtual
140 * memory objects. The pager memory objects associated
141 * with these temporary virtual memory objects are only
142 * requested from the default memory manager when it
143 * becomes necessary. Virtual memory objects
144 * that depend on the default memory manager are called
145 * "internal". The "pager_created" field is provided to
146 * indicate whether these ports have ever been allocated.
147 *
148 * The kernel may also create virtual memory objects to
149 * hold changed pages after a copy-on-write operation.
150 * In this case, the virtual memory object (and its
151 * backing storage -- its memory object) only contain
152 * those pages that have been changed. The "shadow"
153 * field refers to the virtual memory object that contains
154 * the remainder of the contents. The "shadow_offset"
155 * field indicates where in the "shadow" these contents begin.
156 * The "copy" field refers to a virtual memory object
157 * to which changed pages must be copied before changing
158 * this object, in order to implement another form
159 * of copy-on-write optimization.
160 *
161 * The virtual memory object structure also records
162 * the attributes associated with its memory object.
163 * The "pager_ready", "can_persist" and "copy_strategy"
164 * fields represent those attributes. The "cached_list"
165 * field is used in the implementation of the persistence
166 * attribute.
167 *
168 * ZZZ Continue this comment.
169 */
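/*
 * Illustrative sketch (added for exposition; not part of the original
 * module): the basic lifecycle of a temporary, internal VM object as
 * seen by a caller of this module.  Wrapped in "#if 0" so it is never
 * compiled; it only demonstrates the allocate / reference / deallocate
 * pattern described above.
 */
#if 0
static void
vm_object_lifecycle_example(void)
{
	vm_object_t	object;

	/* allocate a zero-filled, temporary, internal object */
	object = vm_object_allocate((vm_object_size_t) PAGE_SIZE);
	if (object == VM_OBJECT_NULL)
		return;

	/* take an extra reference, e.g. on behalf of a second mapping */
	vm_object_reference(object);

	/*
	 * Drop both references; dropping the last one lets the object
	 * be cached or terminated (see vm_object_deallocate below).
	 */
	vm_object_deallocate(object);
	vm_object_deallocate(object);
}
#endif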
170
171 /* Forward declarations for internal functions. */
172 static kern_return_t vm_object_terminate(
173 vm_object_t object);
174
175 extern void vm_object_remove(
176 vm_object_t object);
177
178 static vm_object_t vm_object_cache_trim(
179 boolean_t called_from_vm_object_deallocate);
180
181 static void vm_object_deactivate_all_pages(
182 vm_object_t object);
183
184 static kern_return_t vm_object_copy_call(
185 vm_object_t src_object,
186 vm_object_offset_t src_offset,
187 vm_object_size_t size,
188 vm_object_t *_result_object);
189
190 static void vm_object_do_collapse(
191 vm_object_t object,
192 vm_object_t backing_object);
193
194 static void vm_object_do_bypass(
195 vm_object_t object,
196 vm_object_t backing_object);
197
198 static void vm_object_release_pager(
199 memory_object_t pager);
200
201 static zone_t vm_object_zone; /* vm backing store zone */
202
203 /*
204 * All wired-down kernel memory belongs to a single virtual
205 * memory object (kernel_object) to avoid wasting data structures.
206 */
207 static struct vm_object kernel_object_store;
208 vm_object_t kernel_object;
209
210
211 /*
212 * The submap object is used as a placeholder for vm_map_submap
213 * operations. The object is declared in vm_map.c because it
214 * is exported by the vm_map module. The storage is declared
215 * here because it must be initialized here.
216 */
217 static struct vm_object vm_submap_object_store;
218
219 /*
220 * Virtual memory objects are initialized from
221 * a template (see vm_object_allocate).
222 *
223 * When adding a new field to the virtual memory
224 * object structure, be sure to add initialization
225 * (see _vm_object_allocate()).
226 */
227 static struct vm_object vm_object_template;
228
229 /*
230 * Virtual memory objects that are not referenced by
231 * any address maps, but that are allowed to persist
232 * (an attribute specified by the associated memory manager),
233 * are kept in a queue (vm_object_cached_list).
234 *
235 * When an object from this queue is referenced again,
236 * for example to make another address space mapping,
237 * it must be removed from the queue. That is, the
238 * queue contains *only* objects with zero references.
239 *
240 * The kernel may choose to terminate objects from this
241 * queue in order to reclaim storage. The current policy
242 * is to permit a fixed maximum number of unreferenced
243 * objects (vm_object_cached_max).
244 *
245 * A mutex (accessed by routines
246 * vm_object_cache_{lock,lock_try,unlock}) governs the
247 * object cache. It must be held when objects are
248 * added to or removed from the cache (in vm_object_terminate).
249 * The routines that acquire a reference to a virtual
250 * memory object based on one of the memory object ports
251 * must also lock the cache.
252 *
253 * Ideally, the object cache should be more isolated
254 * from the reference mechanism, so that the lock need
255 * not be held to make simple references.
256 */
257 static queue_head_t vm_object_cached_list;
258 static int vm_object_cached_count=0;
259 static int vm_object_cached_high; /* highest # cached objects */
260 static int vm_object_cached_max = 512; /* may be patched */
261
262 static decl_mutex_data(,vm_object_cached_lock_data)
263
264 #define vm_object_cache_lock() \
265 mutex_lock(&vm_object_cached_lock_data)
266 #define vm_object_cache_lock_try() \
267 mutex_try(&vm_object_cached_lock_data)
268 #define vm_object_cache_unlock() \
269 mutex_unlock(&vm_object_cached_lock_data)
270
271 #define VM_OBJECT_HASH_COUNT 1024
272 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
273 static struct zone *vm_object_hash_zone;
274
275 struct vm_object_hash_entry {
276 queue_chain_t hash_link; /* hash chain link */
277 memory_object_t pager; /* pager we represent */
278 vm_object_t object; /* corresponding object */
279 boolean_t waiting; /* someone waiting for
280 * termination */
281 };
282
283 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
284 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
285
286 #define VM_OBJECT_HASH_SHIFT 8
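/*
 * Hash a pager reference into a bucket index: shift out the low-order
 * bits of the pointer (which carry little variation, presumably due to
 * allocation alignment) and take the result modulo the table size.
 */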
287 #define vm_object_hash(pager) \
288 ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
289
290 void vm_object_hash_entry_free(
291 vm_object_hash_entry_t entry);
292
293 static void vm_object_reap(vm_object_t object);
294 static void vm_object_reap_async(vm_object_t object);
295 static void vm_object_reaper_thread(void);
296 static queue_head_t vm_object_reaper_queue; /* protected by vm_object_cache_lock() */
297 unsigned int vm_object_reap_count = 0;
298 unsigned int vm_object_reap_count_async = 0;
299
300 /*
301 * vm_object_hash_lookup looks up a pager in the hashtable
302 * and returns the corresponding entry, with optional removal.
303 */
304
305 static vm_object_hash_entry_t
306 vm_object_hash_lookup(
307 memory_object_t pager,
308 boolean_t remove_entry)
309 {
310 register queue_t bucket;
311 register vm_object_hash_entry_t entry;
312
313 bucket = &vm_object_hashtable[vm_object_hash(pager)];
314
315 entry = (vm_object_hash_entry_t)queue_first(bucket);
316 while (!queue_end(bucket, (queue_entry_t)entry)) {
317 if (entry->pager == pager && !remove_entry)
318 return(entry);
319 else if (entry->pager == pager) {
320 queue_remove(bucket, entry,
321 vm_object_hash_entry_t, hash_link);
322 return(entry);
323 }
324
325 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
326 }
327
328 return(VM_OBJECT_HASH_ENTRY_NULL);
329 }
330
331 /*
332 * vm_object_hash_insert inserts the specified
333 * pager / cache object association into the hashtable.
334 */
335
336 static void
337 vm_object_hash_insert(
338 vm_object_hash_entry_t entry)
339 {
340 register queue_t bucket;
341
342 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
343
344 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
345 }
346
347 static vm_object_hash_entry_t
348 vm_object_hash_entry_alloc(
349 memory_object_t pager)
350 {
351 vm_object_hash_entry_t entry;
352
353 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
354 entry->pager = pager;
355 entry->object = VM_OBJECT_NULL;
356 entry->waiting = FALSE;
357
358 return(entry);
359 }
360
361 void
362 vm_object_hash_entry_free(
363 vm_object_hash_entry_t entry)
364 {
365 zfree(vm_object_hash_zone, entry);
366 }
367
368 /*
369 * vm_object_allocate:
370 *
371 * Returns a new object with the given size.
372 */
373
374 __private_extern__ void
375 _vm_object_allocate(
376 vm_object_size_t size,
377 vm_object_t object)
378 {
379 XPR(XPR_VM_OBJECT,
380 "vm_object_allocate, object 0x%X size 0x%X\n",
381 (integer_t)object, size, 0,0,0);
382
383 *object = vm_object_template;
384 queue_init(&object->memq);
385 queue_init(&object->msr_q);
386 #ifdef UPL_DEBUG
387 queue_init(&object->uplq);
388 #endif /* UPL_DEBUG */
389 vm_object_lock_init(object);
390 object->size = size;
391 }
392
393 __private_extern__ vm_object_t
394 vm_object_allocate(
395 vm_object_size_t size)
396 {
397 register vm_object_t object;
398
399 object = (vm_object_t) zalloc(vm_object_zone);
400
401 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
402
403 if (object != VM_OBJECT_NULL)
404 _vm_object_allocate(size, object);
405
406 return object;
407 }
408
409
410 lck_grp_t vm_object_lck_grp;
411 lck_grp_attr_t vm_object_lck_grp_attr;
412 lck_attr_t vm_object_lck_attr;
413 lck_attr_t kernel_object_lck_attr;
414
415 /*
416 * vm_object_bootstrap:
417 *
418 * Initialize the VM objects module.
419 */
420 __private_extern__ void
421 vm_object_bootstrap(void)
422 {
423 register int i;
424
425 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
426 round_page_32(512*1024),
427 round_page_32(12*1024),
428 "vm objects");
429
430 queue_init(&vm_object_reaper_queue);
431 queue_init(&vm_object_cached_list);
432 mutex_init(&vm_object_cached_lock_data, 0);
433
434 vm_object_hash_zone =
435 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
436 round_page_32(512*1024),
437 round_page_32(12*1024),
438 "vm object hash entries");
439
440 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
441 queue_init(&vm_object_hashtable[i]);
442
443 vm_object_init_lck_grp();
444
445 /*
446 * Fill in a template object, for quick initialization
447 */
448
449 /* memq; Lock; init after allocation */
450 vm_object_template.memq.prev = NULL;
451 vm_object_template.memq.next = NULL;
452 #if 0
453 /*
454 * We can't call vm_object_lock_init() here because that will
455 * allocate some memory and VM is not fully initialized yet.
457 * The lock will be initialized for each allocated object in
457 * _vm_object_allocate(), so we don't need to initialize it in
458 * the vm_object_template.
459 */
460 vm_object_lock_init(&vm_object_template);
461 #endif
462 vm_object_template.size = 0;
463 vm_object_template.memq_hint = VM_PAGE_NULL;
464 vm_object_template.ref_count = 1;
465 #if TASK_SWAPPER
466 vm_object_template.res_count = 1;
467 #endif /* TASK_SWAPPER */
468 vm_object_template.resident_page_count = 0;
469 vm_object_template.copy = VM_OBJECT_NULL;
470 vm_object_template.shadow = VM_OBJECT_NULL;
471 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
472 vm_object_template.pager = MEMORY_OBJECT_NULL;
473 vm_object_template.paging_offset = 0;
474 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
475 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
476 vm_object_template.paging_in_progress = 0;
477
478 /* Begin bitfields */
479 vm_object_template.all_wanted = 0; /* all bits FALSE */
480 vm_object_template.pager_created = FALSE;
481 vm_object_template.pager_initialized = FALSE;
482 vm_object_template.pager_ready = FALSE;
483 vm_object_template.pager_trusted = FALSE;
484 vm_object_template.can_persist = FALSE;
485 vm_object_template.internal = TRUE;
486 vm_object_template.temporary = TRUE;
487 vm_object_template.private = FALSE;
488 vm_object_template.pageout = FALSE;
489 vm_object_template.alive = TRUE;
490 vm_object_template.purgable = VM_PURGABLE_DENY;
491 vm_object_template.shadowed = FALSE;
492 vm_object_template.silent_overwrite = FALSE;
493 vm_object_template.advisory_pageout = FALSE;
494 vm_object_template.true_share = FALSE;
495 vm_object_template.terminating = FALSE;
496 vm_object_template.named = FALSE;
497 vm_object_template.shadow_severed = FALSE;
498 vm_object_template.phys_contiguous = FALSE;
499 vm_object_template.nophyscache = FALSE;
500 /* End bitfields */
501
502 vm_object_template.cached_list.prev = NULL;
503 vm_object_template.cached_list.next = NULL;
504 vm_object_template.msr_q.prev = NULL;
505 vm_object_template.msr_q.next = NULL;
506
507 vm_object_template.last_alloc = (vm_object_offset_t) 0;
508 vm_object_template.sequential = (vm_object_offset_t) 0;
509 vm_object_template.pages_created = 0;
510 vm_object_template.pages_used = 0;
511
512 #if MACH_PAGEMAP
513 vm_object_template.existence_map = VM_EXTERNAL_NULL;
514 #endif /* MACH_PAGEMAP */
515 vm_object_template.cow_hint = ~(vm_offset_t)0;
516 #if MACH_ASSERT
517 vm_object_template.paging_object = VM_OBJECT_NULL;
518 #endif /* MACH_ASSERT */
519
520 /* cache bitfields */
521 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
522 vm_object_template.code_signed = FALSE;
523 vm_object_template.not_in_use = 0;
524 #ifdef UPL_DEBUG
525 vm_object_template.uplq.prev = NULL;
526 vm_object_template.uplq.next = NULL;
527 #endif /* UPL_DEBUG */
528 #ifdef VM_PIP_DEBUG
529 bzero(&vm_object_template.pip_holders,
530 sizeof (vm_object_template.pip_holders));
531 #endif /* VM_PIP_DEBUG */
532
533 vm_object_template.objq.next=NULL;
534 vm_object_template.objq.prev=NULL;
535
536
537 /*
538 * Initialize the "kernel object"
539 */
540
541 kernel_object = &kernel_object_store;
542
543 /*
544 * Note that in the following size specifications, we need to add 1 because
545 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
546 */
547
548 #ifdef ppc
549 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
550 kernel_object);
551 #else
552 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
553 kernel_object);
554 #endif
555 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
556
557 /*
558 * Initialize the "submap object". Make it as large as the
559 * kernel object so that no limit is imposed on submap sizes.
560 */
561
562 vm_submap_object = &vm_submap_object_store;
563 #ifdef ppc
564 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
565 vm_submap_object);
566 #else
567 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
568 vm_submap_object);
569 #endif
570 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
571
572 /*
573 * Create an "extra" reference to this object so that we never
574 * try to deallocate it; zfree doesn't like to be called with
575 * non-zone memory.
576 */
577 vm_object_reference(vm_submap_object);
578
579 #if MACH_PAGEMAP
580 vm_external_module_initialize();
581 #endif /* MACH_PAGEMAP */
582 }
583
584 void
585 vm_object_reaper_init(void)
586 {
587 kern_return_t kr;
588 thread_t thread;
589
590 kr = kernel_thread_start_priority(
591 (thread_continue_t) vm_object_reaper_thread,
592 NULL,
593 BASEPRI_PREEMPT - 1,
594 &thread);
595 if (kr != KERN_SUCCESS) {
596 panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
597 }
598 thread_deallocate(thread);
599 }
600
601 __private_extern__ void
602 vm_object_init(void)
603 {
604 /*
605 * Finish initializing the kernel object.
606 */
607 }
608
609
610 __private_extern__ void
611 vm_object_init_lck_grp(void)
612 {
613 /*
614 * initialize the vm_object lock world
615 */
616 lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
617 lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
618 lck_attr_setdefault(&vm_object_lck_attr);
619 lck_attr_setdefault(&kernel_object_lck_attr);
620 lck_attr_cleardebug(&kernel_object_lck_attr);
621 }
622
623
624 #define MIGHT_NOT_CACHE_SHADOWS 1
625 #if MIGHT_NOT_CACHE_SHADOWS
626 static int cache_shadows = TRUE;
627 #endif /* MIGHT_NOT_CACHE_SHADOWS */
628
629 /*
630 * vm_object_deallocate:
631 *
632 * Release a reference to the specified object,
633 * gained either through a vm_object_allocate
634 * or a vm_object_reference call. When all references
635 * are gone, storage associated with this object
636 * may be relinquished.
637 *
638 * No object may be locked.
639 */
640 unsigned long vm_object_deallocate_shared_successes = 0;
641 unsigned long vm_object_deallocate_shared_failures = 0;
642 unsigned long vm_object_deallocate_shared_swap_failures = 0;
643 __private_extern__ void
644 vm_object_deallocate(
645 register vm_object_t object)
646 {
647 boolean_t retry_cache_trim = FALSE;
648 vm_object_t shadow = VM_OBJECT_NULL;
649 uint32_t try_failed_count = 0;
650
651 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
652 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
653
654 if (object == VM_OBJECT_NULL)
655 return;
656
657 if (object == kernel_object) {
658 vm_object_lock(kernel_object);
659 kernel_object->ref_count--;
660 if (kernel_object->ref_count == 0) {
661 panic("vm_object_deallocate: losing kernel_object\n");
662 }
663 vm_object_unlock(kernel_object);
664 return;
665 }
666
667 if (object->ref_count > 2 ||
668 (!object->named && object->ref_count > 1)) {
669 UInt32 original_ref_count;
670 volatile UInt32 *ref_count_p;
671 Boolean atomic_swap;
672
673 /*
674 * The object currently looks like it is not being
675 * kept alive solely by the reference we're about to release.
676 * Let's try and release our reference without taking
677 * all the locks we would need if we had to terminate the
678 * object (cache lock + exclusive object lock).
679 * Lock the object "shared" to make sure we don't race with
680 * anyone holding it "exclusive".
681 */
682 vm_object_lock_shared(object);
683 ref_count_p = (volatile UInt32 *) &object->ref_count;
684 original_ref_count = object->ref_count;
685 /*
686 * Test again as "ref_count" could have changed.
687 * "named" shouldn't change.
688 */
689 if (original_ref_count > 2 ||
690 (!object->named && original_ref_count > 1)) {
691 atomic_swap = OSCompareAndSwap(
692 original_ref_count,
693 original_ref_count - 1,
694 (UInt32 *) &object->ref_count);
695 if (atomic_swap == FALSE) {
696 vm_object_deallocate_shared_swap_failures++;
697 }
698
699 } else {
700 atomic_swap = FALSE;
701 }
702 vm_object_unlock(object);
703
704 if (atomic_swap) {
705 /* ref_count was updated atomically ! */
706 vm_object_deallocate_shared_successes++;
707 return;
708 }
709
710 /*
711 * Someone else updated the ref_count at the same
712 * time and we lost the race. Fall back to the usual
713 * slow but safe path...
714 */
715 vm_object_deallocate_shared_failures++;
716 }
717
718 while (object != VM_OBJECT_NULL) {
719
720 /*
721 * The cache holds a reference (uncounted) to
722 * the object; we must lock it before removing
723 * the object.
724 */
725 for (;;) {
726 vm_object_cache_lock();
727
728 /*
729 * if we try to take a regular lock here
730 * we risk deadlocking against someone
731 * holding a lock on this object while
732 * trying to vm_object_deallocate a different
733 * object
734 */
735 if (vm_object_lock_try(object))
736 break;
737 vm_object_cache_unlock();
738 try_failed_count++;
739
740 mutex_pause(try_failed_count); /* wait a bit */
741 }
742 assert(object->ref_count > 0);
743
744 /*
745 * If the object has a named reference, and only
746 * that reference would remain, inform the pager
747 * about the last "mapping" reference going away.
748 */
749 if ((object->ref_count == 2) && (object->named)) {
750 memory_object_t pager = object->pager;
751
752 /* Notify the Pager that there are no */
753 /* more mappers for this object */
754
755 if (pager != MEMORY_OBJECT_NULL) {
756 vm_object_unlock(object);
757 vm_object_cache_unlock();
758
759 memory_object_unmap(pager);
760
761 try_failed_count = 0;
762 for (;;) {
763 vm_object_cache_lock();
764
765 /*
766 * if we try to take a regular lock here
767 * we risk deadlocking against someone
768 * holding a lock on this object while
769 * trying to vm_object_deallocate a different
770 * object
771 */
772 if (vm_object_lock_try(object))
773 break;
774 vm_object_cache_unlock();
775 try_failed_count++;
776
777 mutex_pause(try_failed_count); /* wait a bit */
778 }
779 assert(object->ref_count > 0);
780 }
781 }
782
783 /*
784 * Lose the reference. If other references
785 * remain, then we are done, unless we need
786 * to retry a cache trim.
787 * If it is the last reference, then keep it
788 * until any pending initialization is completed.
789 */
790
791 /* if the object is terminating, it cannot go into */
792 /* the cache and we obviously should not call */
793 /* terminate again. */
794
795 if ((object->ref_count > 1) || object->terminating) {
796 vm_object_lock_assert_exclusive(object);
797 object->ref_count--;
798 vm_object_res_deallocate(object);
799 vm_object_cache_unlock();
800
801 if (object->ref_count == 1 &&
802 object->shadow != VM_OBJECT_NULL) {
803 /*
804 * There's only one reference left on this
805 * VM object. We can't tell if it's a valid
806 * one (from a mapping for example) or if this
807 * object is just part of a possibly stale and
808 * useless shadow chain.
809 * We would like to try and collapse it into
810 * its parent, but we don't have any pointers
811 * back to this parent object.
812 * But we can try and collapse this object with
813 * its own shadows, in case these are useless
814 * too...
815 * We can't bypass this object though, since we
816 * don't know if this last reference on it is
817 * meaningful or not.
818 */
819 vm_object_collapse(object, 0, FALSE);
820 }
821
822 vm_object_unlock(object);
823 if (retry_cache_trim &&
824 ((object = vm_object_cache_trim(TRUE)) !=
825 VM_OBJECT_NULL)) {
826 continue;
827 }
828 return;
829 }
830
831 /*
832 * We have to wait for initialization
833 * before destroying or caching the object.
834 */
835
836 if (object->pager_created && ! object->pager_initialized) {
837 assert(! object->can_persist);
838 vm_object_assert_wait(object,
839 VM_OBJECT_EVENT_INITIALIZED,
840 THREAD_UNINT);
841 vm_object_unlock(object);
842 vm_object_cache_unlock();
843 thread_block(THREAD_CONTINUE_NULL);
844 continue;
845 }
846
847 /*
848 * If this object can persist, then enter it in
849 * the cache. Otherwise, terminate it.
850 *
851 * NOTE: Only permanent objects are cached, and
852 * permanent objects cannot have shadows. This
853 * affects the residence counting logic in a minor
854 * way (can do it in-line, mostly).
855 */
856
857 if ((object->can_persist) && (object->alive)) {
858 /*
859 * Now it is safe to decrement reference count,
860 * and to return if reference count is > 0.
861 */
862 vm_object_lock_assert_exclusive(object);
863 if (--object->ref_count > 0) {
864 vm_object_res_deallocate(object);
865 vm_object_unlock(object);
866 vm_object_cache_unlock();
867 if (retry_cache_trim &&
868 ((object = vm_object_cache_trim(TRUE)) !=
869 VM_OBJECT_NULL)) {
870 continue;
871 }
872 return;
873 }
874
875 #if MIGHT_NOT_CACHE_SHADOWS
876 /*
877 * Remove shadow now if we don't
878 * want to cache shadows.
879 */
880 if (! cache_shadows) {
881 shadow = object->shadow;
882 object->shadow = VM_OBJECT_NULL;
883 }
884 #endif /* MIGHT_NOT_CACHE_SHADOWS */
885
886 /*
887 * Enter the object onto the queue of
888 * cached objects, and deactivate
889 * all of its pages.
890 */
891 assert(object->shadow == VM_OBJECT_NULL);
892 VM_OBJ_RES_DECR(object);
893 XPR(XPR_VM_OBJECT,
894 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
895 (integer_t)object,
896 (integer_t)vm_object_cached_list.next,
897 (integer_t)vm_object_cached_list.prev,0,0);
898
899 vm_object_cached_count++;
900 if (vm_object_cached_count > vm_object_cached_high)
901 vm_object_cached_high = vm_object_cached_count;
902 queue_enter(&vm_object_cached_list, object,
903 vm_object_t, cached_list);
904 vm_object_cache_unlock();
905 vm_object_deactivate_all_pages(object);
906 vm_object_unlock(object);
907
908 #if MIGHT_NOT_CACHE_SHADOWS
909 /*
910 * If we have a shadow that we need
911 * to deallocate, do so now, remembering
912 * to trim the cache later.
913 */
914 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
915 object = shadow;
916 retry_cache_trim = TRUE;
917 continue;
918 }
919 #endif /* MIGHT_NOT_CACHE_SHADOWS */
920
921 /*
922 * Trim the cache. If the cache trim
923 * returns with a shadow for us to deallocate,
924 * then remember to retry the cache trim
925 * when we are done deallocating the shadow.
926 * Otherwise, we are done.
927 */
928
929 object = vm_object_cache_trim(TRUE);
930 if (object == VM_OBJECT_NULL) {
931 return;
932 }
933 retry_cache_trim = TRUE;
934
935 } else {
936 /*
937 * This object is not cacheable; terminate it.
938 */
939 XPR(XPR_VM_OBJECT,
940 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
941 (integer_t)object, object->resident_page_count,
942 object->paging_in_progress,
943 (void *)current_thread(),object->ref_count);
944
945 VM_OBJ_RES_DECR(object); /* XXX ? */
946 /*
947 * Terminate this object. If it had a shadow,
948 * then deallocate it; otherwise, if we need
949 * to retry a cache trim, do so now; otherwise,
950 * we are done. "pageout" objects have a shadow,
951 * but maintain a "paging reference" rather than
952 * a normal reference.
953 */
954 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
955 if(vm_object_terminate(object) != KERN_SUCCESS) {
956 return;
957 }
958 if (shadow != VM_OBJECT_NULL) {
959 object = shadow;
960 continue;
961 }
962 if (retry_cache_trim &&
963 ((object = vm_object_cache_trim(TRUE)) !=
964 VM_OBJECT_NULL)) {
965 continue;
966 }
967 return;
968 }
969 }
970 assert(! retry_cache_trim);
971 }
972
973 /*
974 * Check to see whether we really need to trim
975 * down the cache. If so, remove an object from
976 * the cache, terminate it, and repeat.
977 *
978 * Called with, and returns with, cache lock unlocked.
979 */
980 vm_object_t
981 vm_object_cache_trim(
982 boolean_t called_from_vm_object_deallocate)
983 {
984 register vm_object_t object = VM_OBJECT_NULL;
985 vm_object_t shadow;
986
987 for (;;) {
988
989 /*
990 * If we no longer need to trim the cache,
991 * then we are done.
992 */
993
994 vm_object_cache_lock();
995 if (vm_object_cached_count <= vm_object_cached_max) {
996 vm_object_cache_unlock();
997 return VM_OBJECT_NULL;
998 }
999
1000 /*
1001 * We must trim down the cache, so remove
1002 * the first object in the cache.
1003 */
1004 XPR(XPR_VM_OBJECT,
1005 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
1006 (integer_t)vm_object_cached_list.next,
1007 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
1008
1009 object = (vm_object_t) queue_first(&vm_object_cached_list);
1010 if(object == (vm_object_t) &vm_object_cached_list) {
1011 /* something's wrong with the calling parameter or */
1012 /* the value of vm_object_cached_count, just fix */
1013 /* and return */
1014 if(vm_object_cached_max < 0)
1015 vm_object_cached_max = 0;
1016 vm_object_cached_count = 0;
1017 vm_object_cache_unlock();
1018 return VM_OBJECT_NULL;
1019 }
1020 vm_object_lock(object);
1021 queue_remove(&vm_object_cached_list, object, vm_object_t,
1022 cached_list);
1023 vm_object_cached_count--;
1024
1025 /*
1026 * Since this object is in the cache, we know
1027 * that it is initialized and has no references.
1028 * Take a reference to avoid recursive deallocations.
1029 */
1030
1031 assert(object->pager_initialized);
1032 assert(object->ref_count == 0);
1033 vm_object_lock_assert_exclusive(object);
1034 object->ref_count++;
1035
1036 /*
1037 * Terminate the object.
1038 * If the object had a shadow, we let vm_object_deallocate
1039 * deallocate it. "pageout" objects have a shadow, but
1040 * maintain a "paging reference" rather than a normal
1041 * reference.
1042 * (We are careful here to limit recursion.)
1043 */
1044 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
1045 if(vm_object_terminate(object) != KERN_SUCCESS)
1046 continue;
1047 if (shadow != VM_OBJECT_NULL) {
1048 if (called_from_vm_object_deallocate) {
1049 return shadow;
1050 } else {
1051 vm_object_deallocate(shadow);
1052 }
1053 }
1054 }
1055 }
1056
1057 #define VM_OBJ_TERM_STATS DEBUG
1058 #if VM_OBJ_TERM_STATS
1059 uint32_t vm_object_terminate_pages_freed = 0;
1060 uint32_t vm_object_terminate_pages_removed = 0;
1061 uint32_t vm_object_terminate_batches = 0;
1062 uint32_t vm_object_terminate_biggest_batch = 0;
1063 #endif /* VM_OBJ_TERM_STATS */
1064
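/*
 * Maximum number of pages processed per batch while holding the page
 * queues lock; the lock is yielded between batches (see the
 * mutex_yield() calls below).
 */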
1065 #define V_O_T_MAX_BATCH 256
1066
1067 /*
1068 * Routine: vm_object_terminate
1069 * Purpose:
1070 * Free all resources associated with a vm_object.
1071 * In/out conditions:
1072 * Upon entry, the object must be locked,
1073 * and the object must have exactly one reference.
1074 *
1075 * The shadow object reference is left alone.
1076 *
1077 * The object must be unlocked if it is found that pages
1078 * must be flushed to a backing object. If someone
1079 * manages to map the object while it is being flushed,
1080 * the object is returned unlocked and unchanged. Otherwise,
1081 * upon exit, the cache will be unlocked, and the
1082 * object will cease to exist.
1083 */
1084 static kern_return_t
1085 vm_object_terminate(
1086 register vm_object_t object)
1087 {
1088 register vm_page_t p;
1089 vm_object_t shadow_object;
1090 vm_page_t local_free_q;
1091 int loop_count;
1092 #if VM_OBJ_TERM_STATS
1093 uint32_t local_free_count;
1094 uint32_t pages_removed;
1095 #endif /* VM_OBJ_TERM_STATS */
1096
1097 #if VM_OBJ_TERM_STATS
1098 #define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) \
1099 MACRO_BEGIN \
1100 if (_pages_removed) { \
1101 hw_atomic_add(&vm_object_terminate_batches, 1); \
1102 hw_atomic_add(&vm_object_terminate_pages_removed, \
1103 _pages_removed); \
1104 hw_atomic_add(&vm_object_terminate_pages_freed, \
1105 _local_free_count); \
1106 if (_local_free_count > \
1107 vm_object_terminate_biggest_batch) { \
1108 vm_object_terminate_biggest_batch = \
1109 _local_free_count; \
1110 } \
1111 _local_free_count = 0; \
1112 } \
1113 MACRO_END
1114 #else /* VM_OBJ_TERM_STATS */
1115 #define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count)
1116 #endif /* VM_OBJ_TERM_STATS */
1117
1118 #define VM_OBJ_TERM_FREELIST(_pages_removed, _local_free_count, _local_free_q) \
1119 MACRO_BEGIN \
1120 VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count); \
1121 if (_local_free_q) { \
1122 vm_page_free_list(_local_free_q); \
1123 _local_free_q = VM_PAGE_NULL; \
1124 } \
1125 MACRO_END
1126
1127
1128
1129 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
1130 (integer_t)object, object->ref_count, 0, 0, 0);
1131
1132 local_free_q = VM_PAGE_NULL;
1133 #if VM_OBJ_TERM_STATS
1134 local_free_count = 0;
1135 pages_removed = 0;
1136 #endif /* VM_OBJ_TERM_STATS */
1137
1138 if (!object->pageout && (!object->temporary || object->can_persist)
1139 && (object->pager != NULL || object->shadow_severed)) {
1140 vm_object_cache_unlock();
1141 loop_count = V_O_T_MAX_BATCH;
1142 vm_page_lock_queues();
1143 while (!queue_empty(&object->memq)) {
1144 if (--loop_count == 0) {
1145 /*
1146 * Free the pages we've reclaimed so far and
1147 * take a little break to avoid hogging
1148 * the page queues lock too long.
1149 */
1150 VM_OBJ_TERM_FREELIST(pages_removed,
1151 local_free_count,
1152 local_free_q);
1153 mutex_yield(&vm_page_queue_lock);
1154 loop_count = V_O_T_MAX_BATCH;
1155 }
1156 /*
1157 * Clear pager_trusted bit so that the pages get yanked
1158 * out of the object instead of cleaned in place. This
1159 * prevents a deadlock in XMM and makes more sense anyway.
1160 */
1161 object->pager_trusted = FALSE;
1162
1163 p = (vm_page_t) queue_first(&object->memq);
1164
1165 VM_PAGE_CHECK(p);
1166
1167 if (p->busy || p->cleaning) {
1168 if(p->cleaning || p->absent) {
1169 /* free the pages reclaimed so far */
1170 VM_OBJ_TERM_FREELIST(pages_removed,
1171 local_free_count,
1172 local_free_q);
1173 vm_page_unlock_queues();
1174 vm_object_paging_wait(object, THREAD_UNINT);
1175 vm_page_lock_queues();
1176 continue;
1177 } else {
1178 panic("vm_object_terminate.3 %p %p", object, p);
1179 }
1180 }
1181
1182 p->busy = TRUE;
1183 VM_PAGE_QUEUES_REMOVE(p);
1184 #if VM_OBJ_TERM_STATS
1185 pages_removed++;
1186 #endif /* VM_OBJ_TERM_STATS */
1187
1188 if (p->absent || p->private) {
1189
1190 /*
1191 * For private pages, VM_PAGE_FREE just
1192 * leaves the page structure around for
1193 * its owner to clean up. For absent
1194 * pages, the structure is returned to
1195 * the appropriate pool.
1196 */
1197
1198 goto free_page;
1199 }
1200
1201 if (p->fictitious) {
1202 if (p->phys_page == vm_page_guard_addr) {
1203 goto free_page;
1204 }
1205 panic("vm_object_terminate.4 %p %p", object, p);
1206 }
1207
1208 if (!p->dirty && p->wpmapped)
1209 p->dirty = pmap_is_modified(p->phys_page);
1210
1211 if ((p->dirty || p->precious) && !p->error && object->alive) {
1212 /* free the pages reclaimed so far */
1213 VM_OBJ_TERM_FREELIST(pages_removed,
1214 local_free_count,
1215 local_free_q);
1216 vm_page_unlock_queues();
1217 vm_pageout_cluster(p); /* flush page */
1218 vm_object_paging_wait(object, THREAD_UNINT);
1219 XPR(XPR_VM_OBJECT,
1220 "vm_object_terminate restart, object 0x%X ref %d\n",
1221 (integer_t)object, object->ref_count, 0, 0, 0);
1222 vm_page_lock_queues();
1223 } else {
1224 free_page:
1225 /*
1226 * Add this page to our list of reclaimed pages,
1227 * to be freed later.
1228 */
1229 vm_page_free_prepare(p);
1230 p->pageq.next = (queue_entry_t) local_free_q;
1231 local_free_q = p;
1232 #if VM_OBJ_TERM_STATS
1233 local_free_count++;
1234 #endif /* VM_OBJ_TERM_STATS */
1235 }
1236 }
1237
1238 /*
1239 * Free the remaining reclaimed pages.
1240 */
1241 VM_OBJ_TERM_FREELIST(pages_removed,
1242 local_free_count,
1243 local_free_q);
1244 vm_page_unlock_queues();
1245 vm_object_unlock(object);
1246 vm_object_cache_lock();
1247 vm_object_lock(object);
1248 }
1249
1250 /*
1251 * Make sure the object isn't already being terminated
1252 */
1253 if(object->terminating) {
1254 vm_object_lock_assert_exclusive(object);
1255 object->ref_count--;
1256 assert(object->ref_count > 0);
1257 vm_object_cache_unlock();
1258 vm_object_unlock(object);
1259 return KERN_FAILURE;
1260 }
1261
1262 /*
1263 * Did somebody get a reference to the object while we were
1264 * cleaning it?
1265 */
1266 if(object->ref_count != 1) {
1267 vm_object_lock_assert_exclusive(object);
1268 object->ref_count--;
1269 assert(object->ref_count > 0);
1270 vm_object_res_deallocate(object);
1271 vm_object_cache_unlock();
1272 vm_object_unlock(object);
1273 return KERN_FAILURE;
1274 }
1275
1276 /*
1277 * Make sure no one can look us up now.
1278 */
1279
1280 object->terminating = TRUE;
1281 object->alive = FALSE;
1282 vm_object_remove(object);
1283
1284 /*
1285 * Detach the object from its shadow if we are the shadow's
1286 * copy. The reference we hold on the shadow must be dropped
1287 * by our caller.
1288 */
1289 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1290 !(object->pageout)) {
1291 vm_object_lock(shadow_object);
1292 if (shadow_object->copy == object)
1293 shadow_object->copy = VM_OBJECT_NULL;
1294 vm_object_unlock(shadow_object);
1295 }
1296
1297 if (object->paging_in_progress != 0) {
1298 /*
1299 * There are still some paging_in_progress references
1300 * on this object, meaning that there are some paging
1301 * or other I/O operations in progress for this VM object.
1302 * Such operations take some paging_in_progress references
1303 * up front to ensure that the object doesn't go away, but
1304 * they may also need to acquire a reference on the VM object,
1305 * to map it in kernel space, for example. That means that
1306 * they may end up releasing the last reference on the VM
1307 * object, triggering its termination, while still holding
1308 * paging_in_progress references. Waiting for these
1309 * pending paging_in_progress references to go away here would
1310 * deadlock.
1311 *
1312 * To avoid deadlocking, we'll let the vm_object_reaper_thread
1313 * complete the VM object termination if it still holds
1314 * paging_in_progress references at this point.
1315 *
1316 * No new paging_in_progress should appear now that the
1317 * VM object is "terminating" and not "alive".
1318 */
1319 vm_object_reap_async(object);
1320 vm_object_cache_unlock();
1321 vm_object_unlock(object);
1322 /*
1323 * Return KERN_FAILURE to let the caller know that we
1324 * haven't completed the termination and it can't drop this
1325 * object's reference on its shadow object yet.
1326 * The reaper thread will take care of that once it has
1327 * completed this object's termination.
1328 */
1329 return KERN_FAILURE;
1330 }
1331
1332 /* complete the VM object termination */
1333 vm_object_reap(object);
1334 object = VM_OBJECT_NULL;
1335 /* cache lock and object lock were released by vm_object_reap() */
1336
1337 /*
1338 * KERN_SUCCESS means that this object has been terminated
1339 * and no longer needs its shadow object but still holds a
1340 * reference on it.
1341 * The caller is responsible for dropping that reference.
1342 * We can't call vm_object_deallocate() here because that
1343 * would create a recursion.
1344 */
1345 return KERN_SUCCESS;
1346 }
1347
1348 /*
1349 * vm_object_reap():
1350 *
1351 * Complete the termination of a VM object after it's been marked
1352 * as "terminating" and "!alive" by vm_object_terminate().
1353 *
1354 * The VM object cache and the VM object must be locked by caller.
1355 * The locks will be released on return and the VM object is no longer valid.
1356 */
1357 void
1358 vm_object_reap(
1359 vm_object_t object)
1360 {
1361 memory_object_t pager;
1362 vm_page_t p;
1363 vm_page_t local_free_q;
1364 int loop_count;
1365 #if VM_OBJ_TERM_STATS
1366 uint32_t local_free_count;
1367 #endif /* VM_OBJ_TERM_STATS */
1368
1369 #if DEBUG
1370 mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
1371 #endif /* DEBUG */
1372 vm_object_lock_assert_exclusive(object);
1373 assert(object->paging_in_progress == 0);
1374
1375 vm_object_reap_count++;
1376
1377 local_free_q = VM_PAGE_NULL;
1378 #if VM_OBJ_TERM_STATS
1379 local_free_count = 0;
1380 #endif /* VM_OBJ_TERM_STATS */
1381
1382 pager = object->pager;
1383 object->pager = MEMORY_OBJECT_NULL;
1384
1385 if (pager != MEMORY_OBJECT_NULL)
1386 memory_object_control_disable(object->pager_control);
1387 vm_object_cache_unlock();
1388
1389 vm_object_lock_assert_exclusive(object);
1390 object->ref_count--;
1391 #if TASK_SWAPPER
1392 assert(object->res_count == 0);
1393 #endif /* TASK_SWAPPER */
1394
1395 assert (object->ref_count == 0);
1396
1397 /* remove from purgeable queue if it's on one */
1398 if (object->objq.next || object->objq.prev) {
1399 purgeable_q_t queue = vm_purgeable_object_remove(object);
1400 assert(queue);
1401
1402 /* Must take page lock for this - using it to protect token queue */
1403 vm_page_lock_queues();
1404 vm_purgeable_token_delete_first(queue);
1405
1406 assert(queue->debug_count_objects>=0);
1407 vm_page_unlock_queues();
1408 }
1409
1410 /*
1411 * Clean or free the pages, as appropriate.
1412 * It is possible for us to find busy/absent pages,
1413 * if some faults on this object were aborted.
1414 */
1415 if (object->pageout) {
1416 assert(object->shadow != VM_OBJECT_NULL);
1417
1418 vm_pageout_object_terminate(object);
1419
1420 } else if ((object->temporary && !object->can_persist) ||
1421 (pager == MEMORY_OBJECT_NULL)) {
1422 loop_count = V_O_T_MAX_BATCH;
1423 vm_page_lock_queues();
1424 while (!queue_empty(&object->memq)) {
1425 if (--loop_count == 0) {
1426 /*
1427 * Free the pages we reclaimed so far
1428 * and take a little break to avoid
1429 * hogging the page queue lock too long
1430 */
1431 VM_OBJ_TERM_FREELIST(local_free_count,
1432 local_free_count,
1433 local_free_q);
1434 mutex_yield(&vm_page_queue_lock);
1435 loop_count = V_O_T_MAX_BATCH;
1436 }
1437 p = (vm_page_t) queue_first(&object->memq);
1438
1439 vm_page_free_prepare(p);
1440
1441 assert(p->pageq.next == NULL && p->pageq.prev == NULL);
1442 p->pageq.next = (queue_entry_t) local_free_q;
1443 local_free_q = p;
1444 #if VM_OBJ_TERM_STATS
1445 local_free_count++;
1446 #endif /* VM_OBJ_TERM_STATS */
1447 }
1448 /*
1449 * Free the remaining reclaimed pages
1450 */
1451 VM_OBJ_TERM_FREELIST(local_free_count,
1452 local_free_count,
1453 local_free_q);
1454 vm_page_unlock_queues();
1455 } else if (!queue_empty(&object->memq)) {
1456 panic("vm_object_reap: queue just emptied isn't");
1457 }
1458
1459 assert(object->paging_in_progress == 0);
1460 assert(object->ref_count == 0);
1461
1462 /*
1463 * If the pager has not already been released by
1464 * vm_object_destroy, we need to terminate it and
1465 * release our reference to it here.
1466 */
1467 if (pager != MEMORY_OBJECT_NULL) {
1468 vm_object_unlock(object);
1469 vm_object_release_pager(pager);
1470 vm_object_lock(object);
1471 }
1472
1473 /* kick off anyone waiting on terminating */
1474 object->terminating = FALSE;
1475 vm_object_paging_begin(object);
1476 vm_object_paging_end(object);
1477 vm_object_unlock(object);
1478
1479 #if MACH_PAGEMAP
1480 vm_external_destroy(object->existence_map, object->size);
1481 #endif /* MACH_PAGEMAP */
1482
1483 object->shadow = VM_OBJECT_NULL;
1484
1485 vm_object_lock_destroy(object);
1486 /*
1487 * Free the space for the object.
1488 */
1489 zfree(vm_object_zone, object);
1490 object = VM_OBJECT_NULL;
1491 }
1492
1493 void
1494 vm_object_reap_async(
1495 vm_object_t object)
1496 {
1497 #if DEBUG
1498 mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
1499 #endif /* DEBUG */
1500 vm_object_lock_assert_exclusive(object);
1501
1502 vm_object_reap_count_async++;
1503
1504 /* enqueue the VM object... */
1505 queue_enter(&vm_object_reaper_queue, object,
1506 vm_object_t, cached_list);
1507 /* ... and wake up the reaper thread */
1508 thread_wakeup((event_t) &vm_object_reaper_queue);
1509 }
1510
1511 void
1512 vm_object_reaper_thread(void)
1513 {
1514 vm_object_t object, shadow_object;
1515
1516 vm_object_cache_lock();
1517
1518 while (!queue_empty(&vm_object_reaper_queue)) {
1519 queue_remove_first(&vm_object_reaper_queue,
1520 object,
1521 vm_object_t,
1522 cached_list);
1523 vm_object_lock(object);
1524 assert(object->terminating);
1525 assert(!object->alive);
1526
1527 /*
1528 * The pageout daemon might be playing with our pages.
1529 * Now that the object is dead, it won't touch any more
1530 * pages, but some pages might already be on their way out.
1531 * Hence, we wait until the active paging activities have
1532 * ceased before we break the association with the pager
1533 * itself.
1534 */
1535 while (object->paging_in_progress != 0) {
1536 vm_object_cache_unlock();
1537 vm_object_wait(object,
1538 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1539 THREAD_UNINT);
1540 vm_object_cache_lock();
1541 vm_object_lock(object);
1542 }
1543
1544 shadow_object =
1545 object->pageout ? VM_OBJECT_NULL : object->shadow;
1546
1547 vm_object_reap(object);
1548 /* cache is unlocked and object is no longer valid */
1549 object = VM_OBJECT_NULL;
1550
1551 if (shadow_object != VM_OBJECT_NULL) {
1552 /*
1553 * Drop the reference "object" was holding on
1554 * its shadow object.
1555 */
1556 vm_object_deallocate(shadow_object);
1557 shadow_object = VM_OBJECT_NULL;
1558 }
1559
1560 vm_object_cache_lock();
1561 }
1562
1563 /* wait for more work... */
1564 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
1565 vm_object_cache_unlock();
1566 thread_block((thread_continue_t) vm_object_reaper_thread);
1567 /*NOTREACHED*/
1568 }
1569
1570 /*
1571 * Routine: vm_object_pager_wakeup
1572 * Purpose: Wake up anyone waiting for termination of a pager.
1573 */
1574
1575 static void
1576 vm_object_pager_wakeup(
1577 memory_object_t pager)
1578 {
1579 vm_object_hash_entry_t entry;
1580 boolean_t waiting = FALSE;
1581
1582 /*
1583 * If anyone was waiting for the memory_object_terminate
1584 * to be queued, wake them up now.
1585 */
1586 vm_object_cache_lock();
1587 entry = vm_object_hash_lookup(pager, TRUE);
1588 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1589 waiting = entry->waiting;
1590 vm_object_cache_unlock();
1591 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1592 if (waiting)
1593 thread_wakeup((event_t) pager);
1594 vm_object_hash_entry_free(entry);
1595 }
1596 }
1597
1598 /*
1599 * Routine: vm_object_release_pager
1600 * Purpose: Terminate the pager and, upon completion,
1601 * release our last reference to it.
1602 * Just like memory_object_terminate, except
1603 * that we wake up anyone blocked in vm_object_enter
1604 * waiting for the termination message to be queued
1605 * before calling memory_object_init.
1606 */
1607 static void
1608 vm_object_release_pager(
1609 memory_object_t pager)
1610 {
1611
1612 /*
1613 * Terminate the pager.
1614 */
1615
1616 (void) memory_object_terminate(pager);
1617
1618 /*
1619 * Wakeup anyone waiting for this terminate
1620 */
1621 vm_object_pager_wakeup(pager);
1622
1623 /*
1624 * Release reference to pager.
1625 */
1626 memory_object_deallocate(pager);
1627 }
1628
1629 /*
1630 * Routine: vm_object_destroy
1631 * Purpose:
1632 * Shut down a VM object, despite the
1633 * presence of address map (or other) references
1634 * to the vm_object.
1635 */
1636 kern_return_t
1637 vm_object_destroy(
1638 vm_object_t object,
1639 __unused kern_return_t reason)
1640 {
1641 memory_object_t old_pager;
1642
1643 if (object == VM_OBJECT_NULL)
1644 return(KERN_SUCCESS);
1645
1646 /*
1647 * Remove the pager association immediately.
1648 *
1649 * This will prevent the memory manager from further
1650 * meddling. [If it wanted to flush data or make
1651 * other changes, it should have done so before performing
1652 * the destroy call.]
1653 */
1654
1655 vm_object_cache_lock();
1656 vm_object_lock(object);
1657 object->can_persist = FALSE;
1658 object->named = FALSE;
1659 object->alive = FALSE;
1660
1661 /*
1662 * Rip out the pager from the vm_object now...
1663 */
1664
1665 vm_object_remove(object);
1666 old_pager = object->pager;
1667 object->pager = MEMORY_OBJECT_NULL;
1668 if (old_pager != MEMORY_OBJECT_NULL)
1669 memory_object_control_disable(object->pager_control);
1670 vm_object_cache_unlock();
1671
1672 /*
1673 * Wait for the existing paging activity (that got
1674 * through before we nulled out the pager) to subside.
1675 */
1676
1677 vm_object_paging_wait(object, THREAD_UNINT);
1678 vm_object_unlock(object);
1679
1680 /*
1681 * Terminate the object now.
1682 */
1683 if (old_pager != MEMORY_OBJECT_NULL) {
1684 vm_object_release_pager(old_pager);
1685
1686 /*
1687 * JMM - Release the caller's reference. This assumes the
1688 * caller had a reference to release, which is a big (but
1689 * currently valid) assumption if this is driven from the
1690 * vnode pager (it is holding a named reference when making
1691 * this call).
1692 */
1693 vm_object_deallocate(object);
1694
1695 }
1696 return(KERN_SUCCESS);
1697 }
1698
1699 #define VM_OBJ_DEACT_ALL_STATS DEBUG
1700 #if VM_OBJ_DEACT_ALL_STATS
1701 uint32_t vm_object_deactivate_all_pages_batches = 0;
1702 uint32_t vm_object_deactivate_all_pages_pages = 0;
1703 #endif /* VM_OBJ_DEACT_ALL_STATS */
1704 /*
1705 * vm_object_deactivate_all_pages
1706 *
1707 * Deactivate all pages in the specified object. (Keep its pages
1708 * in memory even though it is no longer referenced.)
1709 *
1710 * The object must be locked.
1711 */
1712 static void
1713 vm_object_deactivate_all_pages(
1714 register vm_object_t object)
1715 {
1716 register vm_page_t p;
1717 int loop_count;
1718 #if VM_OBJ_DEACT_ALL_STATS
1719 int pages_count;
1720 #endif /* VM_OBJ_DEACT_ALL_STATS */
1721 #define V_O_D_A_P_MAX_BATCH 256
1722
1723 loop_count = V_O_D_A_P_MAX_BATCH;
1724 #if VM_OBJ_DEACT_ALL_STATS
1725 pages_count = 0;
1726 #endif /* VM_OBJ_DEACT_ALL_STATS */
1727 vm_page_lock_queues();
1728 queue_iterate(&object->memq, p, vm_page_t, listq) {
1729 if (--loop_count == 0) {
1730 #if VM_OBJ_DEACT_ALL_STATS
1731 hw_atomic_add(&vm_object_deactivate_all_pages_batches,
1732 1);
1733 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1734 pages_count);
1735 pages_count = 0;
1736 #endif /* VM_OBJ_DEACT_ALL_STATS */
1737 mutex_yield(&vm_page_queue_lock);
1738 loop_count = V_O_D_A_P_MAX_BATCH;
1739 }
1740 if (!p->busy && !p->throttled) {
1741 #if VM_OBJ_DEACT_ALL_STATS
1742 pages_count++;
1743 #endif /* VM_OBJ_DEACT_ALL_STATS */
1744 vm_page_deactivate(p);
1745 }
1746 }
1747 #if VM_OBJ_DEACT_ALL_STATS
1748 if (pages_count) {
1749 hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
1750 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1751 pages_count);
1752 pages_count = 0;
1753 }
1754 #endif /* VM_OBJ_DEACT_ALL_STATS */
1755 vm_page_unlock_queues();
1756 }
1757
1758 __private_extern__ void
1759 vm_object_deactivate_pages(
1760 vm_object_t object,
1761 vm_object_offset_t offset,
1762 vm_object_size_t size,
1763 boolean_t kill_page)
1764 {
1765 vm_object_t orig_object;
1766 int pages_moved = 0;
1767 int pages_found = 0;
1768
1769 /*
1770 * entered with object lock held, acquire a paging reference to
1771 * prevent the memory_object and control ports from
1772 * being destroyed.
1773 */
1774 orig_object = object;
1775
1776 for (;;) {
1777 register vm_page_t m;
1778 vm_object_offset_t toffset;
1779 vm_object_size_t tsize;
1780
1781 vm_object_paging_begin(object);
1782 vm_page_lock_queues();
1783
1784 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1785
1786 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1787
1788 pages_found++;
1789
1790 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1791
1792 assert(!m->laundry);
1793
1794 m->reference = FALSE;
1795 pmap_clear_reference(m->phys_page);
1796
1797 if ((kill_page) && (object->internal)) {
1798 m->precious = FALSE;
1799 m->dirty = FALSE;
1800 pmap_clear_modify(m->phys_page);
1801 #if MACH_PAGEMAP
1802 vm_external_state_clr(object->existence_map, offset);
1803 #endif /* MACH_PAGEMAP */
1804 }
1805
1806 if (!m->throttled) {
1807 VM_PAGE_QUEUES_REMOVE(m);
1808
1809 assert(!m->laundry);
1810 assert(m->object != kernel_object);
1811 assert(m->pageq.next == NULL &&
1812 m->pageq.prev == NULL);
1813
1814 if(m->zero_fill) {
1815 queue_enter_first(
1816 &vm_page_queue_zf,
1817 m, vm_page_t, pageq);
1818 vm_zf_queue_count++;
1819 } else {
1820 queue_enter_first(
1821 &vm_page_queue_inactive,
1822 m, vm_page_t, pageq);
1823 }
1824
1825 m->inactive = TRUE;
1826 if (!m->fictitious) {
1827 vm_page_inactive_count++;
1828 token_new_pagecount++;
1829 } else {
1830 assert(m->phys_page == vm_page_fictitious_addr);
1831 }
1832
1833 pages_moved++;
1834 }
1835 }
1836 }
1837 }
1838 vm_page_unlock_queues();
1839 vm_object_paging_end(object);
1840
1841 if (object->shadow) {
1842 vm_object_t tmp_object;
1843
1844 kill_page = 0;
1845
1846 offset += object->shadow_offset;
1847
1848 tmp_object = object->shadow;
1849 vm_object_lock(tmp_object);
1850
1851 if (object != orig_object)
1852 vm_object_unlock(object);
1853 object = tmp_object;
1854 } else
1855 break;
1856 }
1857 if (object != orig_object)
1858 vm_object_unlock(object);
1859 }
1860
1861 /*
1862 * Routine: vm_object_pmap_protect
1863 *
1864 * Purpose:
1865 * Reduces the permission for all physical
1866 * pages in the specified object range.
1867 *
1868 * If removing write permission only, it is
1869 * sufficient to protect only the pages in
1870 * the top-level object; only those pages may
1871 * have write permission.
1872 *
1873 * If removing all access, we must follow the
1874 * shadow chain from the top-level object to
1875 * remove access to all pages in shadowed objects.
1876 *
1877 * The object must *not* be locked. The object must
1878 * be temporary/internal.
1879 *
1880 * If pmap is not NULL, this routine assumes that
1881 * the only mappings for the pages are in that
1882 * pmap.
1883 */
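/*
 * Illustrative sketch (hypothetical caller, not taken from this file):
 * to revoke write access for a range whose mappings are known to live
 * in a single pmap, a caller might do
 *
 *	vm_object_pmap_protect(object, offset, size,
 *			       map->pmap, start,
 *			       cur_prot & ~VM_PROT_WRITE);
 *
 * passing PMAP_NULL instead when the mappings may span several pmaps,
 * which forces the per-page pmap_page_protect() path below.
 */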
1884
1885 __private_extern__ void
1886 vm_object_pmap_protect(
1887 register vm_object_t object,
1888 register vm_object_offset_t offset,
1889 vm_object_size_t size,
1890 pmap_t pmap,
1891 vm_map_offset_t pmap_start,
1892 vm_prot_t prot)
1893 {
1894 if (object == VM_OBJECT_NULL)
1895 return;
1896 size = vm_object_round_page(size);
1897 offset = vm_object_trunc_page(offset);
1898
1899 vm_object_lock(object);
1900
1901 if (object->phys_contiguous) {
1902 if (pmap != NULL) {
1903 vm_object_unlock(object);
1904 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1905 } else {
1906 vm_object_offset_t phys_start, phys_end, phys_addr;
1907
1908 phys_start = object->shadow_offset + offset;
1909 phys_end = phys_start + size;
1910 assert(phys_start <= phys_end);
1911 assert(phys_end <= object->shadow_offset + object->size);
1912 vm_object_unlock(object);
1913
1914 for (phys_addr = phys_start;
1915 phys_addr < phys_end;
1916 phys_addr += PAGE_SIZE_64) {
1917 pmap_page_protect(phys_addr >> 12, prot);
1918 }
1919 }
1920 return;
1921 }
1922
1923 assert(object->internal);
1924
1925 while (TRUE) {
1926 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
1927 vm_object_unlock(object);
1928 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1929 return;
1930 }
1931
1932 /* if the range is large relative to the resident */
1933 /* page count, iterate over the resident pages; otherwise */
1934 /* a per-offset (inverse) page look-up will be faster */
1935 if (ptoa_64(object->resident_page_count / 4) < size) {
1936 vm_page_t p;
1937 vm_object_offset_t end;
1938
1939 end = offset + size;
1940
1941 if (pmap != PMAP_NULL) {
1942 queue_iterate(&object->memq, p, vm_page_t, listq) {
1943 if (!p->fictitious &&
1944 (offset <= p->offset) && (p->offset < end)) {
1945 vm_map_offset_t start;
1946
1947 start = pmap_start + p->offset - offset;
1948 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
1949 }
1950 }
1951 } else {
1952 queue_iterate(&object->memq, p, vm_page_t, listq) {
1953 if (!p->fictitious &&
1954 (offset <= p->offset) && (p->offset < end)) {
1955
1956 pmap_page_protect(p->phys_page, prot);
1957 }
1958 }
1959 }
1960 } else {
1961 vm_page_t p;
1962 vm_object_offset_t end;
1963 vm_object_offset_t target_off;
1964
1965 end = offset + size;
1966
1967 if (pmap != PMAP_NULL) {
1968 for(target_off = offset;
1969 target_off < end;
1970 target_off += PAGE_SIZE) {
1971 p = vm_page_lookup(object, target_off);
1972 if (p != VM_PAGE_NULL) {
1973 vm_offset_t start;
1974 start = pmap_start +
1975 (vm_offset_t)(p->offset - offset);
1976 pmap_protect(pmap, start,
1977 start + PAGE_SIZE, prot);
1978 }
1979 }
1980 } else {
1981 for(target_off = offset;
1982 target_off < end; target_off += PAGE_SIZE) {
1983 p = vm_page_lookup(object, target_off);
1984 if (p != VM_PAGE_NULL) {
1985 pmap_page_protect(p->phys_page, prot);
1986 }
1987 }
1988 }
1989 }
1990
1991 if (prot == VM_PROT_NONE) {
1992 /*
1993 * Must follow shadow chain to remove access
1994 * to pages in shadowed objects.
1995 */
1996 register vm_object_t next_object;
1997
1998 next_object = object->shadow;
1999 if (next_object != VM_OBJECT_NULL) {
2000 offset += object->shadow_offset;
2001 vm_object_lock(next_object);
2002 vm_object_unlock(object);
2003 object = next_object;
2004 }
2005 else {
2006 /*
2007 * End of chain - we are done.
2008 */
2009 break;
2010 }
2011 }
2012 else {
2013 /*
2014 * Pages in shadowed objects may never have
2015 * write permission - we may stop here.
2016 */
2017 break;
2018 }
2019 }
2020
2021 vm_object_unlock(object);
2022 }
2023
2024 /*
2025 * Routine: vm_object_copy_slowly
2026 *
2027 * Description:
2028 * Copy the specified range of the source
2029 * virtual memory object without using
2030 * protection-based optimizations (such
2031 * as copy-on-write). The pages in the
2032 * region are actually copied.
2033 *
2034 * In/out conditions:
2035 * The caller must hold a reference and a lock
2036 * for the source virtual memory object. The source
2037 * object will be returned *unlocked*.
2038 *
2039 * Results:
2040 * If the copy is completed successfully, KERN_SUCCESS is
2041 * returned. If the caller asserted the interruptible
2042 * argument, and an interruption occurred while waiting
2043 * for a user-generated event, MACH_SEND_INTERRUPTED is
2044 * returned. Other values may be returned to indicate
2045 * hard errors during the copy operation.
2046 *
2047 * A new virtual memory object is returned in a
2048 * parameter (_result_object). The contents of this
2049 * new object, starting at a zero offset, are a copy
2050 * of the source memory region. In the event of
2051 * an error, this parameter will contain the value
2052 * VM_OBJECT_NULL.
2053 */
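/*
 * Shape of the copy loop below, for orientation: for each page-sized
 * chunk of the range, allocate a page in the new object (waiting for
 * free memory if necessary), fault the source page in with
 * vm_fault_page() (retrying on shortages, bailing out on interruption
 * or error), then vm_page_copy() it into the new page and activate
 * both pages.
 */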
2054 __private_extern__ kern_return_t
2055 vm_object_copy_slowly(
2056 register vm_object_t src_object,
2057 vm_object_offset_t src_offset,
2058 vm_object_size_t size,
2059 boolean_t interruptible,
2060 vm_object_t *_result_object) /* OUT */
2061 {
2062 vm_object_t new_object;
2063 vm_object_offset_t new_offset;
2064
2065 struct vm_object_fault_info fault_info;
2066
2067 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
2068 src_object, src_offset, size, 0, 0);
2069
2070 if (size == 0) {
2071 vm_object_unlock(src_object);
2072 *_result_object = VM_OBJECT_NULL;
2073 return(KERN_INVALID_ARGUMENT);
2074 }
2075
2076 /*
2077 * Prevent destruction of the source object while we copy.
2078 */
2079
2080 vm_object_reference_locked(src_object);
2081 vm_object_unlock(src_object);
2082
2083 /*
2084 * Create a new object to hold the copied pages.
2085 * A few notes:
2086 * We fill the new object starting at offset 0,
2087 * regardless of the input offset.
2088 * We don't bother to lock the new object within
2089 * this routine, since we have the only reference.
2090 */
2091
2092 new_object = vm_object_allocate(size);
2093 new_offset = 0;
2094
2095 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
2096
2097 fault_info.interruptible = interruptible;
2098 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
2099 fault_info.user_tag = 0;
2100 fault_info.lo_offset = src_offset;
2101 fault_info.hi_offset = src_offset + size;
2102 fault_info.no_cache = FALSE;
2103
2104 for ( ;
2105 size != 0 ;
2106 src_offset += PAGE_SIZE_64,
2107 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
2108 ) {
2109 vm_page_t new_page;
2110 vm_fault_return_t result;
2111
2112 vm_object_lock(new_object);
2113
2114 while ((new_page = vm_page_alloc(new_object, new_offset))
2115 == VM_PAGE_NULL) {
2116
2117 vm_object_unlock(new_object);
2118
2119 if (!vm_page_wait(interruptible)) {
2120 vm_object_deallocate(new_object);
2121 vm_object_deallocate(src_object);
2122 *_result_object = VM_OBJECT_NULL;
2123 return(MACH_SEND_INTERRUPTED);
2124 }
2125 vm_object_lock(new_object);
2126 }
2127 vm_object_unlock(new_object);
2128
2129 do {
2130 vm_prot_t prot = VM_PROT_READ;
2131 vm_page_t _result_page;
2132 vm_page_t top_page;
2133 register
2134 vm_page_t result_page;
2135 kern_return_t error_code;
2136
2137 vm_object_lock(src_object);
2138 vm_object_paging_begin(src_object);
2139
2140 fault_info.cluster_size = size;
2141
2142 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
2143 result = vm_fault_page(src_object, src_offset,
2144 VM_PROT_READ, FALSE,
2145 &prot, &_result_page, &top_page,
2146 (int *)0,
2147 &error_code, FALSE, FALSE, &fault_info);
2148
2149 switch(result) {
2150 case VM_FAULT_SUCCESS:
2151 result_page = _result_page;
2152
2153 /*
2154 * We don't need to hold the object
2155 * lock -- the busy page will be enough.
2156 * [We don't care about picking up any
2157 * new modifications.]
2158 *
2159 * Copy the page to the new object.
2160 *
2161 * POLICY DECISION:
2162 * If result_page is clean,
2163 * we could steal it instead
2164 * of copying.
2165 */
2166
2167 vm_object_unlock(result_page->object);
2168 vm_page_copy(result_page, new_page);
2169
2170 /*
2171 * Let go of both pages (make them
2172 * not busy, perform wakeup, activate).
2173 */
2174 vm_object_lock(new_object);
2175 new_page->dirty = TRUE;
2176 PAGE_WAKEUP_DONE(new_page);
2177 vm_object_unlock(new_object);
2178
2179 vm_object_lock(result_page->object);
2180 PAGE_WAKEUP_DONE(result_page);
2181
2182 vm_page_lockspin_queues();
2183 if (!result_page->active &&
2184 !result_page->inactive &&
2185 !result_page->throttled)
2186 vm_page_activate(result_page);
2187 vm_page_activate(new_page);
2188 vm_page_unlock_queues();
2189
2190 /*
2191 * Release paging references and
2192 * top-level placeholder page, if any.
2193 */
2194
2195 vm_fault_cleanup(result_page->object,
2196 top_page);
2197
2198 break;
2199
2200 case VM_FAULT_RETRY:
2201 break;
2202
2203 case VM_FAULT_FICTITIOUS_SHORTAGE:
2204 vm_page_more_fictitious();
2205 break;
2206
2207 case VM_FAULT_MEMORY_SHORTAGE:
2208 if (vm_page_wait(interruptible))
2209 break;
2210 /* fall thru */
2211
2212 case VM_FAULT_INTERRUPTED:
2213 vm_page_free(new_page);
2214 vm_object_deallocate(new_object);
2215 vm_object_deallocate(src_object);
2216 *_result_object = VM_OBJECT_NULL;
2217 return(MACH_SEND_INTERRUPTED);
2218
2219 case VM_FAULT_MEMORY_ERROR:
2220 /*
2221 * A policy choice:
2222 * (a) ignore pages that we can't
2223 * copy
2224 * (b) return the null object if
2225 * any page fails [chosen]
2226 */
2227
2228 vm_page_lock_queues();
2229 vm_page_free(new_page);
2230 vm_page_unlock_queues();
2231
2232 vm_object_deallocate(new_object);
2233 vm_object_deallocate(src_object);
2234 *_result_object = VM_OBJECT_NULL;
2235 return(error_code ? error_code:
2236 KERN_MEMORY_ERROR);
2237 }
2238 } while (result != VM_FAULT_SUCCESS);
2239 }
2240
2241 /*
2242 * Lose the extra reference, and return our object.
2243 */
2244 vm_object_deallocate(src_object);
2245 *_result_object = new_object;
2246 return(KERN_SUCCESS);
2247 }
2248
2249 /*
2250 * Routine: vm_object_copy_quickly
2251 *
2252 * Purpose:
2253 * Copy the specified range of the source virtual
2254 * memory object, if it can be done without waiting
2255 * for user-generated events.
2256 *
2257 * Results:
2258 * If the copy is successful, the copy is returned in
2259 * the arguments; otherwise, the arguments are not
2260 * affected.
2261 *
2262 * In/out conditions:
2263 * The object should be unlocked on entry and exit.
2264 */
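/*
 * Only the symmetric copy strategy is handled here without blocking;
 * for MEMORY_OBJECT_COPY_DELAY and every other strategy the caller
 * gets FALSE back and must fall back to one of the heavier copy
 * routines in this file.
 */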
2265
2266 /*ARGSUSED*/
2267 __private_extern__ boolean_t
2268 vm_object_copy_quickly(
2269 vm_object_t *_object, /* INOUT */
2270 __unused vm_object_offset_t offset, /* IN */
2271 __unused vm_object_size_t size, /* IN */
2272 boolean_t *_src_needs_copy, /* OUT */
2273 boolean_t *_dst_needs_copy) /* OUT */
2274 {
2275 vm_object_t object = *_object;
2276 memory_object_copy_strategy_t copy_strategy;
2277
2278 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
2279 *_object, offset, size, 0, 0);
2280 if (object == VM_OBJECT_NULL) {
2281 *_src_needs_copy = FALSE;
2282 *_dst_needs_copy = FALSE;
2283 return(TRUE);
2284 }
2285
2286 vm_object_lock(object);
2287
2288 copy_strategy = object->copy_strategy;
2289
2290 switch (copy_strategy) {
2291 case MEMORY_OBJECT_COPY_SYMMETRIC:
2292
2293 /*
2294 * Symmetric copy strategy.
2295 * Make another reference to the object.
2296 * Leave object/offset unchanged.
2297 */
2298
2299 vm_object_reference_locked(object);
2300 object->shadowed = TRUE;
2301 vm_object_unlock(object);
2302
2303 /*
2304 * Both source and destination must make
2305 * shadows, and the source must be made
2306 * read-only if not already.
2307 */
2308
2309 *_src_needs_copy = TRUE;
2310 *_dst_needs_copy = TRUE;
2311
2312 break;
2313
2314 case MEMORY_OBJECT_COPY_DELAY:
2315 vm_object_unlock(object);
2316 return(FALSE);
2317
2318 default:
2319 vm_object_unlock(object);
2320 return(FALSE);
2321 }
2322 return(TRUE);
2323 }
2324
2325 static int copy_call_count = 0;
2326 static int copy_call_sleep_count = 0;
2327 static int copy_call_restart_count = 0;
2328
2329 /*
2330 * Routine: vm_object_copy_call [internal]
2331 *
2332 * Description:
2333 * Copy the source object (src_object), using the
2334 * user-managed copy algorithm.
2335 *
2336 * In/out conditions:
2337 * The source object must be locked on entry. It
2338 * will be *unlocked* on exit.
2339 *
2340 * Results:
2341 * If the copy is successful, KERN_SUCCESS is returned.
2342 * A new object that represents the copied virtual
2343 * memory is returned in a parameter (*_result_object).
2344 * If the return value indicates an error, this parameter
2345 * is not valid.
2346 */
2347 static kern_return_t
2348 vm_object_copy_call(
2349 vm_object_t src_object,
2350 vm_object_offset_t src_offset,
2351 vm_object_size_t size,
2352 vm_object_t *_result_object) /* OUT */
2353 {
2354 kern_return_t kr;
2355 vm_object_t copy;
2356 boolean_t check_ready = FALSE;
2357 uint32_t try_failed_count = 0;
2358
2359 /*
2360 * If a copy is already in progress, wait and retry.
2361 *
2362 * XXX
2363 * Consider making this call interruptible, as Mike
2364 * intended it to be.
2365 *
2366 * XXXO
2367 * Need a counter or version or something to allow
2368 * us to use the copy that the currently requesting
2369 * thread is obtaining -- is it worth adding to the
2370 * vm object structure? Depends on how common this case is.
2371 */
2372 copy_call_count++;
2373 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
2374 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
2375 THREAD_UNINT);
2376 copy_call_restart_count++;
2377 }
2378
2379 /*
2380 * Indicate (for the benefit of memory_object_create_copy)
2381 * that we want a copy for src_object. (Note that we cannot
2382 * do a real assert_wait before calling memory_object_copy,
2383 * so we simply set the flag.)
2384 */
2385
2386 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
2387 vm_object_unlock(src_object);
2388
2389 /*
2390 * Ask the memory manager to give us a memory object
2391 * which represents a copy of the src object.
2392 * The memory manager may give us a memory object
2393 * which we already have, or it may give us a
2394 * new memory object. This memory object will arrive
2395 * via memory_object_create_copy.
2396 */
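/*
 * (As the XXX below notes, that request is not actually issued yet:
 * "kr" is hard-wired to KERN_FAILURE until memory_object.defs grows a
 * suitable interface, so this routine currently always returns early
 * here.)
 */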
2397
2398 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
2399 if (kr != KERN_SUCCESS) {
2400 return kr;
2401 }
2402
2403 /*
2404 * Wait for the copy to arrive.
2405 */
2406 vm_object_lock(src_object);
2407 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
2408 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
2409 THREAD_UNINT);
2410 copy_call_sleep_count++;
2411 }
2412 Retry:
2413 assert(src_object->copy != VM_OBJECT_NULL);
2414 copy = src_object->copy;
2415 if (!vm_object_lock_try(copy)) {
2416 vm_object_unlock(src_object);
2417
2418 try_failed_count++;
2419 mutex_pause(try_failed_count); /* wait a bit */
2420
2421 vm_object_lock(src_object);
2422 goto Retry;
2423 }
2424 if (copy->size < src_offset+size)
2425 copy->size = src_offset+size;
2426
2427 if (!copy->pager_ready)
2428 check_ready = TRUE;
2429
2430 /*
2431 * Return the copy.
2432 */
2433 *_result_object = copy;
2434 vm_object_unlock(copy);
2435 vm_object_unlock(src_object);
2436
2437 /* Wait for the copy to be ready. */
2438 if (check_ready == TRUE) {
2439 vm_object_lock(copy);
2440 while (!copy->pager_ready) {
2441 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
2442 }
2443 vm_object_unlock(copy);
2444 }
2445
2446 return KERN_SUCCESS;
2447 }
2448
2449 static int copy_delayed_lock_collisions = 0;
2450 static int copy_delayed_max_collisions = 0;
2451 static int copy_delayed_lock_contention = 0;
2452 static int copy_delayed_protect_iterate = 0;
2453
2454 /*
2455 * Routine: vm_object_copy_delayed [internal]
2456 *
2457 * Description:
2458 * Copy the specified virtual memory object, using
2459 * the asymmetric copy-on-write algorithm.
2460 *
2461 * In/out conditions:
2462 * The src_object must be locked on entry. It will be unlocked
2463 * on exit - so the caller must also hold a reference to it.
2464 *
2465 * This routine will not block waiting for user-generated
2466 * events. It is not interruptible.
2467 */
2468 __private_extern__ vm_object_t
2469 vm_object_copy_delayed(
2470 vm_object_t src_object,
2471 vm_object_offset_t src_offset,
2472 vm_object_size_t size,
2473 boolean_t src_object_shared)
2474 {
2475 vm_object_t new_copy = VM_OBJECT_NULL;
2476 vm_object_t old_copy;
2477 vm_page_t p;
2478 vm_object_size_t copy_size = src_offset + size;
2479
2480
2481 int collisions = 0;
2482 /*
2483 * The user-level memory manager wants to see all of the changes
2484 * to this object, but it has promised not to make any changes on
2485 * its own.
2486 *
2487 * Perform an asymmetric copy-on-write, as follows:
2488 * Create a new object, called a "copy object" to hold
2489 * pages modified by the new mapping (i.e., the copy,
2490 * not the original mapping).
2491 * Record the original object as the backing object for
2492 * the copy object. If the original mapping does not
2493 * change a page, it may be used read-only by the copy.
2494 * Record the copy object in the original object.
2495 * When the original mapping causes a page to be modified,
2496 * it must be copied to a new page that is "pushed" to
2497 * the copy object.
2498 * Mark the new mapping (the copy object) copy-on-write.
2499 * This makes the copy object itself read-only, allowing
2500 * it to be reused if the original mapping makes no
2501 * changes, and simplifying the synchronization required
2502 * in the "push" operation described above.
2503 *
2504 * The copy-on-write is said to be asymmetric because the original
2505 * object is *not* marked copy-on-write. A copied page is pushed
2506 * to the copy object, regardless which party attempted to modify
2507 * the page.
2508 *
2509 * Repeated asymmetric copy operations may be done. If the
2510 * original object has not been changed since the last copy, its
2511 * copy object can be reused. Otherwise, a new copy object can be
2512 * inserted between the original object and its previous copy
2513 * object. Since any copy object is read-only, this cannot
2514 * affect the contents of the previous copy object.
2515 *
2516 * Note that a copy object is higher in the object tree than the
2517 * original object; therefore, use of the copy object recorded in
2518 * the original object must be done carefully, to avoid deadlock.
2519 */
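/*
 * A sketch of the resulting chain, assuming the original object was
 * modified between two delayed copies (so a new copy object is
 * inserted rather than the old one being reused):
 *
 *	old_copy --shadow--> new_copy --shadow--> src_object
 *	                                          (src_object->copy == new_copy)
 *
 * Unmodified pages are found by following the shadow links toward
 * src_object; pages modified after a copy are pushed from src_object
 * into whichever copy object was current at the time.
 */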
2520
2521 Retry:
2522
2523 /*
2524 * Wait for paging in progress.
2525 */
2526 if (!src_object->true_share && src_object->paging_in_progress) {
2527 if (src_object_shared == TRUE) {
2528 vm_object_unlock(src_object);
2529
2530 vm_object_lock(src_object);
2531 src_object_shared = FALSE;
2532 }
2533 vm_object_paging_wait(src_object, THREAD_UNINT);
2534 }
2535 /*
2536 * See whether we can reuse the result of a previous
2537 * copy operation.
2538 */
2539
2540 old_copy = src_object->copy;
2541 if (old_copy != VM_OBJECT_NULL) {
2542 int lock_granted;
2543
2544 /*
2545 * Try to get the locks (out of order)
2546 */
2547 if (src_object_shared == TRUE)
2548 lock_granted = vm_object_lock_try_shared(old_copy);
2549 else
2550 lock_granted = vm_object_lock_try(old_copy);
2551
2552 if (!lock_granted) {
2553 vm_object_unlock(src_object);
2554
2555 if (collisions++ == 0)
2556 copy_delayed_lock_contention++;
2557 mutex_pause(collisions);
2558
2559 /* Heisenberg Rules */
2560 copy_delayed_lock_collisions++;
2561
2562 if (collisions > copy_delayed_max_collisions)
2563 copy_delayed_max_collisions = collisions;
2564
2565 if (src_object_shared == TRUE)
2566 vm_object_lock_shared(src_object);
2567 else
2568 vm_object_lock(src_object);
2569
2570 goto Retry;
2571 }
2572
2573 /*
2574 * Determine whether the old copy object has
2575 * been modified.
2576 */
2577
2578 if (old_copy->resident_page_count == 0 &&
2579 !old_copy->pager_created) {
2580 /*
2581 * It has not been modified.
2582 *
2583 * Return another reference to
2584 * the existing copy-object if
2585 * we can safely grow it (if
2586 * needed).
2587 */
2588
2589 if (old_copy->size < copy_size) {
2590 if (src_object_shared == TRUE) {
2591 vm_object_unlock(old_copy);
2592 vm_object_unlock(src_object);
2593
2594 vm_object_lock(src_object);
2595 src_object_shared = FALSE;
2596 goto Retry;
2597 }
2598 /*
2599 * We can't perform a delayed copy if any of the
2600 * pages in the extended range are wired (because
2601 * we can't safely take write permission away from
2602 * wired pages). If the pages aren't wired, then
2603 * go ahead and protect them.
2604 */
2605 copy_delayed_protect_iterate++;
2606
2607 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2608 if (!p->fictitious &&
2609 p->offset >= old_copy->size &&
2610 p->offset < copy_size) {
2611 if (p->wire_count > 0) {
2612 vm_object_unlock(old_copy);
2613 vm_object_unlock(src_object);
2614
2615 if (new_copy != VM_OBJECT_NULL) {
2616 vm_object_unlock(new_copy);
2617 vm_object_deallocate(new_copy);
2618 }
2619
2620 return VM_OBJECT_NULL;
2621 } else {
2622 pmap_page_protect(p->phys_page,
2623 (VM_PROT_ALL & ~VM_PROT_WRITE));
2624 }
2625 }
2626 }
2627 old_copy->size = copy_size;
2628 }
2629 if (src_object_shared == TRUE)
2630 vm_object_reference_shared(old_copy);
2631 else
2632 vm_object_reference_locked(old_copy);
2633 vm_object_unlock(old_copy);
2634 vm_object_unlock(src_object);
2635
2636 if (new_copy != VM_OBJECT_NULL) {
2637 vm_object_unlock(new_copy);
2638 vm_object_deallocate(new_copy);
2639 }
2640 return(old_copy);
2641 }
2642
2643
2644
2645 /*
2646 * Adjust the size argument so that the newly-created
2647 * copy object will be large enough to back either the
2648 * old copy object or the new mapping.
2649 */
2650 if (old_copy->size > copy_size)
2651 copy_size = old_copy->size;
2652
2653 if (new_copy == VM_OBJECT_NULL) {
2654 vm_object_unlock(old_copy);
2655 vm_object_unlock(src_object);
2656 new_copy = vm_object_allocate(copy_size);
2657 vm_object_lock(src_object);
2658 vm_object_lock(new_copy);
2659
2660 src_object_shared = FALSE;
2661 goto Retry;
2662 }
2663 new_copy->size = copy_size;
2664
2665 /*
2666 * The copy-object is always made large enough to
2667 * completely shadow the original object, since
2668 * it may have several users who want to shadow
2669 * the original object at different points.
2670 */
2671
2672 assert((old_copy->shadow == src_object) &&
2673 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2674
2675 } else if (new_copy == VM_OBJECT_NULL) {
2676 vm_object_unlock(src_object);
2677 new_copy = vm_object_allocate(copy_size);
2678 vm_object_lock(src_object);
2679 vm_object_lock(new_copy);
2680
2681 src_object_shared = FALSE;
2682 goto Retry;
2683 }
2684
2685 /*
2686 * We now have the src object locked, and the new copy object
2687 * allocated and locked (and potentially the old copy locked).
2688 * Before we go any further, make sure we can still perform
2689 * a delayed copy, as the situation may have changed.
2690 *
2691 * Specifically, we can't perform a delayed copy if any of the
2692 * pages in the range are wired (because we can't safely take
2693 * write permission away from wired pages). If the pages aren't
2694 * wired, then go ahead and protect them.
2695 */
2696 copy_delayed_protect_iterate++;
2697
2698 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2699 if (!p->fictitious && p->offset < copy_size) {
2700 if (p->wire_count > 0) {
2701 if (old_copy)
2702 vm_object_unlock(old_copy);
2703 vm_object_unlock(src_object);
2704 vm_object_unlock(new_copy);
2705 vm_object_deallocate(new_copy);
2706 return VM_OBJECT_NULL;
2707 } else {
2708 pmap_page_protect(p->phys_page,
2709 (VM_PROT_ALL & ~VM_PROT_WRITE));
2710 }
2711 }
2712 }
2713 if (old_copy != VM_OBJECT_NULL) {
2714 /*
2715 * Make the old copy-object shadow the new one.
2716 * It will receive no more pages from the original
2717 * object.
2718 */
2719
2720 /* remove ref. from old_copy */
2721 vm_object_lock_assert_exclusive(src_object);
2722 src_object->ref_count--;
2723 assert(src_object->ref_count > 0);
2724 vm_object_lock_assert_exclusive(old_copy);
2725 old_copy->shadow = new_copy;
2726 vm_object_lock_assert_exclusive(new_copy);
2727 assert(new_copy->ref_count > 0);
2728 new_copy->ref_count++; /* for old_copy->shadow ref. */
2729
2730 #if TASK_SWAPPER
2731 if (old_copy->res_count) {
2732 VM_OBJ_RES_INCR(new_copy);
2733 VM_OBJ_RES_DECR(src_object);
2734 }
2735 #endif
2736
2737 vm_object_unlock(old_copy); /* done with old_copy */
2738 }
2739
2740 /*
2741 * Point the new copy at the existing object.
2742 */
2743 vm_object_lock_assert_exclusive(new_copy);
2744 new_copy->shadow = src_object;
2745 new_copy->shadow_offset = 0;
2746 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2747
2748 vm_object_lock_assert_exclusive(src_object);
2749 vm_object_reference_locked(src_object);
2750 src_object->copy = new_copy;
2751 vm_object_unlock(src_object);
2752 vm_object_unlock(new_copy);
2753
2754 XPR(XPR_VM_OBJECT,
2755 "vm_object_copy_delayed: used copy object %X for source %X\n",
2756 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2757
2758 return new_copy;
2759 }
2760
2761 /*
2762 * Routine: vm_object_copy_strategically
2763 *
2764 * Purpose:
2765 * Perform a copy according to the source object's
2766 * declared strategy. This operation may block,
2767 * and may be interrupted.
2768 */
2769 __private_extern__ kern_return_t
2770 vm_object_copy_strategically(
2771 register vm_object_t src_object,
2772 vm_object_offset_t src_offset,
2773 vm_object_size_t size,
2774 vm_object_t *dst_object, /* OUT */
2775 vm_object_offset_t *dst_offset, /* OUT */
2776 boolean_t *dst_needs_copy) /* OUT */
2777 {
2778 boolean_t result;
2779 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2780 boolean_t object_lock_shared = FALSE;
2781 memory_object_copy_strategy_t copy_strategy;
2782
2783 assert(src_object != VM_OBJECT_NULL);
2784
2785 copy_strategy = src_object->copy_strategy;
2786
2787 if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
2788 vm_object_lock_shared(src_object);
2789 object_lock_shared = TRUE;
2790 } else
2791 vm_object_lock(src_object);
2792
2793 /*
2794 * The copy strategy is only valid if the memory manager
2795 * is "ready". Internal objects are always ready.
2796 */
2797
2798 while (!src_object->internal && !src_object->pager_ready) {
2799 wait_result_t wait_result;
2800
2801 if (object_lock_shared == TRUE) {
2802 vm_object_unlock(src_object);
2803 vm_object_lock(src_object);
2804 object_lock_shared = FALSE;
2805 continue;
2806 }
2807 wait_result = vm_object_sleep( src_object,
2808 VM_OBJECT_EVENT_PAGER_READY,
2809 interruptible);
2810 if (wait_result != THREAD_AWAKENED) {
2811 vm_object_unlock(src_object);
2812 *dst_object = VM_OBJECT_NULL;
2813 *dst_offset = 0;
2814 *dst_needs_copy = FALSE;
2815 return(MACH_SEND_INTERRUPTED);
2816 }
2817 }
2818
2819 /*
2820 * Use the appropriate copy strategy.
2821 */
2822
2823 switch (copy_strategy) {
2824 case MEMORY_OBJECT_COPY_DELAY:
2825 *dst_object = vm_object_copy_delayed(src_object,
2826 src_offset, size, object_lock_shared);
2827 if (*dst_object != VM_OBJECT_NULL) {
2828 *dst_offset = src_offset;
2829 *dst_needs_copy = TRUE;
2830 result = KERN_SUCCESS;
2831 break;
2832 }
2833 vm_object_lock(src_object);
2834 /* fall thru when delayed copy not allowed */
2835
2836 case MEMORY_OBJECT_COPY_NONE:
2837 result = vm_object_copy_slowly(src_object, src_offset, size,
2838 interruptible, dst_object);
2839 if (result == KERN_SUCCESS) {
2840 *dst_offset = 0;
2841 *dst_needs_copy = FALSE;
2842 }
2843 break;
2844
2845 case MEMORY_OBJECT_COPY_CALL:
2846 result = vm_object_copy_call(src_object, src_offset, size,
2847 dst_object);
2848 if (result == KERN_SUCCESS) {
2849 *dst_offset = src_offset;
2850 *dst_needs_copy = TRUE;
2851 }
2852 break;
2853
2854 case MEMORY_OBJECT_COPY_SYMMETRIC:
2855 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2856 vm_object_unlock(src_object);
2857 result = KERN_MEMORY_RESTART_COPY;
2858 break;
2859
2860 default:
2861 panic("copy_strategically: bad strategy");
2862 result = KERN_INVALID_ARGUMENT;
2863 }
2864 return(result);
2865 }
2866
2867 /*
2868 * vm_object_shadow:
2869 *
2870 * Create a new object which is backed by the
2871 * specified existing object range. The source
2872 * object reference is deallocated.
2873 *
2874 * The new object and offset into that object
2875 * are returned in the source parameters.
2876 */
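/*
 * Illustrative sketch (hypothetical caller, not taken from this file):
 * a map entry marked needs_copy would typically be resolved with
 * something like
 *
 *	vm_object_shadow(&entry_object, &entry_offset,
 *			 entry_end - entry_start);
 *
 * after which entry_object names the new shadow object and
 * entry_offset has been reset to 0.
 */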
2877 boolean_t vm_object_shadow_check = FALSE;
2878
2879 __private_extern__ boolean_t
2880 vm_object_shadow(
2881 vm_object_t *object, /* IN/OUT */
2882 vm_object_offset_t *offset, /* IN/OUT */
2883 vm_object_size_t length)
2884 {
2885 register vm_object_t source;
2886 register vm_object_t result;
2887
2888 source = *object;
2889 #if 0
2890 /*
2891 * XXX FBDP
2892 * This assertion is valid but it gets triggered by Rosetta for example
2893 * due to a combination of vm_remap() that changes a VM object's
2894 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
2895 * that then sets "needs_copy" on its map entry. This creates a
2896 * mapping situation that VM should never see and doesn't know how to
2897 * handle.
2898 * It's not clear if this can create any real problem but we should
2899 * look into fixing this, probably by having vm_protect(VM_PROT_COPY)
2900 * do more than just set "needs_copy" to handle the copy-on-write...
2901 * In the meantime, let's disable the assertion.
2902 */
2903 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2904 #endif
2905
2906 /*
2907 * Determine if we really need a shadow.
2908 */
2909
2910 if (vm_object_shadow_check && source->ref_count == 1 &&
2911 (source->shadow == VM_OBJECT_NULL ||
2912 source->shadow->copy == VM_OBJECT_NULL))
2913 {
2914 source->shadowed = FALSE;
2915 return FALSE;
2916 }
2917
2918 /*
2919 * Allocate a new object with the given length
2920 */
2921
2922 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2923 panic("vm_object_shadow: no object for shadowing");
2924
2925 /*
2926 * The new object shadows the source object, adding
2927 * a reference to it. Our caller changes his reference
2928 * to point to the new object, removing a reference to
2929 * the source object. Net result: no change of reference
2930 * count.
2931 */
2932 result->shadow = source;
2933
2934 /*
2935 * Store the offset into the source object,
2936 * and fix up the offset into the new object.
2937 */
2938
2939 result->shadow_offset = *offset;
2940
2941 /*
2942 * Return the new things
2943 */
2944
2945 *offset = 0;
2946 *object = result;
2947 return TRUE;
2948 }
2949
2950 /*
2951 * The relationship between vm_object structures and
2952 * the memory_object requires careful synchronization.
2953 *
2954 * All associations are created by memory_object_create_named
2955 * for external pagers and vm_object_pager_create for internal
2956 * objects as follows:
2957 *
2958 * pager: the memory_object itself, supplied by
2959 * the user requesting a mapping (or the kernel,
2960 * when initializing internal objects); the
2961 * kernel simulates holding send rights by keeping
2962 * a port reference;
2963 *
2964 * pager_request:
2965 * the memory object control port,
2966 * created by the kernel; the kernel holds
2967 * receive (and ownership) rights to this
2968 * port, but no other references.
2969 *
2970 * When initialization is complete, the "initialized" field
2971 * is asserted. Other mappings using a particular memory object,
2972 * and any references to the vm_object gained through the
2973 * port association must wait for this initialization to occur.
2974 *
2975 * In order to allow the memory manager to set attributes before
2976 * requests (notably virtual copy operations, but also data or
2977 * unlock requests) are made, a "ready" attribute is made available.
2978 * Only the memory manager may affect the value of this attribute.
2979 * Its value does not affect critical kernel functions, such as
2980 * internal object initialization or destruction. [Furthermore,
2981 * memory objects created by the kernel are assumed to be ready
2982 * immediately; the default memory manager need not explicitly
2983 * set the "ready" attribute.]
2984 *
2985 * [Both the "initialized" and "ready" attribute wait conditions
2986 * use the "pager" field as the wait event.]
2987 *
2988 * The port associations can be broken down by any of the
2989 * following routines:
2990 * vm_object_terminate:
2991 * No references to the vm_object remain, and
2992 * the object cannot (or will not) be cached.
2993 * This is the normal case, and is done even
2994 * though one of the other cases has already been
2995 * done.
2996 * memory_object_destroy:
2997 * The memory manager has requested that the
2998 * kernel relinquish references to the memory
2999 * object. [The memory manager may not want to
3000 * destroy the memory object, but may wish to
3001 * refuse or tear down existing memory mappings.]
3002 *
3003 * Each routine that breaks an association must break all of
3004 * them at once. At some later time, that routine must clear
3005 * the pager field and release the memory object references.
3006 * [Furthermore, each routine must cope with the simultaneous
3007 * or previous operations of the others.]
3008 *
3009 * In addition to the lock on the object, the vm_object_cache_lock
3010 * governs the associations. References gained through the
3011 * association require use of the cache lock.
3012 *
3013 * Because the pager field may be cleared spontaneously, it
3014 * cannot be used to determine whether a memory object has
3015 * ever been associated with a particular vm_object. [This
3016 * knowledge is important to the shadow object mechanism.]
3017 * For this reason, an additional "created" attribute is
3018 * provided.
3019 *
3020 * During various paging operations, the pager reference found in the
3021 * vm_object must be valid. To prevent this from being released,
3022 * (other than being removed, i.e., made null), routines may use
3023 * the vm_object_paging_begin/end routines [actually, macros].
3024 * The implementation uses the "paging_in_progress" and "wanted" fields.
3025 * [Operations that alter the validity of the pager values include the
3026 * termination routines and vm_object_collapse.]
3027 */
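/*
 * In code terms, the life cycle described above is roughly:
 *
 *	vm_object_enter() / vm_object_pager_create()
 *		-> "pager_created", then "pager_initialized" (+ wakeup);
 *	"pager_ready" set by the memory manager, or immediately for
 *		internal objects;
 *	vm_object_terminate() or memory_object_destroy()
 *		-> associations broken, pager reference released.
 */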
3028
3029
3030 /*
3031 * Routine: vm_object_enter
3032 * Purpose:
3033 * Find a VM object corresponding to the given
3034 * pager; if no such object exists, create one,
3035 * and initialize the pager.
3036 */
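/*
 * Outline of the lookup below: the pager hash is searched under the
 * object-cache lock; if no entry is found, the lock is dropped to
 * allocate a fresh entry and object and the lookup is retried, so a
 * racing thread may win the insertion and the spare allocations are
 * simply discarded further down.
 */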
3037 vm_object_t
3038 vm_object_enter(
3039 memory_object_t pager,
3040 vm_object_size_t size,
3041 boolean_t internal,
3042 boolean_t init,
3043 boolean_t named)
3044 {
3045 register vm_object_t object;
3046 vm_object_t new_object;
3047 boolean_t must_init;
3048 vm_object_hash_entry_t entry, new_entry;
3049 uint32_t try_failed_count = 0;
3050
3051 if (pager == MEMORY_OBJECT_NULL)
3052 return(vm_object_allocate(size));
3053
3054 new_object = VM_OBJECT_NULL;
3055 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3056 must_init = init;
3057
3058 /*
3059 * Look for an object associated with this port.
3060 */
3061 Retry:
3062 vm_object_cache_lock();
3063 do {
3064 entry = vm_object_hash_lookup(pager, FALSE);
3065
3066 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
3067 if (new_object == VM_OBJECT_NULL) {
3068 /*
3069 * We must unlock to create a new object;
3070 * if we do so, we must try the lookup again.
3071 */
3072 vm_object_cache_unlock();
3073 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
3074 new_entry = vm_object_hash_entry_alloc(pager);
3075 new_object = vm_object_allocate(size);
3076 vm_object_cache_lock();
3077 } else {
3078 /*
3079 * Lookup failed twice, and we have something
3080 * to insert; set the object.
3081 */
3082 vm_object_hash_insert(new_entry);
3083 entry = new_entry;
3084 entry->object = new_object;
3085 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3086 new_object = VM_OBJECT_NULL;
3087 must_init = TRUE;
3088 }
3089 } else if (entry->object == VM_OBJECT_NULL) {
3090 /*
3091 * If a previous object is being terminated,
3092 * we must wait for the termination message
3093 * to be queued (and lookup the entry again).
3094 */
3095 entry->waiting = TRUE;
3096 entry = VM_OBJECT_HASH_ENTRY_NULL;
3097 assert_wait((event_t) pager, THREAD_UNINT);
3098 vm_object_cache_unlock();
3099 thread_block(THREAD_CONTINUE_NULL);
3100 vm_object_cache_lock();
3101 }
3102 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
3103
3104 object = entry->object;
3105 assert(object != VM_OBJECT_NULL);
3106
3107 if (!must_init) {
3108 if (!vm_object_lock_try(object)) {
3109
3110 vm_object_cache_unlock();
3111
3112 try_failed_count++;
3113 mutex_pause(try_failed_count); /* wait a bit */
3114
3115 goto Retry;
3116 }
3117 assert(!internal || object->internal);
3118 if (named) {
3119 assert(!object->named);
3120 object->named = TRUE;
3121 }
3122 if (object->ref_count == 0) {
3123 XPR(XPR_VM_OBJECT_CACHE,
3124 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
3125 (integer_t)object,
3126 (integer_t)vm_object_cached_list.next,
3127 (integer_t)vm_object_cached_list.prev, 0,0);
3128 queue_remove(&vm_object_cached_list, object,
3129 vm_object_t, cached_list);
3130 vm_object_cached_count--;
3131 }
3132 vm_object_lock_assert_exclusive(object);
3133 object->ref_count++;
3134 vm_object_res_reference(object);
3135 vm_object_unlock(object);
3136
3137 VM_STAT_INCR(hits);
3138 }
3139 assert(object->ref_count > 0);
3140
3141 VM_STAT_INCR(lookups);
3142
3143 vm_object_cache_unlock();
3144
3145 XPR(XPR_VM_OBJECT,
3146 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
3147 (integer_t)pager, (integer_t)object, must_init, 0, 0);
3148
3149 /*
3150 * If we raced to create a vm_object but lost, let's
3151 * throw away ours.
3152 */
3153
3154 if (new_object != VM_OBJECT_NULL)
3155 vm_object_deallocate(new_object);
3156
3157 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
3158 vm_object_hash_entry_free(new_entry);
3159
3160 if (must_init) {
3161 memory_object_control_t control;
3162
3163 /*
3164 * Allocate request port.
3165 */
3166
3167 control = memory_object_control_allocate(object);
3168 assert (control != MEMORY_OBJECT_CONTROL_NULL);
3169
3170 vm_object_lock(object);
3171 assert(object != kernel_object);
3172
3173 /*
3174 * Copy the reference we were given.
3175 */
3176
3177 memory_object_reference(pager);
3178 object->pager_created = TRUE;
3179 object->pager = pager;
3180 object->internal = internal;
3181 object->pager_trusted = internal;
3182 if (!internal) {
3183 /* copy strategy invalid until set by memory manager */
3184 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
3185 }
3186 object->pager_control = control;
3187 object->pager_ready = FALSE;
3188
3189 vm_object_unlock(object);
3190
3191 /*
3192 * Let the pager know we're using it.
3193 */
3194
3195 (void) memory_object_init(pager,
3196 object->pager_control,
3197 PAGE_SIZE);
3198
3199 vm_object_lock(object);
3200 if (named)
3201 object->named = TRUE;
3202 if (internal) {
3203 object->pager_ready = TRUE;
3204 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
3205 }
3206
3207 object->pager_initialized = TRUE;
3208 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
3209 } else {
3210 vm_object_lock(object);
3211 }
3212
3213 /*
3214 * [At this point, the object must be locked]
3215 */
3216
3217 /*
3218 * Wait for the work above to be done by the first
3219 * thread to map this object.
3220 */
3221
3222 while (!object->pager_initialized) {
3223 vm_object_sleep(object,
3224 VM_OBJECT_EVENT_INITIALIZED,
3225 THREAD_UNINT);
3226 }
3227 vm_object_unlock(object);
3228
3229 XPR(XPR_VM_OBJECT,
3230 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
3231 (integer_t)object, (integer_t)object->pager, internal, 0,0);
3232 return(object);
3233 }
3234
3235 /*
3236 * Routine: vm_object_pager_create
3237 * Purpose:
3238 * Create a memory object for an internal object.
3239 * In/out conditions:
3240 * The object is locked on entry and exit;
3241 * it may be unlocked within this call.
3242 * Limitations:
3243 * Only one thread may be performing a
3244 * vm_object_pager_create on an object at
3245 * a time. Presumably, only the pageout
3246 * daemon will be using this routine.
3247 */
3248
3249 void
3250 vm_object_pager_create(
3251 register vm_object_t object)
3252 {
3253 memory_object_t pager;
3254 vm_object_hash_entry_t entry;
3255 #if MACH_PAGEMAP
3256 vm_object_size_t size;
3257 vm_external_map_t map;
3258 #endif /* MACH_PAGEMAP */
3259
3260 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
3261 (integer_t)object, 0,0,0,0);
3262
3263 assert(object != kernel_object);
3264
3265 if (memory_manager_default_check() != KERN_SUCCESS)
3266 return;
3267
3268 /*
3269 * Prevent collapse or termination by holding a paging reference
3270 */
3271
3272 vm_object_paging_begin(object);
3273 if (object->pager_created) {
3274 /*
3275 * Someone else got to it first...
3276 * wait for them to finish initializing the ports
3277 */
3278 while (!object->pager_initialized) {
3279 vm_object_sleep(object,
3280 VM_OBJECT_EVENT_INITIALIZED,
3281 THREAD_UNINT);
3282 }
3283 vm_object_paging_end(object);
3284 return;
3285 }
3286
3287 /*
3288 * Indicate that a memory object has been assigned
3289 * before dropping the lock, to prevent a race.
3290 */
3291
3292 object->pager_created = TRUE;
3293 object->paging_offset = 0;
3294
3295 #if MACH_PAGEMAP
3296 size = object->size;
3297 #endif /* MACH_PAGEMAP */
3298 vm_object_unlock(object);
3299
3300 #if MACH_PAGEMAP
3301 map = vm_external_create(size);
3302 vm_object_lock(object);
3303 assert(object->size == size);
3304 object->existence_map = map;
3305 vm_object_unlock(object);
3306 #endif /* MACH_PAGEMAP */
3307
3308 /*
3309 * Create the [internal] pager, and associate it with this object.
3310 *
3311 * We make the association here so that vm_object_enter()
3312 * can look up the object to complete initializing it. No
3313 * user will ever map this object.
3314 */
3315 {
3316 memory_object_default_t dmm;
3317
3318 /* acquire a reference for the default memory manager */
3319 dmm = memory_manager_default_reference();
3320
3321 assert(object->temporary);
3322
3323 /* create our new memory object */
3324 (void) memory_object_create(dmm, object->size, &pager);
3325
3326 memory_object_default_deallocate(dmm);
3327 }
3328
3329 entry = vm_object_hash_entry_alloc(pager);
3330
3331 vm_object_cache_lock();
3332 vm_object_hash_insert(entry);
3333
3334 entry->object = object;
3335 vm_object_cache_unlock();
3336
3337 /*
3338 * A reference was returned by
3339 * memory_object_create(), and it is
3340 * copied by vm_object_enter().
3341 */
3342
3343 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
3344 panic("vm_object_pager_create: mismatch");
3345
3346 /*
3347 * Drop the reference we were passed.
3348 */
3349 memory_object_deallocate(pager);
3350
3351 vm_object_lock(object);
3352
3353 /*
3354 * Release the paging reference
3355 */
3356 vm_object_paging_end(object);
3357 }
3358
3359 /*
3360 * Routine: vm_object_remove
3361 * Purpose:
3362 * Eliminate the pager/object association
3363 * for this pager.
3364 * Conditions:
3365 * The object cache must be locked.
3366 */
3367 __private_extern__ void
3368 vm_object_remove(
3369 vm_object_t object)
3370 {
3371 memory_object_t pager;
3372
3373 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
3374 vm_object_hash_entry_t entry;
3375
3376 entry = vm_object_hash_lookup(pager, FALSE);
3377 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
3378 entry->object = VM_OBJECT_NULL;
3379 }
3380
3381 }
3382
3383 /*
3384 * Global variables for vm_object_collapse():
3385 *
3386 * Counts for normal collapses and bypasses.
3387 * Debugging variables, to watch or disable collapse.
3388 */
3389 static long object_collapses = 0;
3390 static long object_bypasses = 0;
3391
3392 static boolean_t vm_object_collapse_allowed = TRUE;
3393 static boolean_t vm_object_bypass_allowed = TRUE;
3394
3395 #if MACH_PAGEMAP
3396 static int vm_external_discarded;
3397 static int vm_external_collapsed;
3398 #endif
3399
3400 unsigned long vm_object_collapse_encrypted = 0;
3401
3402 /*
3403 * Routine: vm_object_do_collapse
3404 * Purpose:
3405 * Collapse an object with the object backing it.
3406 * Pages in the backing object are moved into the
3407 * parent, and the backing object is deallocated.
3408 * Conditions:
3409 * Both objects and the cache are locked; the page
3410 * queues are unlocked.
3411 *
3412 */
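/*
 * The collapse proceeds in three steps, visible below: resident pages
 * are renamed (or discarded) into the parent, the pager association
 * and hash entry are transferred, and the parent is finally relinked
 * to the backing object's own shadow before the backing object is
 * freed.
 */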
3413 static void
3414 vm_object_do_collapse(
3415 vm_object_t object,
3416 vm_object_t backing_object)
3417 {
3418 vm_page_t p, pp;
3419 vm_object_offset_t new_offset, backing_offset;
3420 vm_object_size_t size;
3421
3422 backing_offset = object->shadow_offset;
3423 size = object->size;
3424
3425 /*
3426 * Move all in-memory pages from backing_object
3427 * to the parent. Pages that have been paged out
3428 * will be overwritten by any of the parent's
3429 * pages that shadow them.
3430 */
3431
3432 while (!queue_empty(&backing_object->memq)) {
3433
3434 p = (vm_page_t) queue_first(&backing_object->memq);
3435
3436 new_offset = (p->offset - backing_offset);
3437
3438 assert(!p->busy || p->absent);
3439
3440 /*
3441 * If the parent has a page here, or if
3442 * this page falls outside the parent,
3443 * dispose of it.
3444 *
3445 * Otherwise, move it as planned.
3446 */
3447
3448 if (p->offset < backing_offset || new_offset >= size) {
3449 VM_PAGE_FREE(p);
3450 } else {
3451 /*
3452 * ENCRYPTED SWAP:
3453 * The encryption key includes the "pager" and the
3454 * "paging_offset". These will not change during the
3455 * object collapse, so we can just move an encrypted
3456 * page from one object to the other in this case.
3457 * We can't decrypt the page here, since we can't drop
3458 * the object lock.
3459 */
3460 if (p->encrypted) {
3461 vm_object_collapse_encrypted++;
3462 }
3463 pp = vm_page_lookup(object, new_offset);
3464 if (pp == VM_PAGE_NULL) {
3465
3466 /*
3467 * Parent now has no page.
3468 * Move the backing object's page up.
3469 */
3470
3471 vm_page_rename(p, object, new_offset, TRUE);
3472 #if MACH_PAGEMAP
3473 } else if (pp->absent) {
3474
3475 /*
3476 * Parent has an absent page...
3477 * it's not being paged in, so
3478 * it must really be missing from
3479 * the parent.
3480 *
3481 * Throw out the absent page...
3482 * any faults looking for that
3483 * page will restart with the new
3484 * one.
3485 */
3486
3487 VM_PAGE_FREE(pp);
3488 vm_page_rename(p, object, new_offset, TRUE);
3489 #endif /* MACH_PAGEMAP */
3490 } else {
3491 assert(! pp->absent);
3492
3493 /*
3494 * Parent object has a real page.
3495 * Throw away the backing object's
3496 * page.
3497 */
3498 VM_PAGE_FREE(p);
3499 }
3500 }
3501 }
3502
3503 #if !MACH_PAGEMAP
3504 assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL))
3505 || (!backing_object->pager_created
3506 && (backing_object->pager == MEMORY_OBJECT_NULL)));
3507 #else
3508 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
3509 #endif /* !MACH_PAGEMAP */
3510
3511 if (backing_object->pager != MEMORY_OBJECT_NULL) {
3512 vm_object_hash_entry_t entry;
3513
3514 /*
3515 * Move the pager from backing_object to object.
3516 *
3517 * XXX We're only using part of the paging space
3518 * for keeps now... we ought to discard the
3519 * unused portion.
3520 */
3521
3522 assert(!object->paging_in_progress);
3523 object->pager = backing_object->pager;
3524 entry = vm_object_hash_lookup(object->pager, FALSE);
3525 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3526 entry->object = object;
3527 object->pager_created = backing_object->pager_created;
3528 object->pager_control = backing_object->pager_control;
3529 object->pager_ready = backing_object->pager_ready;
3530 object->pager_initialized = backing_object->pager_initialized;
3531 object->paging_offset =
3532 backing_object->paging_offset + backing_offset;
3533 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
3534 memory_object_control_collapse(object->pager_control,
3535 object);
3536 }
3537 }
3538
3539 vm_object_cache_unlock();
3540
3541 #if MACH_PAGEMAP
3542 /*
3543 * If the shadow offset is 0, use the existence map from
3544 * the backing object if there is one. If the shadow offset is
3545 * not zero, toss it.
3546 *
3547 * XXX - If the shadow offset is not 0 then a bit copy is needed
3548 * if the map is to be salvaged. For now, we just toss the
3549 * old map, giving the collapsed object no map. This means that
3550 * the pager is invoked for zero fill pages. If analysis shows
3551 * that this happens frequently and is a performance hit, then
3552 * this code should be fixed to salvage the map.
3553 */
3554 assert(object->existence_map == VM_EXTERNAL_NULL);
3555 if (backing_offset || (size != backing_object->size)) {
3556 vm_external_discarded++;
3557 vm_external_destroy(backing_object->existence_map,
3558 backing_object->size);
3559 }
3560 else {
3561 vm_external_collapsed++;
3562 object->existence_map = backing_object->existence_map;
3563 }
3564 backing_object->existence_map = VM_EXTERNAL_NULL;
3565 #endif /* MACH_PAGEMAP */
3566
3567 /*
3568 * Object now shadows whatever backing_object did.
3569 * Note that the reference to backing_object->shadow
3570 * moves from within backing_object to within object.
3571 */
3572
3573 assert(!object->phys_contiguous);
3574 assert(!backing_object->phys_contiguous);
3575 object->shadow = backing_object->shadow;
3576 if (object->shadow) {
3577 object->shadow_offset += backing_object->shadow_offset;
3578 } else {
3579 /* no shadow, therefore no shadow offset... */
3580 object->shadow_offset = 0;
3581 }
3582 assert((object->shadow == VM_OBJECT_NULL) ||
3583 (object->shadow->copy != backing_object));
3584
3585 /*
3586 * Discard backing_object.
3587 *
3588 * Since the backing object has no pages, no
3589 * pager left, and no object references within it,
3590 * all that is necessary is to dispose of it.
3591 */
3592
3593 assert((backing_object->ref_count == 1) &&
3594 (backing_object->resident_page_count == 0) &&
3595 (backing_object->paging_in_progress == 0));
3596
3597 backing_object->alive = FALSE;
3598 vm_object_unlock(backing_object);
3599
3600 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3601 (integer_t)backing_object, 0,0,0,0);
3602
3603 vm_object_lock_destroy(backing_object);
3604
3605 zfree(vm_object_zone, backing_object);
3606
3607 object_collapses++;
3608 }
3609
3610 static void
3611 vm_object_do_bypass(
3612 vm_object_t object,
3613 vm_object_t backing_object)
3614 {
3615 /*
3616 * Make the parent shadow the next object
3617 * in the chain.
3618 */
3619
3620 vm_object_lock_assert_exclusive(backing_object);
3621
3622 #if TASK_SWAPPER
3623 /*
3624 * Do object reference in-line to
3625 * conditionally increment shadow's
3626 * residence count. If object is not
3627 * resident, leave residence count
3628 * on shadow alone.
3629 */
3630 if (backing_object->shadow != VM_OBJECT_NULL) {
3631 vm_object_lock(backing_object->shadow);
3632 vm_object_lock_assert_exclusive(backing_object->shadow);
3633 backing_object->shadow->ref_count++;
3634 if (object->res_count != 0)
3635 vm_object_res_reference(backing_object->shadow);
3636 vm_object_unlock(backing_object->shadow);
3637 }
3638 #else /* TASK_SWAPPER */
3639 vm_object_reference(backing_object->shadow);
3640 #endif /* TASK_SWAPPER */
3641
3642 assert(!object->phys_contiguous);
3643 assert(!backing_object->phys_contiguous);
3644 object->shadow = backing_object->shadow;
3645 if (object->shadow) {
3646 object->shadow_offset += backing_object->shadow_offset;
3647 } else {
3648 /* no shadow, therefore no shadow offset... */
3649 object->shadow_offset = 0;
3650 }
3651
3652 /*
3653 * Backing object might have had a copy pointer
3654 * to us. If it did, clear it.
3655 */
3656 if (backing_object->copy == object) {
3657 backing_object->copy = VM_OBJECT_NULL;
3658 }
3659
3660 /*
3661 * Drop the reference count on backing_object.
3662 #if TASK_SWAPPER
3663 * Since its ref_count was at least 2, it
3664 * will not vanish; so we don't need to call
3665 * vm_object_deallocate.
3666 * [FBDP: that doesn't seem to be true any more]
3667 *
3668 * The res_count on the backing object is
3669 * conditionally decremented. It's possible
3670 * (via vm_pageout_scan) to get here with
3671 * a "swapped" object, which has a 0 res_count,
3672 * in which case, the backing object res_count
3673 * is already down by one.
3674 #else
3675 * Don't call vm_object_deallocate unless
3676 * ref_count drops to zero.
3677 *
3678 * The ref_count can drop to zero here if the
3679 * backing object could be bypassed but not
3680 * collapsed, such as when the backing object
3681 * is temporary and cachable.
3682 #endif
3683 */
3684 if (backing_object->ref_count > 1) {
3685 vm_object_lock_assert_exclusive(backing_object);
3686 backing_object->ref_count--;
3687 #if TASK_SWAPPER
3688 if (object->res_count != 0)
3689 vm_object_res_deallocate(backing_object);
3690 assert(backing_object->ref_count > 0);
3691 #endif /* TASK_SWAPPER */
3692 vm_object_unlock(backing_object);
3693 } else {
3694
3695 /*
3696 * Drop locks so that we can deallocate
3697 * the backing object.
3698 */
3699
3700 #if TASK_SWAPPER
3701 if (object->res_count == 0) {
3702 /* XXX get a reference for the deallocate below */
3703 vm_object_res_reference(backing_object);
3704 }
3705 #endif /* TASK_SWAPPER */
3706 vm_object_unlock(object);
3707 vm_object_unlock(backing_object);
3708 vm_object_deallocate(backing_object);
3709
3710 /*
3711 * Relock object. We don't have to reverify
3712 * its state since vm_object_collapse will
3713 * do that for us as it starts at the
3714 * top of its loop.
3715 */
3716
3717 vm_object_lock(object);
3718 }
3719
3720 object_bypasses++;
3721 }
3722
3723
3724 /*
3725 * vm_object_collapse:
3726 *
3727 * Perform an object collapse or an object bypass if appropriate.
3728 * The real work of collapsing and bypassing is performed in
3729 * the routines vm_object_do_collapse and vm_object_do_bypass.
3730 *
3731 * Requires that the object be locked and the page queues be unlocked.
3732 *
3733 */
3734 static unsigned long vm_object_collapse_calls = 0;
3735 static unsigned long vm_object_collapse_objects = 0;
3736 static unsigned long vm_object_collapse_do_collapse = 0;
3737 static unsigned long vm_object_collapse_do_bypass = 0;
3738 static unsigned long vm_object_collapse_delays = 0;
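/*
 * Roughly: a backing object referenced only by this parent (subject to
 * the pager checks below) is collapsed into it; otherwise, if bypass
 * is permitted and none of the backing object's pages can "show
 * through", the parent is simply re-pointed at the backing object's
 * own shadow.
 */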
3739 __private_extern__ void
3740 vm_object_collapse(
3741 register vm_object_t object,
3742 register vm_object_offset_t hint_offset,
3743 boolean_t can_bypass)
3744 {
3745 register vm_object_t backing_object;
3746 register unsigned int rcount;
3747 register unsigned int size;
3748 vm_object_t original_object;
3749
3750 vm_object_collapse_calls++;
3751
3752 if (! vm_object_collapse_allowed &&
3753 ! (can_bypass && vm_object_bypass_allowed)) {
3754 return;
3755 }
3756
3757 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3758 (integer_t)object, 0,0,0,0);
3759
3760 if (object == VM_OBJECT_NULL)
3761 return;
3762
3763 original_object = object;
3764
3765 while (TRUE) {
3766 vm_object_collapse_objects++;
3767 /*
3768 * Verify that the conditions are right for either
3769 * collapse or bypass:
3770 */
3771
3772 /*
3773 * There is a backing object, and
3774 */
3775
3776 backing_object = object->shadow;
3777 if (backing_object == VM_OBJECT_NULL) {
3778 if (object != original_object) {
3779 vm_object_unlock(object);
3780 }
3781 return;
3782 }
3783
3784 /*
3785 * No pages in the object are currently
3786 * being paged out, and
3787 */
3788 if (object->paging_in_progress != 0) {
3789 /* try and collapse the rest of the shadow chain */
3790 vm_object_lock(backing_object);
3791 if (object != original_object) {
3792 vm_object_unlock(object);
3793 }
3794 object = backing_object;
3795 continue;
3796 }
3797
3798 vm_object_lock(backing_object);
3799
3800 /*
3801 * ...
3802 * The backing object is not read_only,
3803 * and no pages in the backing object are
3804 * currently being paged out.
3805 * The backing object is internal.
3806 *
3807 */
3808
3809 if (!backing_object->internal ||
3810 backing_object->paging_in_progress != 0) {
3811 /* try and collapse the rest of the shadow chain */
3812 if (object != original_object) {
3813 vm_object_unlock(object);
3814 }
3815 object = backing_object;
3816 continue;
3817 }
3818
3819 /*
3820 * The backing object can't be a copy-object:
3821 * the shadow_offset for the copy-object must stay
3822 * as 0. Furthermore (for the 'we have all the
3823 * pages' case), if we bypass backing_object and
3824 * just shadow the next object in the chain, old
3825 * pages from that object would then have to be copied
3826 * BOTH into the (former) backing_object and into the
3827 * parent object.
3828 */
3829 if (backing_object->shadow != VM_OBJECT_NULL &&
3830 backing_object->shadow->copy == backing_object) {
3831 /* try and collapse the rest of the shadow chain */
3832 if (object != original_object) {
3833 vm_object_unlock(object);
3834 }
3835 object = backing_object;
3836 continue;
3837 }
3838
3839 /*
3840 * We can now try to either collapse the backing
3841 * object (if the parent is the only reference to
3842 * it) or (perhaps) remove the parent's reference
3843 * to it.
3844 *
3845 * If there is exactly one reference to the backing
3846 * object, we may be able to collapse it into the
3847 * parent.
3848 *
3849 * If MACH_PAGEMAP is defined:
3850 * The parent must not have a pager created for it,
3851 * since collapsing a backing_object dumps new pages
3852 * into the parent that its pager doesn't know about
3853 * (and the collapse code can't merge the existence
3854 * maps).
3855 * Otherwise:
3856 * As long as one of the objects is still not known
3857 * to the pager, we can collapse them.
3858 */
3859 if (backing_object->ref_count == 1 &&
3860 (!object->pager_created
3861 #if !MACH_PAGEMAP
3862 || !backing_object->pager_created
3863 #endif /*!MACH_PAGEMAP */
3864 ) && vm_object_collapse_allowed) {
3865
3866 XPR(XPR_VM_OBJECT,
3867 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
3868 (integer_t)backing_object, (integer_t)object,
3869 (integer_t)backing_object->pager,
3870 (integer_t)backing_object->pager_control, 0);
3871
3872 /*
3873 * We need the cache lock for collapsing,
3874 * but we must not deadlock.
3875 */
3876
3877 if (! vm_object_cache_lock_try()) {
3878 if (object != original_object) {
3879 vm_object_unlock(object);
3880 }
3881 vm_object_unlock(backing_object);
3882 return;
3883 }
3884
3885 /*
3886 * Collapse the object with its backing
3887 * object, and try again with the object's
3888 * new backing object.
3889 */
3890
3891 vm_object_do_collapse(object, backing_object);
3892 vm_object_collapse_do_collapse++;
3893 continue;
3894 }
3895
3896 /*
3897 * Collapsing the backing object was not possible
3898 * or permitted, so let's try bypassing it.
3899 */
3900
3901 if (! (can_bypass && vm_object_bypass_allowed)) {
3902 /* try and collapse the rest of the shadow chain */
3903 if (object != original_object) {
3904 vm_object_unlock(object);
3905 }
3906 object = backing_object;
3907 continue;
3908 }
3909
3910
3911 /*
3912 * If the object doesn't have all its pages present,
3913 * we have to make sure no pages in the backing object
3914 * "show through" before bypassing it.
3915 */
3916 size = atop(object->size);
3917 rcount = object->resident_page_count;
3918 if (rcount != size) {
3919 vm_object_offset_t offset;
3920 vm_object_offset_t backing_offset;
3921 unsigned int backing_rcount;
3922 unsigned int lookups = 0;
3923
3924 /*
3925 * If the backing object has a pager but no pagemap,
3926 * then we cannot bypass it, because we don't know
3927 * what pages it has.
3928 */
3929 if (backing_object->pager_created
3930 #if MACH_PAGEMAP
3931 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3932 #endif /* MACH_PAGEMAP */
3933 ) {
3934 /* try and collapse the rest of the shadow chain */
3935 if (object != original_object) {
3936 vm_object_unlock(object);
3937 }
3938 object = backing_object;
3939 continue;
3940 }
3941
3942 /*
3943 * If the object has a pager but no pagemap,
3944 * then we cannot bypass it, because we don't know
3945 * what pages it has.
3946 */
3947 if (object->pager_created
3948 #if MACH_PAGEMAP
3949 && (object->existence_map == VM_EXTERNAL_NULL)
3950 #endif /* MACH_PAGEMAP */
3951 ) {
3952 /* try and collapse the rest of the shadow chain */
3953 if (object != original_object) {
3954 vm_object_unlock(object);
3955 }
3956 object = backing_object;
3957 continue;
3958 }
3959
3960 /*
3961 * If all of the pages in the backing object are
3962 * shadowed by the parent object, the parent
3963 * object no longer has to shadow the backing
3964 * object; it can shadow the next one in the
3965 * chain.
3966 *
3967 * If the backing object has existence info,
3968 * we must also examine its existence info
3969 * as well.
3970 *
3971 */
3972
3973 backing_offset = object->shadow_offset;
3974 backing_rcount = backing_object->resident_page_count;
3975
3976 #if MACH_PAGEMAP
3977 #define EXISTS_IN_OBJECT(obj, off, rc) \
3978 (vm_external_state_get((obj)->existence_map, \
3979 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
3980 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
3981 #else
3982 #define EXISTS_IN_OBJECT(obj, off, rc) \
3983 (((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
3984 #endif /* MACH_PAGEMAP */
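/*
 * Editor's note on the macro above (not part of the original source):
 * EXISTS_IN_OBJECT(obj, off, rc) evaluates TRUE if a page at offset
 * "off" is known to exist in "obj" -- either via the existence map
 * (when MACH_PAGEMAP is configured) or via an actual vm_page_lookup().
 * The lookup path bumps "lookups" (used below to throttle the scan
 * with mutex_pause) and consumes one unit of the residual count "rc".
 */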
3985
3986 /*
3987 * Check the hint location first
3988 * (since it is often the quickest way out of here).
3989 */
3990 if (object->cow_hint != ~(vm_offset_t)0)
3991 hint_offset = (vm_object_offset_t)object->cow_hint;
3992 else
3993 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
3994 (hint_offset - 8 * PAGE_SIZE_64) : 0;
3995
3996 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
3997 backing_offset, backing_rcount) &&
3998 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
3999 /* dependency right at the hint */
4000 object->cow_hint = (vm_offset_t)hint_offset;
4001 /* try and collapse the rest of the shadow chain */
4002 if (object != original_object) {
4003 vm_object_unlock(object);
4004 }
4005 object = backing_object;
4006 continue;
4007 }
4008
4009 /*
4010 * If the object's window onto the backing_object
4011 * is large compared to the number of resident
4012 * pages in the backing object, it makes sense to
4013 * walk the backing_object's resident pages first.
4014 *
4015 * NOTE: Pages may be in both the existence map and
4016 * resident. So, we can't permanently decrement
4017 * the rcount here because the second loop may
4018 * find the same pages in the backing object's
4019 * existence map that we found here, and we would
4020 * then double-decrement the rcount.
4022 */
4023 if (backing_rcount &&
4024 #if MACH_PAGEMAP
4025 size > ((backing_object->existence_map) ?
4026 backing_rcount : (backing_rcount >> 1))
4027 #else
4028 size > (backing_rcount >> 1)
4029 #endif /* MACH_PAGEMAP */
4030 ) {
4031 unsigned int rc = rcount;
4032 vm_page_t p;
4033
4034 backing_rcount = backing_object->resident_page_count;
4035 p = (vm_page_t)queue_first(&backing_object->memq);
4036 do {
4037 /* Until we get more than one lookup lock */
4038 if (lookups > 256) {
4039 vm_object_collapse_delays++;
4040 lookups = 0;
4041 mutex_pause(0);
4042 }
4043
4044 offset = (p->offset - backing_offset);
4045 if (offset < object->size &&
4046 offset != hint_offset &&
4047 !EXISTS_IN_OBJECT(object, offset, rc)) {
4048 /* found a dependency */
4049 object->cow_hint = (vm_offset_t)offset;
4050 break;
4051 }
4052 p = (vm_page_t) queue_next(&p->listq);
4053
4054 } while (--backing_rcount);
4055 if (backing_rcount != 0 ) {
4056 /* try and collapse the rest of the shadow chain */
4057 if (object != original_object) {
4058 vm_object_unlock(object);
4059 }
4060 object = backing_object;
4061 continue;
4062 }
4063 }
4064
4065 /*
4066 * Walk through the offsets looking for pages in the
4067 * backing object that show through to the object.
4068 */
4069 #if MACH_PAGEMAP
4070 if (backing_rcount || backing_object->existence_map) {
4071 #else
4072 if (backing_rcount) {
4073 #endif /* MACH_PAGEMAP */
4074 offset = hint_offset;
4075
4076 while((offset =
4077 (offset + PAGE_SIZE_64 < object->size) ?
4078 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
4079
4080 /* Until we get more than one lookup lock */
4081 if (lookups > 256) {
4082 vm_object_collapse_delays++;
4083 lookups = 0;
4084 mutex_pause(0);
4085 }
4086
4087 if (EXISTS_IN_OBJECT(backing_object, offset +
4088 backing_offset, backing_rcount) &&
4089 !EXISTS_IN_OBJECT(object, offset, rcount)) {
4090 /* found a dependency */
4091 object->cow_hint = (vm_offset_t)offset;
4092 break;
4093 }
4094 }
4095 if (offset != hint_offset) {
4096 /* try and collapse the rest of the shadow chain */
4097 if (object != original_object) {
4098 vm_object_unlock(object);
4099 }
4100 object = backing_object;
4101 continue;
4102 }
4103 }
4104 }
4105
4106 /* reset the offset hint for any objects deeper in the chain */
4107 object->cow_hint = (vm_offset_t)0;
4108
4109 /*
4110 * All interesting pages in the backing object
4111 * already live in the parent or its pager.
4112 * Thus we can bypass the backing object.
4113 */
4114
4115 vm_object_do_bypass(object, backing_object);
4116 vm_object_collapse_do_bypass++;
4117
4118 /*
4119 * Try again with this object's new backing object.
4120 */
4121
4122 continue;
4123 }
4124
4125 if (object != original_object) {
4126 vm_object_unlock(object);
4127 }
4128 }
4129
4130 /*
4131 * Routine: vm_object_page_remove: [internal]
4132 * Purpose:
4133 * Removes all physical pages in the specified
4134 * object range from the object's list of pages.
4135 *
4136 * In/out conditions:
4137 * The object must be locked.
4138 * The object must not have paging_in_progress, usually
4139 * guaranteed by not having a pager.
4140 */
4141 unsigned int vm_object_page_remove_lookup = 0;
4142 unsigned int vm_object_page_remove_iterate = 0;
4143
4144 __private_extern__ void
4145 vm_object_page_remove(
4146 register vm_object_t object,
4147 register vm_object_offset_t start,
4148 register vm_object_offset_t end)
4149 {
4150 register vm_page_t p, next;
4151
4152 /*
4153 * One and two page removals are most popular.
4154 * The factor of 16 here is somewhat arbitrary.
4155 * It balances vm_page_lookup vs. iteration.
4156 */
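/*
 * Worked example (editor's sketch): removing 4 pages from an object
 * with 128 resident pages takes the lookup path (4 < 128/16 == 8),
 * while removing 16 pages from the same object walks the whole memq.
 */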
4157
4158 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
4159 vm_object_page_remove_lookup++;
4160
4161 for (; start < end; start += PAGE_SIZE_64) {
4162 p = vm_page_lookup(object, start);
4163 if (p != VM_PAGE_NULL) {
4164 assert(!p->cleaning && !p->pageout);
4165 if (!p->fictitious && p->pmapped)
4166 pmap_disconnect(p->phys_page);
4167 VM_PAGE_FREE(p);
4168 }
4169 }
4170 } else {
4171 vm_object_page_remove_iterate++;
4172
4173 p = (vm_page_t) queue_first(&object->memq);
4174 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4175 next = (vm_page_t) queue_next(&p->listq);
4176 if ((start <= p->offset) && (p->offset < end)) {
4177 assert(!p->cleaning && !p->pageout);
4178 if (!p->fictitious && p->pmapped)
4179 pmap_disconnect(p->phys_page);
4180 VM_PAGE_FREE(p);
4181 }
4182 p = next;
4183 }
4184 }
4185 }
4186
4187
4188 /*
4189 * Routine: vm_object_coalesce
4190 * Function: Coalesces two objects backing up adjoining
4191 * regions of memory into a single object.
4192 *
4193 * returns TRUE if objects were combined.
4194 *
4195 * NOTE: Only works at the moment if the second object is NULL -
4196 * if it's not, which object do we lock first?
4197 *
4198 * Parameters:
4199 * prev_object First object to coalesce
4200 * prev_offset Offset into prev_object
4201 * next_object Second object to coalesce
4202 * next_offset Offset into next_object
4203 *
4204 * prev_size Size of reference to prev_object
4205 * next_size Size of reference to next_object
4206 *
4207 * Conditions:
4208 * The object(s) must *not* be locked. The map must be locked
4209 * to preserve the reference to the object(s).
4210 */
4211 static int vm_object_coalesce_count = 0;
4212
4213 __private_extern__ boolean_t
4214 vm_object_coalesce(
4215 register vm_object_t prev_object,
4216 vm_object_t next_object,
4217 vm_object_offset_t prev_offset,
4218 __unused vm_object_offset_t next_offset,
4219 vm_object_size_t prev_size,
4220 vm_object_size_t next_size)
4221 {
4222 vm_object_size_t newsize;
4223
4224 #ifdef lint
4225 next_offset++;
4226 #endif /* lint */
4227
4228 if (next_object != VM_OBJECT_NULL) {
4229 return(FALSE);
4230 }
4231
4232 if (prev_object == VM_OBJECT_NULL) {
4233 return(TRUE);
4234 }
4235
4236 XPR(XPR_VM_OBJECT,
4237 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
4238 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
4239
4240 vm_object_lock(prev_object);
4241
4242 /*
4243 * Try to collapse the object first
4244 */
4245 vm_object_collapse(prev_object, prev_offset, TRUE);
4246
4247 /*
4248 * Can't coalesce if pages not mapped to
4249 * prev_entry may be in use in any way:
4250 * . more than one reference
4251 * . paged out
4252 * . shadows another object
4253 * . has a copy elsewhere
4254 * . is purgeable
4255 * . paging references (pages might be in page-list)
4256 */
4257
4258 if ((prev_object->ref_count > 1) ||
4259 prev_object->pager_created ||
4260 (prev_object->shadow != VM_OBJECT_NULL) ||
4261 (prev_object->copy != VM_OBJECT_NULL) ||
4262 (prev_object->true_share != FALSE) ||
4263 (prev_object->purgable != VM_PURGABLE_DENY) ||
4264 (prev_object->paging_in_progress != 0)) {
4265 vm_object_unlock(prev_object);
4266 return(FALSE);
4267 }
4268
4269 vm_object_coalesce_count++;
4270
4271 /*
4272 * Remove any pages that may still be in the object from
4273 * a previous deallocation.
4274 */
4275 vm_object_page_remove(prev_object,
4276 prev_offset + prev_size,
4277 prev_offset + prev_size + next_size);
4278
4279 /*
4280 * Extend the object if necessary.
4281 */
4282 newsize = prev_offset + prev_size + next_size;
4283 if (newsize > prev_object->size) {
4284 #if MACH_PAGEMAP
4285 /*
4286 * We cannot extend an object that has existence info,
4287 * since the existence info might then fail to cover
4288 * the entire object.
4289 *
4290 * This assertion must be true because the object
4291 * has no pager, and we only create existence info
4292 * for objects with pagers.
4293 */
4294 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
4295 #endif /* MACH_PAGEMAP */
4296 prev_object->size = newsize;
4297 }
4298
4299 vm_object_unlock(prev_object);
4300 return(TRUE);
4301 }
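
/*
 * Usage sketch (editor's addition, not part of the original source):
 * a caller extending an existing anonymous mapping -- vm_map_enter()
 * style -- might try to grow the previous entry's object in place.
 * The prev_* and extension_size names below are hypothetical locals
 * of such a caller.
 */
#if 0	/* example only */
	if (vm_object_coalesce(prev_object,		/* first object */
			       VM_OBJECT_NULL,		/* second object must be NULL */
			       prev_offset,		/* offset into prev_object */
			       (vm_object_offset_t) 0,	/* next_offset (unused) */
			       prev_size,		/* size already mapped */
			       extension_size)) {	/* size being added */
		/* success: simply extend the previous map entry */
	} else {
		/* failure: allocate a fresh object for the new range */
	}
#endif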
4302
4303 /*
4304 * Attach a set of physical pages to an object, so that they can
4305 * be mapped by mapping the object. Typically used to map IO memory.
4306 *
4307 * The mapping function and its private data are used to obtain the
4308 * physical addresses for each page to be mapped.
4309 */
4310 void
4311 vm_object_page_map(
4312 vm_object_t object,
4313 vm_object_offset_t offset,
4314 vm_object_size_t size,
4315 vm_object_offset_t (*map_fn)(void *map_fn_data,
4316 vm_object_offset_t offset),
4317 void *map_fn_data) /* private to map_fn */
4318 {
4319 int num_pages;
4320 int i;
4321 vm_page_t m;
4322 vm_page_t old_page;
4323 vm_object_offset_t addr;
4324
4325 num_pages = atop_64(size);
4326
4327 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
4328
4329 addr = (*map_fn)(map_fn_data, offset);
4330
4331 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
4332 vm_page_more_fictitious();
4333
4334 vm_object_lock(object);
4335 if ((old_page = vm_page_lookup(object, offset))
4336 != VM_PAGE_NULL)
4337 {
4338 vm_page_lock_queues();
4339 vm_page_free(old_page);
4340 vm_page_unlock_queues();
4341 }
4342
4343 vm_page_init(m, addr);
4344 /* private normally requires lock_queues but since we */
4345 /* are initializing the page, it's not necessary here */
4346 m->private = TRUE; /* don't free page */
4347 m->wire_count = 1;
4348 vm_page_insert(m, object, offset);
4349
4350 PAGE_WAKEUP_DONE(m);
4351 vm_object_unlock(object);
4352 }
4353 }
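
/*
 * Usage sketch (editor's addition, not part of the original source):
 * a minimal map_fn for a physically contiguous I/O region.  The
 * "io_base" and "region_size" values are hypothetical; the value
 * returned here is what vm_object_page_map() hands to vm_page_init()
 * for each page.
 */
#if 0	/* example only */
static vm_object_offset_t
example_io_map_fn(void *map_fn_data, vm_object_offset_t offset)
{
	vm_object_offset_t io_base = *(vm_object_offset_t *) map_fn_data;

	/* the backing location for each page is just base + offset */
	return io_base + offset;
}

	/* ... then, with io_base and region_size set up: */
	vm_object_page_map(object, 0, region_size,
			   example_io_map_fn, (void *) &io_base);
#endif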
4354
4355 #include <mach_kdb.h>
4356
4357 #if MACH_KDB
4358 #include <ddb/db_output.h>
4359 #include <vm/vm_print.h>
4360
4361 #define printf kdbprintf
4362
4363 extern boolean_t vm_object_cached(
4364 vm_object_t object);
4365
4366 extern void print_bitstring(
4367 char byte);
4368
4369 boolean_t vm_object_print_pages = FALSE;
4370
4371 void
4372 print_bitstring(
4373 char byte)
4374 {
4375 printf("%c%c%c%c%c%c%c%c",
4376 ((byte & (1 << 0)) ? '1' : '0'),
4377 ((byte & (1 << 1)) ? '1' : '0'),
4378 ((byte & (1 << 2)) ? '1' : '0'),
4379 ((byte & (1 << 3)) ? '1' : '0'),
4380 ((byte & (1 << 4)) ? '1' : '0'),
4381 ((byte & (1 << 5)) ? '1' : '0'),
4382 ((byte & (1 << 6)) ? '1' : '0'),
4383 ((byte & (1 << 7)) ? '1' : '0'));
4384 }
4385
4386 boolean_t
4387 vm_object_cached(
4388 register vm_object_t object)
4389 {
4390 register vm_object_t o;
4391
4392 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
4393 if (object == o) {
4394 return TRUE;
4395 }
4396 }
4397 return FALSE;
4398 }
4399
4400 #if MACH_PAGEMAP
4401 /*
4402 * vm_external_print: [ debug ]
4403 */
4404 void
4405 vm_external_print(
4406 vm_external_map_t emap,
4407 vm_size_t size)
4408 {
4409 if (emap == VM_EXTERNAL_NULL) {
4410 printf("0 ");
4411 } else {
4412 vm_size_t existence_size = stob(size);
4413 printf("{ size=%d, map=[", existence_size);
4414 if (existence_size > 0) {
4415 print_bitstring(emap[0]);
4416 }
4417 if (existence_size > 1) {
4418 print_bitstring(emap[1]);
4419 }
4420 if (existence_size > 2) {
4421 printf("...");
4422 print_bitstring(emap[existence_size-1]);
4423 }
4424 printf("] }\n");
4425 }
4426 return;
4427 }
4428 #endif /* MACH_PAGEMAP */
4429
4430 int
4431 vm_follow_object(
4432 vm_object_t object)
4433 {
4434 int count = 0;
4435 int orig_db_indent = db_indent;
4436
4437 while (TRUE) {
4438 if (object == VM_OBJECT_NULL) {
4439 db_indent = orig_db_indent;
4440 return count;
4441 }
4442
4443 count += 1;
4444
4445 iprintf("object 0x%x", object);
4446 printf(", shadow=0x%x", object->shadow);
4447 printf(", copy=0x%x", object->copy);
4448 printf(", pager=0x%x", object->pager);
4449 printf(", ref=%d\n", object->ref_count);
4450
4451 db_indent += 2;
4452 object = object->shadow;
4453 }
4454
4455 }
4456
4457 /*
4458 * vm_object_print: [ debug ]
4459 */
4460 void
4461 vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
4462 __unused db_expr_t arg_count, __unused char *modif)
4463 {
4464 vm_object_t object;
4465 register vm_page_t p;
4466 const char *s;
4467
4468 register int count;
4469
4470 object = (vm_object_t) (long) db_addr;
4471 if (object == VM_OBJECT_NULL)
4472 return;
4473
4474 iprintf("object 0x%x\n", object);
4475
4476 db_indent += 2;
4477
4478 iprintf("size=0x%x", object->size);
4479 printf(", memq_hint=%p", object->memq_hint);
4480 printf(", ref_count=%d\n", object->ref_count);
4481 iprintf("");
4482 #if TASK_SWAPPER
4483 printf("res_count=%d, ", object->res_count);
4484 #endif /* TASK_SWAPPER */
4485 printf("resident_page_count=%d\n", object->resident_page_count);
4486
4487 iprintf("shadow=0x%x", object->shadow);
4488 if (object->shadow) {
4489 register int i = 0;
4490 vm_object_t shadow = object;
4491 while((shadow = shadow->shadow))
4492 i++;
4493 printf(" (depth %d)", i);
4494 }
4495 printf(", copy=0x%x", object->copy);
4496 printf(", shadow_offset=0x%x", object->shadow_offset);
4497 printf(", last_alloc=0x%x\n", object->last_alloc);
4498
4499 iprintf("pager=0x%x", object->pager);
4500 printf(", paging_offset=0x%x", object->paging_offset);
4501 printf(", pager_control=0x%x\n", object->pager_control);
4502
4503 iprintf("copy_strategy=%d[", object->copy_strategy);
4504 switch (object->copy_strategy) {
4505 case MEMORY_OBJECT_COPY_NONE:
4506 printf("copy_none");
4507 break;
4508
4509 case MEMORY_OBJECT_COPY_CALL:
4510 printf("copy_call");
4511 break;
4512
4513 case MEMORY_OBJECT_COPY_DELAY:
4514 printf("copy_delay");
4515 break;
4516
4517 case MEMORY_OBJECT_COPY_SYMMETRIC:
4518 printf("copy_symmetric");
4519 break;
4520
4521 case MEMORY_OBJECT_COPY_INVALID:
4522 printf("copy_invalid");
4523 break;
4524
4525 default:
4526 printf("?");
4527 }
4528 printf("]");
4529
4530 iprintf("all_wanted=0x%x<", object->all_wanted);
4531 s = "";
4532 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
4533 printf("%sinit", s);
4534 s = ",";
4535 }
4536 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
4537 printf("%sready", s);
4538 s = ",";
4539 }
4540 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
4541 printf("%spaging", s);
4542 s = ",";
4543 }
4544 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
4545 printf("%slock", s);
4546 s = ",";
4547 }
4548 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
4549 printf("%suncaching", s);
4550 s = ",";
4551 }
4552 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
4553 printf("%scopy_call", s);
4554 s = ",";
4555 }
4556 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
4557 printf("%scaching", s);
4558 s = ",";
4559 }
4560 printf(">");
4561 printf(", paging_in_progress=%d\n", object->paging_in_progress);
4562
4563 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
4564 (object->pager_created ? "" : "!"),
4565 (object->pager_initialized ? "" : "!"),
4566 (object->pager_ready ? "" : "!"),
4567 (object->can_persist ? "" : "!"),
4568 (object->pager_trusted ? "" : "!"),
4569 (object->pageout ? "" : "!"),
4570 (object->internal ? "internal" : "external"),
4571 (object->temporary ? "temporary" : "permanent"));
4572 iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n",
4573 (object->alive ? "" : "!"),
4574 ((object->purgable != VM_PURGABLE_DENY) ? "" : "!"),
4575 ((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"),
4576 ((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"),
4577 (object->shadowed ? "" : "!"),
4578 (vm_object_cached(object) ? "" : "!"),
4579 (object->private ? "" : "!"));
4580 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
4581 (object->advisory_pageout ? "" : "!"),
4582 (object->silent_overwrite ? "" : "!"));
4583
4584 #if MACH_PAGEMAP
4585 iprintf("existence_map=");
4586 vm_external_print(object->existence_map, object->size);
4587 #endif /* MACH_PAGEMAP */
4588 #if MACH_ASSERT
4589 iprintf("paging_object=0x%x\n", object->paging_object);
4590 #endif /* MACH_ASSERT */
4591
4592 if (vm_object_print_pages) {
4593 count = 0;
4594 p = (vm_page_t) queue_first(&object->memq);
4595 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4596 if (count == 0) {
4597 iprintf("memory:=");
4598 } else if (count == 2) {
4599 printf("\n");
4600 iprintf(" ...");
4601 count = 0;
4602 } else {
4603 printf(",");
4604 }
4605 count++;
4606
4607 printf("(off=0x%llX,page=%p)", p->offset, p);
4608 p = (vm_page_t) queue_next(&p->listq);
4609 }
4610 if (count != 0) {
4611 printf("\n");
4612 }
4613 }
4614 db_indent -= 2;
4615 }
4616
4617
4618 /*
4619 * vm_object_find [ debug ]
4620 *
4621 * Find all tasks which reference the given vm_object.
4622 */
4623
4624 boolean_t vm_object_find(vm_object_t object);
4625 boolean_t vm_object_print_verbose = FALSE;
4626
4627 boolean_t
4628 vm_object_find(
4629 vm_object_t object)
4630 {
4631 task_t task;
4632 vm_map_t map;
4633 vm_map_entry_t entry;
4634 boolean_t found = FALSE;
4635
4636 queue_iterate(&tasks, task, task_t, tasks) {
4637 map = task->map;
4638 for (entry = vm_map_first_entry(map);
4639 entry && entry != vm_map_to_entry(map);
4640 entry = entry->vme_next) {
4641
4642 vm_object_t obj;
4643
4644 /*
4645 * For the time being skip submaps,
4646 * only the kernel can have submaps,
4647 * and unless we are interested in
4648 * kernel objects, we can simply skip
4649 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
4650 * for a full solution.
4651 */
4652 if (entry->is_sub_map)
4653 continue;
4654 if (entry)
4655 obj = entry->object.vm_object;
4656 else
4657 continue;
4658
4659 while (obj != VM_OBJECT_NULL) {
4660 if (obj == object) {
4661 if (!found) {
4662 printf("TASK\t\tMAP\t\tENTRY\n");
4663 found = TRUE;
4664 }
4665 printf("0x%x\t0x%x\t0x%x\n",
4666 task, map, entry);
4667 }
4668 obj = obj->shadow;
4669 }
4670 }
4671 }
4672
4673 return(found);
4674 }
4675
4676 #endif /* MACH_KDB */
4677
4678 kern_return_t
4679 vm_object_populate_with_private(
4680 vm_object_t object,
4681 vm_object_offset_t offset,
4682 ppnum_t phys_page,
4683 vm_size_t size)
4684 {
4685 ppnum_t base_page;
4686 vm_object_offset_t base_offset;
4687
4688
4689 if(!object->private)
4690 return KERN_FAILURE;
4691
4692 base_page = phys_page;
4693
4694 vm_object_lock(object);
4695 if(!object->phys_contiguous) {
4696 vm_page_t m;
4697 if((base_offset = trunc_page_64(offset)) != offset) {
4698 vm_object_unlock(object);
4699 return KERN_FAILURE;
4700 }
4701 base_offset += object->paging_offset;
4702 while(size) {
4703 m = vm_page_lookup(object, base_offset);
4704 if(m != VM_PAGE_NULL) {
4705 if(m->fictitious) {
4706 if (m->phys_page !=
4707 vm_page_guard_addr) {
4708 vm_page_lockspin_queues();
4709 m->fictitious = FALSE;
4710 m->private = TRUE;
4711 m->phys_page = base_page;
4712 if(!m->busy) {
4713 m->busy = TRUE;
4714 }
4715 if(!m->absent) {
4716 m->absent = TRUE;
4717 }
4718 m->list_req_pending = TRUE;
4719 vm_page_unlock_queues();
4720 }
4721 } else if (m->phys_page != base_page) {
4722 if (m->pmapped) {
4723 /*
4724 * pmap call to clear old mapping
4725 */
4726 pmap_disconnect(m->phys_page);
4727 }
4728 m->phys_page = base_page;
4729 }
4730
4731 /*
4732 * ENCRYPTED SWAP:
4733 * We're not pointing to the same
4734 * physical page any longer and the
4735 * contents of the new one are not
4736 * supposed to be encrypted.
4737 * XXX What happens to the original
4738 * physical page. Is it lost ?
4739 */
4740 m->encrypted = FALSE;
4741
4742 } else {
4743 while ((m = vm_page_grab_fictitious())
4744 == VM_PAGE_NULL)
4745 vm_page_more_fictitious();
4746 vm_page_lockspin_queues();
4747 m->fictitious = FALSE;
4748 m->private = TRUE;
4749 m->phys_page = base_page;
4750 m->list_req_pending = TRUE;
4751 m->absent = TRUE;
4752 m->unusual = TRUE;
4753 vm_page_unlock_queues();
4754 vm_page_insert(m, object, base_offset);
4755 }
4756 base_page++; /* Go to the next physical page */
4757 base_offset += PAGE_SIZE;
4758 size -= PAGE_SIZE;
4759 }
4760 } else {
4761 /* NOTE: we should check the original settings here */
4762 /* if we have a size > zero a pmap call should be made */
4763 /* to disable the range */
4764
4765 /* pmap_? */
4766
4767 /* shadows on contiguous memory are not allowed */
4768 /* we therefore can use the offset field */
4769 object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
4770 object->size = size;
4771 }
4772 vm_object_unlock(object);
4773 return KERN_SUCCESS;
4774 }
4775
4776 /*
4777 * memory_object_free_from_cache:
4778 *
4779 * Walk the vm_object cache list, removing and freeing vm_objects
4780 * which are backed by the pager identified by the caller, (pager_ops).
4781 * Remove up to "count" objects, if that many are available
4782 * in the cache.
4783 *
4784 * Walk the list at most once, return the number of vm_objects
4785 * actually freed.
4786 */
4787
4788 __private_extern__ kern_return_t
4789 memory_object_free_from_cache(
4790 __unused host_t host,
4791 memory_object_pager_ops_t pager_ops,
4792 int *count)
4793 {
4794
4795 int object_released = 0;
4796
4797 register vm_object_t object = VM_OBJECT_NULL;
4798 vm_object_t shadow;
4799
4800 /*
4801 if(host == HOST_NULL)
4802 return(KERN_INVALID_ARGUMENT);
4803 */
4804
4805 try_again:
4806 vm_object_cache_lock();
4807
4808 queue_iterate(&vm_object_cached_list, object,
4809 vm_object_t, cached_list) {
4810 if (object->pager &&
4811 (pager_ops == object->pager->mo_pager_ops)) {
4812 vm_object_lock(object);
4813 queue_remove(&vm_object_cached_list, object,
4814 vm_object_t, cached_list);
4815 vm_object_cached_count--;
4816
4817 /*
4818 * Since this object is in the cache, we know
4819 * that it is initialized and has only a pager's
4820 * (implicit) reference. Take a reference to avoid
4821 * recursive deallocations.
4822 */
4823
4824 assert(object->pager_initialized);
4825 assert(object->ref_count == 0);
4826 vm_object_lock_assert_exclusive(object);
4827 object->ref_count++;
4828
4829 /*
4830 * Terminate the object.
4831 * If the object had a shadow, we let
4832 * vm_object_deallocate deallocate it.
4833 * "pageout" objects have a shadow, but
4834 * maintain a "paging reference" rather
4835 * than a normal reference.
4836 * (We are careful here to limit recursion.)
4837 */
4838 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4839 if ((vm_object_terminate(object) == KERN_SUCCESS)
4840 && (shadow != VM_OBJECT_NULL)) {
4841 vm_object_deallocate(shadow);
4842 }
4843
4844 if(object_released++ == *count)
4845 return KERN_SUCCESS;
4846 goto try_again;
4847 }
4848 }
4849 vm_object_cache_unlock();
4850 *count = object_released;
4851 return KERN_SUCCESS;
4852 }
4853
4854
4855
4856 kern_return_t
4857 memory_object_create_named(
4858 memory_object_t pager,
4859 memory_object_offset_t size,
4860 memory_object_control_t *control)
4861 {
4862 vm_object_t object;
4863 vm_object_hash_entry_t entry;
4864
4865 *control = MEMORY_OBJECT_CONTROL_NULL;
4866 if (pager == MEMORY_OBJECT_NULL)
4867 return KERN_INVALID_ARGUMENT;
4868
4869 vm_object_cache_lock();
4870 entry = vm_object_hash_lookup(pager, FALSE);
4871 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4872 (entry->object != VM_OBJECT_NULL)) {
4873 if (entry->object->named == TRUE)
4874 panic("memory_object_create_named: caller already holds the right"); }
4875
4876 vm_object_cache_unlock();
4877 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4878 == VM_OBJECT_NULL) {
4879 return(KERN_INVALID_OBJECT);
4880 }
4881
4882 /* wait for object (if any) to be ready */
4883 if (object != VM_OBJECT_NULL) {
4884 vm_object_lock(object);
4885 object->named = TRUE;
4886 while (!object->pager_ready) {
4887 vm_object_sleep(object,
4888 VM_OBJECT_EVENT_PAGER_READY,
4889 THREAD_UNINT);
4890 }
4891 *control = object->pager_control;
4892 vm_object_unlock(object);
4893 }
4894 return (KERN_SUCCESS);
4895 }
4896
4897
4898 /*
4899 * Routine: memory_object_recover_named [user interface]
4900 * Purpose:
4901 * Attempt to recover a named reference for a VM object.
4902 * VM will verify that the object has not already started
4903 * down the termination path, and if it has, will optionally
4904 * wait for that to finish.
4905 * Returns:
4906 * KERN_SUCCESS - we recovered a named reference on the object
4907 * KERN_FAILURE - we could not recover a reference (object dead)
4908 * KERN_INVALID_ARGUMENT - bad memory object control
4909 */
4910 kern_return_t
4911 memory_object_recover_named(
4912 memory_object_control_t control,
4913 boolean_t wait_on_terminating)
4914 {
4915 vm_object_t object;
4916
4917 vm_object_cache_lock();
4918 object = memory_object_control_to_vm_object(control);
4919 if (object == VM_OBJECT_NULL) {
4920 vm_object_cache_unlock();
4921 return (KERN_INVALID_ARGUMENT);
4922 }
4923
4924 restart:
4925 vm_object_lock(object);
4926
4927 if (object->terminating && wait_on_terminating) {
4928 vm_object_cache_unlock();
4929 vm_object_wait(object,
4930 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4931 THREAD_UNINT);
4932 vm_object_cache_lock();
4933 goto restart;
4934 }
4935
4936 if (!object->alive) {
4937 vm_object_cache_unlock();
4938 vm_object_unlock(object);
4939 return KERN_FAILURE;
4940 }
4941
4942 if (object->named == TRUE) {
4943 vm_object_cache_unlock();
4944 vm_object_unlock(object);
4945 return KERN_SUCCESS;
4946 }
4947
4948 if((object->ref_count == 0) && (!object->terminating)){
4949 queue_remove(&vm_object_cached_list, object,
4950 vm_object_t, cached_list);
4951 vm_object_cached_count--;
4952 XPR(XPR_VM_OBJECT_CACHE,
4953 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4954 (integer_t)object,
4955 (integer_t)vm_object_cached_list.next,
4956 (integer_t)vm_object_cached_list.prev, 0,0);
4957 }
4958
4959 vm_object_cache_unlock();
4960
4961 object->named = TRUE;
4962 vm_object_lock_assert_exclusive(object);
4963 object->ref_count++;
4964 vm_object_res_reference(object);
4965 while (!object->pager_ready) {
4966 vm_object_sleep(object,
4967 VM_OBJECT_EVENT_PAGER_READY,
4968 THREAD_UNINT);
4969 }
4970 vm_object_unlock(object);
4971 return (KERN_SUCCESS);
4972 }
4973
4974
4975 /*
4976 * vm_object_release_name:
4977 *
4978 * Enforces name semantic on memory_object reference count decrement
4979 * This routine should not be called unless the caller holds a name
4980 * reference gained through the memory_object_create_named.
4981 *
4982 * If the TERMINATE_IDLE flag is set, the call will return if the
4983 * reference count is not 1. i.e. idle with the only remaining reference
4984 * being the name.
4985 * If the decision is made to proceed the name field flag is set to
4986 * false and the reference count is decremented. If the RESPECT_CACHE
4987 * flag is set and the reference count has gone to zero, the
4988 * memory_object is checked to see if it is cacheable otherwise when
4989 * the reference count is zero, it is simply terminated.
4990 */
4991
4992 __private_extern__ kern_return_t
4993 vm_object_release_name(
4994 vm_object_t object,
4995 int flags)
4996 {
4997 vm_object_t shadow;
4998 boolean_t original_object = TRUE;
4999
5000 while (object != VM_OBJECT_NULL) {
5001
5002 /*
5003 * The cache holds a reference (uncounted) to
5004 * the object. We must lock it before removing
5005 * the object.
5006 *
5007 */
5008
5009 vm_object_cache_lock();
5010 vm_object_lock(object);
5011 assert(object->alive);
5012 if(original_object)
5013 assert(object->named);
5014 assert(object->ref_count > 0);
5015
5016 /*
5017 * We have to wait for initialization before
5018 * destroying or caching the object.
5019 */
5020
5021 if (object->pager_created && !object->pager_initialized) {
5022 assert(!object->can_persist);
5023 vm_object_assert_wait(object,
5024 VM_OBJECT_EVENT_INITIALIZED,
5025 THREAD_UNINT);
5026 vm_object_unlock(object);
5027 vm_object_cache_unlock();
5028 thread_block(THREAD_CONTINUE_NULL);
5029 continue;
5030 }
5031
5032 if (((object->ref_count > 1)
5033 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
5034 || (object->terminating)) {
5035 vm_object_unlock(object);
5036 vm_object_cache_unlock();
5037 return KERN_FAILURE;
5038 } else {
5039 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
5040 vm_object_unlock(object);
5041 vm_object_cache_unlock();
5042 return KERN_SUCCESS;
5043 }
5044 }
5045
5046 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
5047 (object->ref_count == 1)) {
5048 if(original_object)
5049 object->named = FALSE;
5050 vm_object_unlock(object);
5051 vm_object_cache_unlock();
5052 /* let vm_object_deallocate push this thing into */
5053 /* the cache, if that is where it is bound */
5054 vm_object_deallocate(object);
5055 return KERN_SUCCESS;
5056 }
5057 VM_OBJ_RES_DECR(object);
5058 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
5059 if(object->ref_count == 1) {
5060 if(vm_object_terminate(object) != KERN_SUCCESS) {
5061 if(original_object) {
5062 return KERN_FAILURE;
5063 } else {
5064 return KERN_SUCCESS;
5065 }
5066 }
5067 if (shadow != VM_OBJECT_NULL) {
5068 original_object = FALSE;
5069 object = shadow;
5070 continue;
5071 }
5072 return KERN_SUCCESS;
5073 } else {
5074 vm_object_lock_assert_exclusive(object);
5075 object->ref_count--;
5076 assert(object->ref_count > 0);
5077 if(original_object)
5078 object->named = FALSE;
5079 vm_object_unlock(object);
5080 vm_object_cache_unlock();
5081 return KERN_SUCCESS;
5082 }
5083 }
5084 /*NOTREACHED*/
5085 assert(0);
5086 return KERN_FAILURE;
5087 }
5088
5089
5090 __private_extern__ kern_return_t
5091 vm_object_lock_request(
5092 vm_object_t object,
5093 vm_object_offset_t offset,
5094 vm_object_size_t size,
5095 memory_object_return_t should_return,
5096 int flags,
5097 vm_prot_t prot)
5098 {
5099 __unused boolean_t should_flush;
5100
5101 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
5102
5103 XPR(XPR_MEMORY_OBJECT,
5104 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
5105 (integer_t)object, offset, size,
5106 (((should_return&1)<<1)|should_flush), prot);
5107
5108 /*
5109 * Check for bogus arguments.
5110 */
5111 if (object == VM_OBJECT_NULL)
5112 return (KERN_INVALID_ARGUMENT);
5113
5114 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
5115 return (KERN_INVALID_ARGUMENT);
5116
5117 size = round_page_64(size);
5118
5119 /*
5120 * Lock the object, and acquire a paging reference to
5121 * prevent the memory_object reference from being released.
5122 */
5123 vm_object_lock(object);
5124 vm_object_paging_begin(object);
5125
5126 (void)vm_object_update(object,
5127 offset, size, NULL, NULL, should_return, flags, prot);
5128
5129 vm_object_paging_end(object);
5130 vm_object_unlock(object);
5131
5132 return (KERN_SUCCESS);
5133 }
5134
5135 /*
5136 * Empty a purgeable object by grabbing the physical pages assigned to it and
5137 * putting them on the free queue without writing them to backing store, etc.
5138 * When the pages are next touched they will be demand zero-fill pages. We
5139 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
5140 * skip referenced/dirty pages, pages on the active queue, etc. We're more
5141 * than happy to grab these since this is a purgeable object. We mark the
5142 * object as "empty" after reaping its pages.
5143 *
5144 * On entry the object and page queues are locked, the object must be a
5145 * purgeable object with no delayed copies pending.
5146 */
5147 unsigned int
5148 vm_object_purge(vm_object_t object)
5149 {
5150 vm_page_t p, next;
5151 unsigned int num_purged_pages;
5152 vm_page_t local_freeq;
5153 unsigned long local_freed;
5154 int purge_loop_quota;
5155 /* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
5156 #define PURGE_BATCH_FREE_LIMIT 50
5157 /* release page queues lock every PURGE_LOOP_QUOTA iterations */
5158 #define PURGE_LOOP_QUOTA 100
5159
5160 num_purged_pages = 0;
5161 if (object->purgable == VM_PURGABLE_DENY)
5162 return num_purged_pages;
5163
5164 assert(object->purgable != VM_PURGABLE_NONVOLATILE);
5165 object->purgable = VM_PURGABLE_EMPTY;
5166
5167 assert(object->copy == VM_OBJECT_NULL);
5168 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5169 purge_loop_quota = PURGE_LOOP_QUOTA;
5170
5171 local_freeq = VM_PAGE_NULL;
5172 local_freed = 0;
5173
5174 /*
5175 * Go through the object's resident pages and try and discard them.
5176 */
5177 next = (vm_page_t)queue_first(&object->memq);
5178 while (!queue_end(&object->memq, (queue_entry_t)next)) {
5179 p = next;
5180 next = (vm_page_t)queue_next(&next->listq);
5181
5182 if (purge_loop_quota-- == 0) {
5183 /*
5184 * Avoid holding the page queues lock for too long.
5185 * Let someone else take it for a while if needed.
5186 * Keep holding the object's lock to guarantee that
5187 * the object's page list doesn't change under us
5188 * while we yield.
5189 */
5190 if (local_freeq != VM_PAGE_NULL) {
5191 /*
5192 * Flush our queue of pages to free.
5193 */
5194 vm_page_free_list(local_freeq);
5195 local_freeq = VM_PAGE_NULL;
5196 local_freed = 0;
5197 }
5198 mutex_yield(&vm_page_queue_lock);
5199
5200 /* resume with the current page and a new quota */
5201 purge_loop_quota = PURGE_LOOP_QUOTA;
5202 }
5203
5204
5205 if (p->busy || p->cleaning || p->laundry ||
5206 p->list_req_pending) {
5207 /* page is being acted upon, so don't mess with it */
5208 continue;
5209 }
5210 if (p->wire_count) {
5211 /* don't discard a wired page */
5212 continue;
5213 }
5214
5215 assert(!p->laundry);
5216 assert(p->object != kernel_object);
5217
5218 /* we can discard this page */
5219
5220 /* advertise that this page is in a transition state */
5221 p->busy = TRUE;
5222
5223 if (p->pmapped == TRUE) {
5224 /* unmap the page */
5225 int refmod_state;
5226
5227 refmod_state = pmap_disconnect(p->phys_page);
5228 if (refmod_state & VM_MEM_MODIFIED) {
5229 p->dirty = TRUE;
5230 }
5231 }
5232
5233 if (p->dirty || p->precious) {
5234 /* we saved the cost of cleaning this page ! */
5235 num_purged_pages++;
5236 vm_page_purged_count++;
5237 }
5238
5239 vm_page_free_prepare(p);
5240
5241 /* ... and put it on our queue of pages to free */
5242 assert(p->pageq.next == NULL &&
5243 p->pageq.prev == NULL);
5244 p->pageq.next = (queue_entry_t) local_freeq;
5245 local_freeq = p;
5246 if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
5247 /* flush our queue of pages to free */
5248 vm_page_free_list(local_freeq);
5249 local_freeq = VM_PAGE_NULL;
5250 local_freed = 0;
5251 }
5252 }
5253
5254 /* flush our local queue of pages to free one last time */
5255 if (local_freeq != VM_PAGE_NULL) {
5256 vm_page_free_list(local_freeq);
5257 local_freeq = VM_PAGE_NULL;
5258 local_freed = 0;
5259 }
5260
5261 return num_purged_pages;
5262 }
5263
5264 /*
5265 * vm_object_purgeable_control() allows the caller to control and investigate the
5266 * state of a purgeable object. A purgeable object is created via a call to
5267 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgeable object will
5268 * never be coalesced with any other object -- even other purgeable objects --
5269 * and will thus always remain a distinct object. A purgeable object has
5270 * special semantics when its reference count is exactly 1. If its reference
5271 * count is greater than 1, then a purgeable object will behave like a normal
5272 * object and attempts to use this interface will result in an error return
5273 * of KERN_INVALID_ARGUMENT.
5274 *
5275 * A purgeable object may be put into a "volatile" state which will make the
5276 * object's pages eligible to be reclaimed without paging to backing
5277 * store if the system runs low on memory. If the pages in a volatile
5278 * purgeable object are reclaimed, the purgeable object is said to have been
5279 * "emptied." When a purgeable object is emptied the system will reclaim as
5280 * many pages from the object as it can in a convenient manner (pages already
5281 * en route to backing store or busy for other reasons are left as is). When
5282 * a purgeable object is made volatile, its pages will generally be reclaimed
5283 * before other pages in the application's working set. This semantic is
5284 * generally used by applications which can recreate the data in the object
5285 * faster than it can be paged in. One such example might be media assets
5286 * which can be reread from a much faster RAID volume.
5287 *
5288 * A purgeable object may be designated as "non-volatile" which means it will
5289 * behave like all other objects in the system with pages being written to and
5290 * read from backing store as needed to satisfy system memory needs. If the
5291 * object was emptied before the object was made non-volatile, that fact will
5292 * be returned as the old state of the purgeable object (see
5293 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
5294 * were reclaimed as part of emptying the object will be refaulted in as
5295 * zero-fill on demand. It is up to the application to note that an object
5296 * was emptied and recreate the object's contents if necessary. When a
5297 * purgeable object is made non-volatile, its pages will generally not be paged
5298 * out to backing store in the immediate future. A purgeable object may also
5299 * be manually emptied.
5300 *
5301 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
5302 * volatile purgeable object may be queried at any time. This information may
5303 * be used as a control input to let the application know when the system is
5304 * experiencing memory pressure and is reclaiming memory.
5305 *
5306 * The specified address may be any address within the purgeable object. If
5307 * the specified address does not represent any object in the target task's
5308 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
5309 * object containing the specified address is not a purgeable object, then
5310 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
5311 * returned.
5312 *
5313 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
5314 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
5315 * state is used to set the new state of the purgeable object and return its
5316 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable
5317 * object is returned in the parameter state.
5318 *
5319 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
5320 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
5321 * the non-volatile, volatile and volatile/empty states described above.
5322 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
5323 * immediately reclaim as many pages in the object as can be conveniently
5324 * collected (some may have already been written to backing store or be
5325 * otherwise busy).
5326 *
5327 * The process of making a purgeable object non-volatile and determining its
5328 * previous state is atomic. Thus, if a purgeable object is made
5329 * VM_PURGABLE_NONVOLATILE and the old state is returned as
5330 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
5331 * completely intact and will remain so until the object is made volatile
5332 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
5333 * was reclaimed while it was in a volatile state and its previous contents
5334 * have been lost.
5335 */
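/*
 * Usage sketch from the user-space side (editor's addition, not part
 * of the original source), assuming the standard Mach interfaces
 * vm_allocate() and vm_purgable_control() declared via <mach/mach.h>;
 * the 16 MB size is an arbitrary example value:
 */
#if 0	/* example only */
	vm_address_t	addr = 0;
	vm_size_t	size = 16 * 1024 * 1024;
	int		state;
	kern_return_t	kr;

	/* create a purgeable region */
	kr = vm_allocate(mach_task_self(), &addr, size,
			 VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);

	/* fill the region with recreatable data, then let it go volatile */
	state = VM_PURGABLE_VOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
				 VM_PURGABLE_SET_STATE, &state);

	/* later: take it back; the returned old state says if it was emptied */
	state = VM_PURGABLE_NONVOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (state == VM_PURGABLE_EMPTY) {
		/* contents were reclaimed; regenerate the data */
	}
#endif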
5336 /*
5337 * The object must be locked.
5338 */
5339 kern_return_t
5340 vm_object_purgable_control(
5341 vm_object_t object,
5342 vm_purgable_t control,
5343 int *state)
5344 {
5345 int old_state;
5346 int new_state;
5347
5348 if (object == VM_OBJECT_NULL) {
5349 /*
5350 * Object must already be present or it can't be purgeable.
5351 */
5352 return KERN_INVALID_ARGUMENT;
5353 }
5354
5355 /*
5356 * Get current state of the purgeable object.
5357 */
5358 old_state = object->purgable;
5359 if (old_state == VM_PURGABLE_DENY)
5360 return KERN_INVALID_ARGUMENT;
5361
5362 /* purgeable objects can't have delayed copies - now or in the future */
5363 assert(object->copy == VM_OBJECT_NULL);
5364 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5365
5366 /*
5367 * Execute the desired operation.
5368 */
5369 if (control == VM_PURGABLE_GET_STATE) {
5370 *state = old_state;
5371 return KERN_SUCCESS;
5372 }
5373
5374 new_state = *state & VM_PURGABLE_STATE_MASK;
5375 switch (new_state) {
5376 case VM_PURGABLE_DENY:
5377 case VM_PURGABLE_NONVOLATILE:
5378 object->purgable = new_state;
5379
5380 if (old_state != VM_PURGABLE_NONVOLATILE) {
5381 vm_page_lock_queues();
5382 assert(vm_page_purgeable_count >=
5383 object->resident_page_count);
5384 vm_page_purgeable_count -= object->resident_page_count;
5385
5386 if (old_state==VM_PURGABLE_VOLATILE) {
5387 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
5388 purgeable_q_t queue = vm_purgeable_object_remove(object);
5389 assert(queue);
5390
5391 vm_purgeable_token_delete_first(queue);
5392 assert(queue->debug_count_objects>=0);
5393 };
5394 vm_page_unlock_queues();
5395 }
5396 break;
5397
5398 case VM_PURGABLE_VOLATILE:
5399
5400 if ((old_state != VM_PURGABLE_NONVOLATILE) && (old_state != VM_PURGABLE_VOLATILE))
5401 break;
5402 purgeable_q_t queue;
5403
5404 /* find the correct queue */
5405 if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
5406 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
5407 else {
5408 if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
5409 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
5410 else
5411 queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
5412 }
5413
5414 if (old_state == VM_PURGABLE_NONVOLATILE) {
5415 /* try to add token... this can fail */
5416 vm_page_lock_queues();
5417
5418 kern_return_t result = vm_purgeable_token_add(queue);
5419 if (result != KERN_SUCCESS) {
5420 vm_page_unlock_queues();
5421 return result;
5422 }
5423 vm_page_purgeable_count += object->resident_page_count;
5424
5425 vm_page_unlock_queues();
5426
5427 object->purgable = new_state;
5428
5429 /* object should not be on a queue */
5430 assert(object->objq.next == NULL && object->objq.prev == NULL);
5431 }
5432 else if (old_state == VM_PURGABLE_VOLATILE) {
5433 /*
5434 * If reassigning priorities / purgeable groups, we don't change the token
5435 * queue, so moving priorities will not make pages stay around longer. The
5436 * reasoning is that the algorithm gives the most priority to the most
5437 * important object; if a new token is added, the most important object's
5438 * priority is boosted. This already biases the system in favor of purgeable
5439 * queues that move a lot, so more biasing is not necessary when no new object is added.
5440 */
5441 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
5442
5443 purgeable_q_t old_queue=vm_purgeable_object_remove(object);
5444 assert(old_queue);
5445
5446 if (old_queue != queue) {
5447 kern_return_t result;
5448
5449 /* Changing queue. Have to move token. */
5450 vm_page_lock_queues();
5451 vm_purgeable_token_delete_first(old_queue);
5452 result = vm_purgeable_token_add(queue);
5453 vm_page_unlock_queues();
5454
5455 assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */
5456 }
5457 };
5458 vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT );
5459
5460 assert(queue->debug_count_objects>=0);
5461
5462 break;
5463
5464
5465 case VM_PURGABLE_EMPTY:
5466 if (old_state != new_state)
5467 {
5468 assert(old_state==VM_PURGABLE_NONVOLATILE || old_state==VM_PURGABLE_VOLATILE);
5469 if(old_state==VM_PURGABLE_VOLATILE) {
5470 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
5471 purgeable_q_t old_queue=vm_purgeable_object_remove(object);
5472 assert(old_queue);
5473 vm_page_lock_queues();
5474 vm_purgeable_token_delete_first(old_queue);
5475 }
5476
5477 if (old_state==VM_PURGABLE_NONVOLATILE) {
5478 vm_page_purgeable_count += object->resident_page_count;
5479 vm_page_lock_queues();
5480 }
5481 (void) vm_object_purge(object);
5482 vm_page_unlock_queues();
5483 }
5484 break;
5485
5486 }
5487 *state = old_state;
5488
5489 return KERN_SUCCESS;
5490 }
5491
5492 #if TASK_SWAPPER
5493 /*
5494 * vm_object_res_deallocate
5495 *
5496 * (recursively) decrement residence counts on vm objects and their shadows.
5497 * Called from vm_object_deallocate and when swapping out an object.
5498 *
5499 * The object is locked, and remains locked throughout the function,
5500 * even as we iterate down the shadow chain. Locks on intermediate objects
5501 * will be dropped, but not the original object.
5502 *
5503 * NOTE: this function used to use recursion, rather than iteration.
5504 */
5505
5506 __private_extern__ void
5507 vm_object_res_deallocate(
5508 vm_object_t object)
5509 {
5510 vm_object_t orig_object = object;
5511 /*
5512 * Object is locked so it can be called directly
5513 * from vm_object_deallocate. Original object is never
5514 * unlocked.
5515 */
5516 assert(object->res_count > 0);
5517 while (--object->res_count == 0) {
5518 assert(object->ref_count >= object->res_count);
5519 vm_object_deactivate_all_pages(object);
5520 /* iterate on shadow, if present */
5521 if (object->shadow != VM_OBJECT_NULL) {
5522 vm_object_t tmp_object = object->shadow;
5523 vm_object_lock(tmp_object);
5524 if (object != orig_object)
5525 vm_object_unlock(object);
5526 object = tmp_object;
5527 assert(object->res_count > 0);
5528 } else
5529 break;
5530 }
5531 if (object != orig_object)
5532 vm_object_unlock(object);
5533 }
5534
5535 /*
5536 * vm_object_res_reference
5537 *
5538 * Internal function to increment residence count on a vm object
5539 * and its shadows. It is called only from vm_object_reference, and
5540 * when swapping in a vm object, via vm_map_swap.
5541 *
5542 * The object is locked, and remains locked throughout the function,
5543 * even as we iterate down the shadow chain. Locks on intermediate objects
5544 * will be dropped, but not the original object.
5545 *
5546 * NOTE: this function used to use recursion, rather than iteration.
5547 */
5548
5549 __private_extern__ void
5550 vm_object_res_reference(
5551 vm_object_t object)
5552 {
5553 vm_object_t orig_object = object;
5554 /*
5555 * Object is locked, so this can be called directly
5556 * from vm_object_reference. This lock is never released.
5557 */
5558 while ((++object->res_count == 1) &&
5559 (object->shadow != VM_OBJECT_NULL)) {
5560 vm_object_t tmp_object = object->shadow;
5561
5562 assert(object->ref_count >= object->res_count);
5563 vm_object_lock(tmp_object);
5564 if (object != orig_object)
5565 vm_object_unlock(object);
5566 object = tmp_object;
5567 }
5568 if (object != orig_object)
5569 vm_object_unlock(object);
5570 assert(orig_object->ref_count >= orig_object->res_count);
5571 }
5572 #endif /* TASK_SWAPPER */
5573
5574 /*
5575 * vm_object_reference:
5576 *
5577 * Gets another reference to the given object.
5578 */
5579 #ifdef vm_object_reference
5580 #undef vm_object_reference
5581 #endif
5582 __private_extern__ void
5583 vm_object_reference(
5584 register vm_object_t object)
5585 {
5586 if (object == VM_OBJECT_NULL)
5587 return;
5588
5589 vm_object_lock(object);
5590 assert(object->ref_count > 0);
5591 vm_object_reference_locked(object);
5592 vm_object_unlock(object);
5593 }
5594
5595 #ifdef MACH_BSD
5596 /*
5597 * Scale the vm_object_cache
5598 * This is required to make sure that the vm_object_cache is big
5599 * enough to effectively cache mapped files.
5600 * This is really important with UBC as all the regular file vnodes
5601 * have a memory object associated with them. Having this cache too
5602 * small results in rapid reclaim of vnodes and hurts performance a LOT!
5603 *
5604 * This is also needed as the number of vnodes can be dynamically scaled.
5605 */
5606 kern_return_t
5607 adjust_vm_object_cache(
5608 __unused vm_size_t oval,
5609 vm_size_t nval)
5610 {
5611 vm_object_cached_max = nval;
5612 vm_object_cache_trim(FALSE);
5613 return (KERN_SUCCESS);
5614 }
5615 #endif /* MACH_BSD */
5616
5617
5618 /*
5619 * vm_object_transpose
5620 *
5621 * This routine takes two VM objects of the same size and exchanges
5622 * their backing store.
5623 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
5624 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
5625 *
5626 * The VM objects must not be locked by caller.
5627 */
5628 kern_return_t
5629 vm_object_transpose(
5630 vm_object_t object1,
5631 vm_object_t object2,
5632 vm_object_size_t transpose_size)
5633 {
5634 vm_object_t tmp_object;
5635 kern_return_t retval;
5636 boolean_t object1_locked, object2_locked;
5637 boolean_t object1_paging, object2_paging;
5638 vm_page_t page;
5639 vm_object_offset_t page_offset;
5640
5641 tmp_object = VM_OBJECT_NULL;
5642 object1_locked = FALSE; object2_locked = FALSE;
5643 object1_paging = FALSE; object2_paging = FALSE;
5644
5645 if (object1 == object2 ||
5646 object1 == VM_OBJECT_NULL ||
5647 object2 == VM_OBJECT_NULL) {
5648 /*
5649 * If the 2 VM objects are the same, there's
5650 * no point in exchanging their backing store.
5651 */
5652 retval = KERN_INVALID_VALUE;
5653 goto done;
5654 }
5655
5656 vm_object_lock(object1);
5657 object1_locked = TRUE;
5658 if (!object1->alive || object1->terminating ||
5659 object1->copy || object1->shadow || object1->shadowed ||
5660 object1->purgable != VM_PURGABLE_DENY) {
5661 /*
5662 * We don't deal with copy or shadow objects (yet).
5663 */
5664 retval = KERN_INVALID_VALUE;
5665 goto done;
5666 }
5667 /*
5668 * Since we're about to mess with the object's backing store,
5669 * mark it as "paging_in_progress". Note that this is not enough
5670 * to prevent any paging activity on this object, so the caller should
5671 * have "quiesced" the objects beforehand, via a UPL operation with
5672 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
5673 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
5674 */
5675 vm_object_paging_begin(object1);
5676 object1_paging = TRUE;
5677 vm_object_unlock(object1);
5678 object1_locked = FALSE;
5679
5680 /*
5681 * Same as above for the 2nd object...
5682 */
5683 vm_object_lock(object2);
5684 object2_locked = TRUE;
5685 if (! object2->alive || object2->terminating ||
5686 object2->copy || object2->shadow || object2->shadowed ||
5687 object2->purgable != VM_PURGABLE_DENY) {
5688 retval = KERN_INVALID_VALUE;
5689 goto done;
5690 }
5691 vm_object_paging_begin(object2);
5692 object2_paging = TRUE;
5693 vm_object_unlock(object2);
5694 object2_locked = FALSE;
5695
5696 /*
5697 * Allocate a temporary VM object to hold object1's contents
5698 * while we copy object2 to object1.
5699 */
5700 tmp_object = vm_object_allocate(transpose_size);
5701 vm_object_lock(tmp_object);
5702 vm_object_paging_begin(tmp_object);
5703 tmp_object->can_persist = FALSE;
5704
5705 /*
5706 * Since we need to lock both objects at the same time,
5707 * make sure we always lock them in the same order to
5708 * avoid deadlocks.
5709 */
5710 if (object1 < object2) {
5711 vm_object_lock(object1);
5712 vm_object_lock(object2);
5713 } else {
5714 vm_object_lock(object2);
5715 vm_object_lock(object1);
5716 }
5717 object1_locked = TRUE;
5718 object2_locked = TRUE;
5719
5720 if (object1->size != object2->size ||
5721 object1->size != transpose_size) {
5722 /*
5723 * If the 2 objects don't have the same size, we can't
5724 * exchange their backing stores or one would overflow.
5725 * If their size doesn't match the caller's
5726 * "transpose_size", we can't do it either because the
5727 * transpose operation will affect the entire span of
5728 * the objects.
5729 */
5730 retval = KERN_INVALID_VALUE;
5731 goto done;
5732 }
5733
5734
5735 /*
5736 * Transpose the lists of resident pages.
5737 * This also updates the resident_page_count and the memq_hint.
5738 */
5739 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
5740 /*
5741 * Object1 has no pages to preserve (it's either empty or
5742 * physically contiguous), so just transfer object2's pages
5743 * to object1. No need to go through an intermediate object.
5744 */
5745 while (!queue_empty(&object2->memq)) {
5746 page = (vm_page_t) queue_first(&object2->memq);
5747 vm_page_rename(page, object1, page->offset, FALSE);
5748 }
5749 assert(queue_empty(&object2->memq));
5750 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
5751 /*
5752 * Object2 has no pages to preserve (it's either empty or
5753 * physically contiguous), so just transfer object1's pages
5754 * to object2. No need to go through an intermediate object.
5755 */
5756 while (!queue_empty(&object1->memq)) {
5757 page = (vm_page_t) queue_first(&object1->memq);
5758 vm_page_rename(page, object2, page->offset, FALSE);
5759 }
5760 assert(queue_empty(&object1->memq));
5761 } else {
5762 /* transfer object1's pages to tmp_object */
5763 vm_page_lock_queues();
5764 while (!queue_empty(&object1->memq)) {
5765 page = (vm_page_t) queue_first(&object1->memq);
5766 page_offset = page->offset;
5767 vm_page_remove(page);
5768 page->offset = page_offset;
5769 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
5770 }
5771 vm_page_unlock_queues();
5772 assert(queue_empty(&object1->memq));
5773 /* transfer object2's pages to object1 */
5774 while (!queue_empty(&object2->memq)) {
5775 page = (vm_page_t) queue_first(&object2->memq);
5776 vm_page_rename(page, object1, page->offset, FALSE);
5777 }
5778 assert(queue_empty(&object2->memq));
5779 /* transfer tmp_object's pages to object1 */
5780 while (!queue_empty(&tmp_object->memq)) {
5781 page = (vm_page_t) queue_first(&tmp_object->memq);
5782 queue_remove(&tmp_object->memq, page,
5783 vm_page_t, listq);
5784 vm_page_insert(page, object2, page->offset);
5785 }
5786 assert(queue_empty(&tmp_object->memq));
5787 }
5788
5789 #define __TRANSPOSE_FIELD(field) \
5790 MACRO_BEGIN \
5791 tmp_object->field = object1->field; \
5792 object1->field = object2->field; \
5793 object2->field = tmp_object->field; \
5794 MACRO_END
5795
5796 /* "size" should be identical */
5797 assert(object1->size == object2->size);
5798 /* "Lock" refers to the object not its contents */
5799 /* "ref_count" refers to the object not its contents */
5800 #if TASK_SWAPPER
5801 /* "res_count" refers to the object not its contents */
5802 #endif
5803 /* "resident_page_count" was updated above when transposing pages */
5804 /* there should be no "copy" */
5805 assert(!object1->copy);
5806 assert(!object2->copy);
5807 /* there should be no "shadow" */
5808 assert(!object1->shadow);
5809 assert(!object2->shadow);
5810 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
5811 __TRANSPOSE_FIELD(pager);
5812 __TRANSPOSE_FIELD(paging_offset);
5813 __TRANSPOSE_FIELD(pager_control);
5814 /* update the memory_objects' pointers back to the VM objects */
5815 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5816 memory_object_control_collapse(object1->pager_control,
5817 object1);
5818 }
5819 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5820 memory_object_control_collapse(object2->pager_control,
5821 object2);
5822 }
5823 __TRANSPOSE_FIELD(copy_strategy);
5824 /* "paging_in_progress" refers to the object not its contents */
5825 assert(object1->paging_in_progress);
5826 assert(object2->paging_in_progress);
5827 /* "all_wanted" refers to the object not its contents */
5828 __TRANSPOSE_FIELD(pager_created);
5829 __TRANSPOSE_FIELD(pager_initialized);
5830 __TRANSPOSE_FIELD(pager_ready);
5831 __TRANSPOSE_FIELD(pager_trusted);
5832 __TRANSPOSE_FIELD(can_persist);
5833 __TRANSPOSE_FIELD(internal);
5834 __TRANSPOSE_FIELD(temporary);
5835 __TRANSPOSE_FIELD(private);
5836 __TRANSPOSE_FIELD(pageout);
5837 /* "alive" should be set */
5838 assert(object1->alive);
5839 assert(object2->alive);
5840 /* "purgeable" should be non-purgeable */
5841 assert(object1->purgable == VM_PURGABLE_DENY);
5842 assert(object2->purgable == VM_PURGABLE_DENY);
5843 /* "shadowed" refers to the the object not its contents */
5844 __TRANSPOSE_FIELD(silent_overwrite);
5845 __TRANSPOSE_FIELD(advisory_pageout);
5846 __TRANSPOSE_FIELD(true_share);
5847 /* "terminating" should not be set */
5848 assert(!object1->terminating);
5849 assert(!object2->terminating);
5850 __TRANSPOSE_FIELD(named);
5851 /* "shadow_severed" refers to the object not its contents */
5852 __TRANSPOSE_FIELD(phys_contiguous);
5853 __TRANSPOSE_FIELD(nophyscache);
5854 /* "cached_list" should be NULL */
5855 assert(object1->cached_list.prev == NULL);
5856 assert(object1->cached_list.next == NULL);
5857 assert(object2->cached_list.prev == NULL);
5858 assert(object2->cached_list.next == NULL);
5859 /* "msr_q" is linked to the object not its contents */
5860 assert(queue_empty(&object1->msr_q));
5861 assert(queue_empty(&object2->msr_q));
5862 __TRANSPOSE_FIELD(last_alloc);
5863 __TRANSPOSE_FIELD(sequential);
5864 __TRANSPOSE_FIELD(pages_created);
5865 __TRANSPOSE_FIELD(pages_used);
5866 #if MACH_PAGEMAP
5867 __TRANSPOSE_FIELD(existence_map);
5868 #endif
5869 __TRANSPOSE_FIELD(cow_hint);
5870 #if MACH_ASSERT
5871 __TRANSPOSE_FIELD(paging_object);
5872 #endif
5873 __TRANSPOSE_FIELD(wimg_bits);
5874 __TRANSPOSE_FIELD(code_signed);
5875 __TRANSPOSE_FIELD(not_in_use);
5876 #ifdef UPL_DEBUG
5877 /* "uplq" refers to the object not its contents (see upl_transpose()) */
5878 #endif
5879
5880 #undef __TRANSPOSE_FIELD
5881
5882 retval = KERN_SUCCESS;
5883
5884 done:
5885 /*
5886 * Cleanup.
5887 */
5888 if (tmp_object != VM_OBJECT_NULL) {
5889 vm_object_paging_end(tmp_object);
5890 vm_object_unlock(tmp_object);
5891 /*
5892 * Re-initialize the temporary object to avoid
5893 * deallocating a real pager.
5894 */
5895 _vm_object_allocate(transpose_size, tmp_object);
5896 vm_object_deallocate(tmp_object);
5897 tmp_object = VM_OBJECT_NULL;
5898 }
5899
5900 if (object1_locked) {
5901 vm_object_unlock(object1);
5902 object1_locked = FALSE;
5903 }
5904 if (object2_locked) {
5905 vm_object_unlock(object2);
5906 object2_locked = FALSE;
5907 }
5908 if (object1_paging) {
5909 vm_object_lock(object1);
5910 vm_object_paging_end(object1);
5911 vm_object_unlock(object1);
5912 object1_paging = FALSE;
5913 }
5914 if (object2_paging) {
5915 vm_object_lock(object2);
5916 vm_object_paging_end(object2);
5917 vm_object_unlock(object2);
5918 object2_paging = FALSE;
5919 }
5920
5921 return retval;
5922 }
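
/*
 * Illustrative sketch (not compiled): the deadlock-avoidance idiom used
 * above when vm_object_transpose() needs both objects locked at once is
 * to always take the locks in ascending address order.  The helper below
 * is hypothetical -- it is not part of this file's API -- and simply
 * restates that ordering rule in isolation.
 */
#if 0
static void
vm_object_lock_pair_example(vm_object_t a, vm_object_t b)
{
	/* lock the lower-addressed object first, then the other one */
	if (a < b) {
		vm_object_lock(a);
		vm_object_lock(b);
	} else {
		vm_object_lock(b);
		vm_object_lock(a);
	}
}
#endif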
5923
5924
5925 /*
5926 * vm_object_cluster_size
5927 *
5928 * Determine how big a cluster we should issue an I/O for...
5929 *
5930 * Inputs: *start == offset of page needed
5931 * *length == maximum cluster pager can handle
5932 * Outputs: *start == beginning offset of cluster
5933 * *length == length of cluster to try
5934 *
5935 * The original *start will be encompassed by the cluster
5936 *
5937 */
5938 extern int speculative_reads_disabled;
5939
5940 uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];	/* histogram of requested pre-heat sizes, in pages */
5941 uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1];	/* histogram of resulting cluster sizes, in pages; both indices can reach MAX_UPL_TRANSFER */
5942
5943 #define PRE_HEAT_MULTIPLIER 4
5944
5945 __private_extern__ void
5946 vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
5947 vm_size_t *length, vm_object_fault_info_t fault_info)
5948 {
5949 vm_size_t pre_heat_size;
5950 vm_size_t tail_size;
5951 vm_size_t head_size;
5952 vm_size_t max_length;
5953 vm_size_t cluster_size;
5954 vm_object_offset_t object_size;
5955 vm_object_offset_t orig_start;
5956 vm_object_offset_t target_start;
5957 vm_object_offset_t offset;
5958 vm_behavior_t behavior;
5959 boolean_t look_behind = TRUE;
5960 boolean_t look_ahead = TRUE;
5961 int sequential_run;
5962 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
5963
5964 assert( !(*length & PAGE_MASK));
5965 assert( !(*start & PAGE_MASK_64));
5966
5967 if ( (max_length = *length) > (MAX_UPL_TRANSFER * PAGE_SIZE) )
5968 max_length = (MAX_UPL_TRANSFER * PAGE_SIZE);
5969 /*
5970 * we'll always return a cluster size of at least
5971 * 1 page, since the original fault must always
5972 * be processed
5973 */
5974 *length = PAGE_SIZE;
5975
5976 if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
5977 /*
5978 * no cluster... just fault the page in
5979 */
5980 return;
5981 }
5982 orig_start = *start;
5983 target_start = orig_start;
5984 cluster_size = round_page_32(fault_info->cluster_size);
5985 behavior = fault_info->behavior;
5986
5987 vm_object_lock(object);
5988
5989 if (object->internal)
5990 object_size = object->size;
5991 else if (object->pager != MEMORY_OBJECT_NULL)
5992 vnode_pager_get_object_size(object->pager, &object_size);
5993 else
5994 goto out; /* pager is gone for this object, nothing more to do */
5995
5996 object_size = round_page_64(object_size);
5997
5998 if (orig_start >= object_size) {
5999 /*
6000 * fault occurred beyond the EOF...
6001 * we need to punt w/o changing the
6002 * starting offset
6003 */
6004 goto out;
6005 }
6006 if (object->pages_used > object->pages_created) {
6007 /*
6008 * must have wrapped our 32 bit counters
6009 * so reset
6010 */
6011 object->pages_used = object->pages_created = 0;
6012 }
6013 if ((sequential_run = object->sequential)) {
6014 if (sequential_run < 0) {
6015 sequential_behavior = VM_BEHAVIOR_RSEQNTL;
6016 sequential_run = 0 - sequential_run;
6017 } else {
6018 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
6019 }
6020 }
6021 switch(behavior) {
6022
6023 default:
6024 behavior = VM_BEHAVIOR_DEFAULT;
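/* treat any unrecognized behavior as VM_BEHAVIOR_DEFAULT and fall through */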
6025
6026 case VM_BEHAVIOR_DEFAULT:
6027 if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
6028 goto out;
6029
6030 if (sequential_run >= (3 * PAGE_SIZE)) {
6031 pre_heat_size = sequential_run + PAGE_SIZE;
6032
6033 if ((behavior = sequential_behavior) == VM_BEHAVIOR_SEQUENTIAL)
6034 look_behind = FALSE;
6035 else
6036 look_ahead = FALSE;
6037 } else {
6038 uint32_t pages_unused;
6039
6040 if (object->pages_created < 32 * PRE_HEAT_MULTIPLIER) {
6041 /*
6042 * prime the pump
6043 */
6044 pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
6045 break;
6046 }
6047 pages_unused = object->pages_created - object->pages_used;
6048
6049 if (pages_unused < (object->pages_created / 8)) {
6050 pre_heat_size = PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER;
6051 } else if (pages_unused < (object->pages_created / 4)) {
6052 pre_heat_size = PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER;
6053 } else if (pages_unused < (object->pages_created / 2)) {
6054 pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
6055 } else {
6056 pre_heat_size = PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER;
6057 }
6058 }
6059 break;
6060
6061 case VM_BEHAVIOR_RANDOM:
6062 if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
6063 goto out;
6064 break;
6065
6066 case VM_BEHAVIOR_SEQUENTIAL:
6067 if ((pre_heat_size = cluster_size) == 0)
6068 pre_heat_size = sequential_run + PAGE_SIZE;
6069 look_behind = FALSE;
6070
6071 break;
6072
6073 case VM_BEHAVIOR_RSEQNTL:
6074 if ((pre_heat_size = cluster_size) == 0)
6075 pre_heat_size = sequential_run + PAGE_SIZE;
6076 look_ahead = FALSE;
6077
6078 break;
6079
6080 }
6081 if (pre_heat_size > max_length)
6082 pre_heat_size = max_length;
6083
6084 if (behavior == VM_BEHAVIOR_DEFAULT && vm_page_free_count < vm_page_free_target)
6085 pre_heat_size /= 2;
6086
6087 if (look_ahead == TRUE) {
6088 if (look_behind == TRUE)
6089 target_start &= ~(pre_heat_size - 1);
6090
6091 if ((target_start + pre_heat_size) > object_size)
6092 pre_heat_size = (vm_size_t)(trunc_page_64(object_size - target_start));
6093
6094 tail_size = pre_heat_size - (orig_start - target_start) - PAGE_SIZE;
6095 } else {
6096 if (pre_heat_size > target_start)
6097 pre_heat_size = target_start;
6098 tail_size = 0;
6099 }
6100 pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;
6101
6102 if (pre_heat_size <= PAGE_SIZE)
6103 goto out;
6104
6105 if (look_behind == TRUE) {
6106 /*
6107 * take a look at the pages before the original
6108 * faulting offset
6109 */
6110 head_size = pre_heat_size - tail_size - PAGE_SIZE;
6111
6112 for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
6113 /*
6114 * don't poke below the lowest offset
6115 */
6116 if (offset < fault_info->lo_offset)
6117 break;
6118 /*
6119 * for external objects and internal objects w/o an existence map
6120 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
6121 */
6122 #if MACH_PAGEMAP
6123 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
6124 /*
6125 * we know for a fact that the pager can't provide the page
6126 * so don't include it or any pages beyond it in this cluster
6127 */
6128 break;
6129 }
6130 #endif
6131 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
6132 /*
6133 * don't bridge resident pages
6134 */
6135 break;
6136 }
6137 *start = offset;
6138 *length += PAGE_SIZE;
6139 }
6140 }
6141 if (look_ahead == TRUE) {
6142 for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
6143 /*
6144 * don't poke above the highest offset
6145 */
6146 if (offset >= fault_info->hi_offset)
6147 break;
6148 /*
6149 * for external objects and internal objects w/o an existence map
6150 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
6151 */
6152 #if MACH_PAGEMAP
6153 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
6154 /*
6155 * we know for a fact that the pager can't provide the page
6156 * so don't include it or any pages beyond it in this cluster
6157 */
6158 break;
6159 }
6160 #endif
6161 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
6162 /*
6163 * don't bridge resident pages
6164 */
6165 break;
6166 }
6167 *length += PAGE_SIZE;
6168 }
6169 }
6170 out:
6171 pre_heat_cluster[*length / PAGE_SIZE]++;
6172
6173 vm_object_unlock(object);
6174 }
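
/*
 * Illustrative sketch (not compiled): the VM_BEHAVIOR_DEFAULT branch of
 * vm_object_cluster_size() scales its speculative read-ahead by how well
 * past read-ahead paid off.  The hypothetical helper below restates just
 * that sizing heuristic: the fewer pages that were created but never
 * used, the larger the next pre-heat request.
 */
#if 0
static vm_size_t
pre_heat_size_example(uint32_t pages_created, uint32_t pages_used)
{
	uint32_t	pages_unused;

	if (pages_created < 32 * PRE_HEAT_MULTIPLIER) {
		/* not enough history yet: prime the pump */
		return (PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER);
	}
	pages_unused = pages_created - pages_used;

	if (pages_unused < (pages_created / 8))
		return (PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER);
	else if (pages_unused < (pages_created / 4))
		return (PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER);
	else if (pages_unused < (pages_created / 2))
		return (PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER);
	else
		return (PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER);
}
#endif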
6175
6176
6177 /*
6178 * Allow manipulation of individual page state. This is actually part of
6179 * the UPL regimen but takes place on the VM object rather than on a UPL
6180 */
6181
6182 kern_return_t
6183 vm_object_page_op(
6184 vm_object_t object,
6185 vm_object_offset_t offset,
6186 int ops,
6187 ppnum_t *phys_entry,
6188 int *flags)
6189 {
6190 vm_page_t dst_page;
6191
6192 vm_object_lock(object);
6193
6194 if(ops & UPL_POP_PHYSICAL) {
6195 if(object->phys_contiguous) {
6196 if (phys_entry) {
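/* for a phys_contiguous object, shadow_offset holds its base physical address; shifting by 12 converts it to a 4K page number */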
6197 *phys_entry = (ppnum_t)
6198 (object->shadow_offset >> 12);
6199 }
6200 vm_object_unlock(object);
6201 return KERN_SUCCESS;
6202 } else {
6203 vm_object_unlock(object);
6204 return KERN_INVALID_OBJECT;
6205 }
6206 }
6207 if(object->phys_contiguous) {
6208 vm_object_unlock(object);
6209 return KERN_INVALID_OBJECT;
6210 }
6211
6212 while(TRUE) {
6213 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
6214 vm_object_unlock(object);
6215 return KERN_FAILURE;
6216 }
6217
6218 /* Sync up on getting the busy bit */
6219 if((dst_page->busy || dst_page->cleaning) &&
6220 (((ops & UPL_POP_SET) &&
6221 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
6222 /* someone else is playing with the page, we will */
6223 /* have to wait */
6224 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
6225 continue;
6226 }
6227
6228 if (ops & UPL_POP_DUMP) {
6229 if (dst_page->pmapped == TRUE)
6230 pmap_disconnect(dst_page->phys_page);
6231
6232 vm_page_lock_queues();
6233 vm_page_free(dst_page);
6234 vm_page_unlock_queues();
6235
6236 break;
6237 }
6238
6239 if (flags) {
6240 *flags = 0;
6241
6242 /* Get the condition of flags before requested ops */
6243 /* are undertaken */
6244
6245 if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
6246 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
6247 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
6248 if(dst_page->absent) *flags |= UPL_POP_ABSENT;
6249 if(dst_page->busy) *flags |= UPL_POP_BUSY;
6250 }
6251
6252 /* The caller should have made a call either contingent with */
6253 /* or prior to this call to set UPL_POP_BUSY */
6254 if(ops & UPL_POP_SET) {
6255 /* The protection granted with this assert will */
6256 /* not be complete. If the caller violates the */
6257 /* convention and attempts to change page state */
6258 /* without first setting busy we may not see it */
6259 /* because the page may already be busy. However */
6260 /* if such violations occur we will assert sooner */
6261 /* or later. */
6262 assert(dst_page->busy || (ops & UPL_POP_BUSY));
6263 if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
6264 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
6265 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
6266 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
6267 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
6268 }
6269
6270 if(ops & UPL_POP_CLR) {
6271 assert(dst_page->busy);
6272 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
6273 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
6274 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
6275 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
6276 if (ops & UPL_POP_BUSY) {
6277 dst_page->busy = FALSE;
6278 PAGE_WAKEUP(dst_page);
6279 }
6280 }
6281
6282 if (dst_page->encrypted) {
6283 /*
6284 * ENCRYPTED SWAP:
6285 * We need to decrypt this encrypted page before the
6286 * caller can access its contents.
6287 * But if the caller really wants to access the page's
6288 * contents, they have to keep the page "busy".
6289 * Otherwise, the page could get recycled or re-encrypted
6290 * at any time.
6291 */
6292 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
6293 dst_page->busy) {
6294 /*
6295 * The page is stable enough to be accessed by
6296 * the caller, so make sure its contents are
6297 * not encrypted.
6298 */
6299 vm_page_decrypt(dst_page, 0);
6300 } else {
6301 /*
6302 * The page is not busy, so don't bother
6303 * decrypting it, since anything could
6304 * happen to it between now and when the
6305 * caller wants to access it.
6306 * We should not give the caller access
6307 * to this page.
6308 */
6309 assert(!phys_entry);
6310 }
6311 }
6312
6313 if (phys_entry) {
6314 /*
6315 * The physical page number will remain valid
6316 * only if the page is kept busy.
6317 * ENCRYPTED SWAP: make sure we don't let the
6318 * caller access an encrypted page.
6319 */
6320 assert(dst_page->busy);
6321 assert(!dst_page->encrypted);
6322 *phys_entry = dst_page->phys_page;
6323 }
6324
6325 break;
6326 }
6327
6328 vm_object_unlock(object);
6329 return KERN_SUCCESS;
6330
6331 }
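
/*
 * Illustrative sketch (not compiled): a hypothetical caller of
 * vm_object_page_op() above, using UPL_POP_SET|UPL_POP_BUSY to mark a
 * resident page busy while reading its physical page number, then
 * UPL_POP_CLR|UPL_POP_BUSY to release it.  Per the comments above, the
 * physical page number is only stable while the page stays busy.
 */
#if 0
static kern_return_t
page_op_example(vm_object_t object, vm_object_offset_t offset, ppnum_t *ppnump)
{
	kern_return_t	kr;

	kr = vm_object_page_op(object, offset,
			       UPL_POP_SET | UPL_POP_BUSY,
			       ppnump, NULL);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... use *ppnump while the page is held busy ... */

	return vm_object_page_op(object, offset,
				 UPL_POP_CLR | UPL_POP_BUSY,
				 NULL, NULL);
}
#endif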
6332
6333 /*
6334 * vm_object_range_op offers performance enhancement over
6335 * vm_object_page_op for page_op functions which do not require page
6336 * level state to be returned from the call. Page_op was created to provide
6337 * a low-cost alternative to page manipulation via UPLs when only a single
6338 * page was involved. The range_op call establishes the ability in the _op
6339 * family of functions to work on multiple pages where the lack of page level
6340 * state handling allows the caller to avoid the overhead of the upl structures.
6341 */
6342
6343 kern_return_t
6344 vm_object_range_op(
6345 vm_object_t object,
6346 vm_object_offset_t offset_beg,
6347 vm_object_offset_t offset_end,
6348 int ops,
6349 int *range)
6350 {
6351 vm_object_offset_t offset;
6352 vm_page_t dst_page;
6353
6354 if (object->resident_page_count == 0) {
6355 if (range) {
6356 if (ops & UPL_ROP_PRESENT)
6357 *range = 0;
6358 else
6359 *range = offset_end - offset_beg;
6360 }
6361 return KERN_SUCCESS;
6362 }
6363 vm_object_lock(object);
6364
6365 if (object->phys_contiguous) {
6366 vm_object_unlock(object);
6367 return KERN_INVALID_OBJECT;
6368 }
6369
6370 offset = offset_beg & ~PAGE_MASK_64;
6371
6372 while (offset < offset_end) {
6373 dst_page = vm_page_lookup(object, offset);
6374 if (dst_page != VM_PAGE_NULL) {
6375 if (ops & UPL_ROP_DUMP) {
6376 if (dst_page->busy || dst_page->cleaning) {
6377 /*
6378 * someone else is playing with the
6379 * page, we will have to wait
6380 */
6381 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
6382 /*
6383 * need to look the page up again since its
6384 * state may have changed while we slept;
6385 * it might even belong to a different object
6386 * at this point
6387 */
6388 continue;
6389 }
6390 if (dst_page->pmapped == TRUE)
6391 pmap_disconnect(dst_page->phys_page);
6392
6393 vm_page_lock_queues();
6394 vm_page_free(dst_page);
6395 vm_page_unlock_queues();
6396
6397 } else if (ops & UPL_ROP_ABSENT)
6398 break;
6399 } else if (ops & UPL_ROP_PRESENT)
6400 break;
6401
6402 offset += PAGE_SIZE;
6403 }
6404 vm_object_unlock(object);
6405
6406 if (range) {
6407 if (offset > offset_end)
6408 offset = offset_end;
6409 if (offset > offset_beg)
6410 *range = offset - offset_beg;
6411 else *range = 0;
6412 }
6413 return KERN_SUCCESS;
6414 }
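
/*
 * Illustrative sketch (not compiled): a hypothetical caller of
 * vm_object_range_op() above, using UPL_ROP_PRESENT to measure how many
 * bytes of the given range are already resident, starting at offset_beg
 * and stopping at the first non-resident page.
 */
#if 0
static vm_object_size_t
resident_run_example(vm_object_t object,
		     vm_object_offset_t offset_beg,
		     vm_object_offset_t offset_end)
{
	int	range = 0;

	if (vm_object_range_op(object, offset_beg, offset_end,
			       UPL_ROP_PRESENT, &range) != KERN_SUCCESS)
		return 0;
	return (vm_object_size_t) range;
}
#endif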
6415
6416
6417 uint32_t scan_object_collision = 0;
6418
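/*
 * The locking wrappers below add one twist to plain lck_rw operations:
 * if vm_pageout_scan() has advertised interest in this object (via
 * vm_pageout_scan_wants_object), the caller briefly pauses before
 * taking the lock so the pageout scan thread can win the race;
 * scan_object_collision counts how often that happens.
 */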
6419 void
6420 vm_object_lock(vm_object_t object)
6421 {
6422 if (object == vm_pageout_scan_wants_object) {
6423 scan_object_collision++;
6424 mutex_pause(2);
6425 }
6426 lck_rw_lock_exclusive(&object->Lock);
6427 }
6428
6429 boolean_t
6430 vm_object_lock_try(vm_object_t object)
6431 {
6432 if (object == vm_pageout_scan_wants_object) {
6433 scan_object_collision++;
6434 mutex_pause(2);
6435 }
6436 return (lck_rw_try_lock_exclusive(&object->Lock));
6437 }
6438
6439 void
6440 vm_object_lock_shared(vm_object_t object)
6441 {
6442 if (object == vm_pageout_scan_wants_object) {
6443 scan_object_collision++;
6444 mutex_pause(2);
6445 }
6446 lck_rw_lock_shared(&object->Lock);
6447 }
6448
6449 boolean_t
6450 vm_object_lock_try_shared(vm_object_t object)
6451 {
6452 if (object == vm_pageout_scan_wants_object) {
6453 scan_object_collision++;
6454 mutex_pause(2);
6455 }
6456 return (lck_rw_try_lock_shared(&object->Lock));
6457 }
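
/*
 * Illustrative sketch (not compiled): a hypothetical caller showing the
 * intended try-lock pattern for the wrappers above -- attempt the
 * exclusive lock without blocking and fall back to the blocking variant
 * when vm_object_lock_try() fails.
 */
#if 0
static void
lock_object_example(vm_object_t object)
{
	if (!vm_object_lock_try(object)) {
		/* contended (possibly with vm_pageout_scan): block for it */
		vm_object_lock(object);
	}
	/* ... operate on the object ... */
	vm_object_unlock(object);
}
#endif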