1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm/vm_object.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Virtual memory object module.
57 */
58
59 #include <mach_pagemap.h>
60 #include <task_swapper.h>
61
62 #include <mach/mach_types.h>
63 #include <mach/memory_object.h>
64 #include <mach/memory_object_default.h>
65 #include <mach/memory_object_control_server.h>
66 #include <mach/vm_param.h>
67
68 #include <ipc/ipc_types.h>
69 #include <ipc/ipc_port.h>
70
71 #include <kern/kern_types.h>
72 #include <kern/assert.h>
73 #include <kern/lock.h>
74 #include <kern/queue.h>
75 #include <kern/xpr.h>
76 #include <kern/zalloc.h>
77 #include <kern/host.h>
78 #include <kern/host_statistics.h>
79 #include <kern/processor.h>
80 #include <kern/misc_protos.h>
81
82 #include <vm/memory_object.h>
83 #include <vm/vm_fault.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_object.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_pageout.h>
88 #include <vm/vm_protos.h>
89
90 /*
91 * Virtual memory objects maintain the actual data
92 * associated with allocated virtual memory. A given
93 * page of memory exists within exactly one object.
94 *
95 * An object is only deallocated when all "references"
96 * are given up.
97 *
98 * Associated with each object is a list of all resident
99 * memory pages belonging to that object; this list is
100 * maintained by the "vm_page" module, but locked by the object's
101 * lock.
102 *
103 * Each object also records the memory object reference
104 * that is used by the kernel to request and write
105 * back data (the memory object, field "pager"), etc...
106 *
107 * Virtual memory objects are allocated to provide
108 * zero-filled memory (vm_allocate) or map a user-defined
109 * memory object into a virtual address space (vm_map).
110 *
111 * Virtual memory objects that refer to a user-defined
112 * memory object are called "permanent", because all changes
113 * made in virtual memory are reflected back to the
114 * memory manager, which may then store them permanently.
115 * Other virtual memory objects are called "temporary",
116 * meaning that changes need be written back only when
117 * necessary to reclaim pages, and that storage associated
118 * with the object can be discarded once it is no longer
119 * mapped.
120 *
121 * A permanent memory object may be mapped into more
122 * than one virtual address space. Moreover, two threads
123 * may attempt to make the first mapping of a memory
124 * object concurrently. Only one thread is allowed to
125 * complete this mapping; all others wait until the
126 * "pager_initialized" field is asserted, indicating
127 * that the first thread has initialized all of the
128 * necessary fields in the virtual memory object structure.
129 *
130 * The kernel relies on a *default memory manager* to
131 * provide backing storage for the zero-filled virtual
132 * memory objects. The pager memory objects associated
133 * with these temporary virtual memory objects are only
134 * requested from the default memory manager when it
135 * becomes necessary. Virtual memory objects
136 * that depend on the default memory manager are called
137 * "internal". The "pager_created" field is provided to
138 * indicate whether these ports have ever been allocated.
139 *
140 * The kernel may also create virtual memory objects to
141 * hold changed pages after a copy-on-write operation.
142 * In this case, the virtual memory object (and its
143 * backing storage -- its memory object) only contain
144 * those pages that have been changed. The "shadow"
145 * field refers to the virtual memory object that contains
146 * the remainder of the contents. The "shadow_offset"
147 * field indicates where in the "shadow" these contents begin.
148 * The "copy" field refers to a virtual memory object
149 * to which changed pages must be copied before changing
150 * this object, in order to implement another form
151 * of copy-on-write optimization.
152 *
153 * The virtual memory object structure also records
154 * the attributes associated with its memory object.
155 * The "pager_ready", "can_persist" and "copy_strategy"
156 * fields represent those attributes. The "cached_list"
157 * field is used in the implementation of the persistence
158 * attribute.
159 *
160 * ZZZ Continue this comment.
161 */
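/*
 * Illustrative sketch (not code taken from this module): the "shadow"
 * and "shadow_offset" fields described above are consumed by walking
 * down the shadow chain, using the same offset-translation step found
 * in routines such as vm_object_pmap_protect() and
 * vm_object_deactivate_pages() below. Ignoring locking, a lookup that
 * misses at one level falls back to the backing object roughly like:
 *
 *	while (object != VM_OBJECT_NULL) {
 *		if (vm_page_lookup(object, offset) != VM_PAGE_NULL)
 *			break;				// page resident here
 *		offset += object->shadow_offset;	// translate offset
 *		object = object->shadow;		// back up the chain
 *	}
 */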
162
163 /* Forward declarations for internal functions. */
164 static kern_return_t vm_object_terminate(
165 vm_object_t object);
166
167 extern void vm_object_remove(
168 vm_object_t object);
169
170 static vm_object_t vm_object_cache_trim(
171 boolean_t called_from_vm_object_deallocate);
172
173 static void vm_object_deactivate_all_pages(
174 vm_object_t object);
175
176 static kern_return_t vm_object_copy_call(
177 vm_object_t src_object,
178 vm_object_offset_t src_offset,
179 vm_object_size_t size,
180 vm_object_t *_result_object);
181
182 static void vm_object_do_collapse(
183 vm_object_t object,
184 vm_object_t backing_object);
185
186 static void vm_object_do_bypass(
187 vm_object_t object,
188 vm_object_t backing_object);
189
190 static void vm_object_release_pager(
191 memory_object_t pager);
192
193 static zone_t vm_object_zone; /* vm backing store zone */
194
195 /*
196 * All wired-down kernel memory belongs to a single virtual
197 * memory object (kernel_object) to avoid wasting data structures.
198 */
199 static struct vm_object kernel_object_store;
200 __private_extern__ vm_object_t kernel_object = &kernel_object_store;
201
202 /*
203 * The submap object is used as a placeholder for vm_map_submap
204 * operations. The object is declared in vm_map.c because it
205 * is exported by the vm_map module. The storage is declared
206 * here because it must be initialized here.
207 */
208 static struct vm_object vm_submap_object_store;
209
210 /*
211 * Virtual memory objects are initialized from
212 * a template (see vm_object_allocate).
213 *
214 * When adding a new field to the virtual memory
215 * object structure, be sure to add initialization
216 * (see _vm_object_allocate()).
217 */
218 static struct vm_object vm_object_template;
219
220 /*
221 * Virtual memory objects that are not referenced by
222 * any address maps, but that are allowed to persist
223 * (an attribute specified by the associated memory manager),
224 * are kept in a queue (vm_object_cached_list).
225 *
226 * When an object from this queue is referenced again,
227 * for example to make another address space mapping,
228 * it must be removed from the queue. That is, the
229 * queue contains *only* objects with zero references.
230 *
231 * The kernel may choose to terminate objects from this
232 * queue in order to reclaim storage. The current policy
233 * is to permit a fixed maximum number of unreferenced
234 * objects (vm_object_cached_max).
235 *
236 * A mutex (accessed by routines
237 * vm_object_cache_{lock,lock_try,unlock}) governs the
238 * object cache. It must be held when objects are
239 * added to or removed from the cache (in vm_object_terminate).
240 * The routines that acquire a reference to a virtual
241 * memory object based on one of the memory object ports
242 * must also lock the cache.
243 *
244 * Ideally, the object cache should be more isolated
245 * from the reference mechanism, so that the lock need
246 * not be held to make simple references.
247 */
248 static queue_head_t vm_object_cached_list;
249 static int vm_object_cached_count=0;
250 static int vm_object_cached_high; /* highest # cached objects */
251 static int vm_object_cached_max = 512; /* may be patched */
252
253 static decl_mutex_data(,vm_object_cached_lock_data)
254
255 #define vm_object_cache_lock() \
256 mutex_lock(&vm_object_cached_lock_data)
257 #define vm_object_cache_lock_try() \
258 mutex_try(&vm_object_cached_lock_data)
259 #define vm_object_cache_unlock() \
260 mutex_unlock(&vm_object_cached_lock_data)
261
262 #define VM_OBJECT_HASH_COUNT 1024
263 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
264 static struct zone *vm_object_hash_zone;
265
266 struct vm_object_hash_entry {
267 queue_chain_t hash_link; /* hash chain link */
268 memory_object_t pager; /* pager we represent */
269 vm_object_t object; /* corresponding object */
270 boolean_t waiting; /* someone waiting for
271 * termination */
272 };
273
274 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
275 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
276
277 #define VM_OBJECT_HASH_SHIFT 8
278 #define vm_object_hash(pager) \
279 ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
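/*
 * Worked example (pointer value purely illustrative): with
 * VM_OBJECT_HASH_SHIFT of 8 and VM_OBJECT_HASH_COUNT of 1024, a pager
 * pointer of 0x12345600 hashes to
 *	((0x12345600 >> 8) % 1024) = 0x123456 % 1024 = 0x56  (bucket 86).
 * The shift discards low-order address bits (presumably to spread
 * similarly-aligned allocations across buckets); the modulo then
 * selects one of the VM_OBJECT_HASH_COUNT buckets.
 */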
280
281 void vm_object_hash_entry_free(
282 vm_object_hash_entry_t entry);
283
284 static void vm_object_reap(vm_object_t object);
285 static void vm_object_reap_async(vm_object_t object);
286 static void vm_object_reaper_thread(void);
287 static queue_head_t vm_object_reaper_queue; /* protected by vm_object_cache_lock() */
288 unsigned int vm_object_reap_count = 0;
289 unsigned int vm_object_reap_count_async = 0;
290
291 /*
292 * vm_object_hash_lookup looks up a pager in the hashtable
293 * and returns the corresponding entry, with optional removal.
294 */
295
296 static vm_object_hash_entry_t
297 vm_object_hash_lookup(
298 memory_object_t pager,
299 boolean_t remove_entry)
300 {
301 register queue_t bucket;
302 register vm_object_hash_entry_t entry;
303
304 bucket = &vm_object_hashtable[vm_object_hash(pager)];
305
306 entry = (vm_object_hash_entry_t)queue_first(bucket);
307 while (!queue_end(bucket, (queue_entry_t)entry)) {
308 if (entry->pager == pager && !remove_entry)
309 return(entry);
310 else if (entry->pager == pager) {
311 queue_remove(bucket, entry,
312 vm_object_hash_entry_t, hash_link);
313 return(entry);
314 }
315
316 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
317 }
318
319 return(VM_OBJECT_HASH_ENTRY_NULL);
320 }
321
322 /*
323 * vm_object_hash_insert enters the specified
324 * pager / cache object association in the hashtable.
325 */
326
327 static void
328 vm_object_hash_insert(
329 vm_object_hash_entry_t entry)
330 {
331 register queue_t bucket;
332
333 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
334
335 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
336 }
337
338 static vm_object_hash_entry_t
339 vm_object_hash_entry_alloc(
340 memory_object_t pager)
341 {
342 vm_object_hash_entry_t entry;
343
344 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
345 entry->pager = pager;
346 entry->object = VM_OBJECT_NULL;
347 entry->waiting = FALSE;
348
349 return(entry);
350 }
351
352 void
353 vm_object_hash_entry_free(
354 vm_object_hash_entry_t entry)
355 {
356 zfree(vm_object_hash_zone, entry);
357 }
358
359 /*
360 * vm_object_allocate:
361 *
362 * Returns a new object with the given size.
363 */
364
365 __private_extern__ void
366 _vm_object_allocate(
367 vm_object_size_t size,
368 vm_object_t object)
369 {
370 XPR(XPR_VM_OBJECT,
371 "vm_object_allocate, object 0x%X size 0x%X\n",
372 (integer_t)object, size, 0,0,0);
373
374 *object = vm_object_template;
375 queue_init(&object->memq);
376 queue_init(&object->msr_q);
377 #ifdef UPL_DEBUG
378 queue_init(&object->uplq);
379 #endif /* UPL_DEBUG */
380 vm_object_lock_init(object);
381 object->size = size;
382 }
383
384 __private_extern__ vm_object_t
385 vm_object_allocate(
386 vm_object_size_t size)
387 {
388 register vm_object_t object;
389
390 object = (vm_object_t) zalloc(vm_object_zone);
391
392 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
393
394 if (object != VM_OBJECT_NULL)
395 _vm_object_allocate(size, object);
396
397 return object;
398 }
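/*
 * Usage sketch (illustrative only; the caller and size are hypothetical):
 * vm_object_allocate() returns a temporary, internal object carrying a
 * single reference (see vm_object_template), which the caller eventually
 * drops with vm_object_deallocate():
 *
 *	vm_object_t	obj;
 *
 *	obj = vm_object_allocate((vm_object_size_t)(4 * PAGE_SIZE));
 *	if (obj != VM_OBJECT_NULL) {
 *		...				// populate or map the object
 *		vm_object_deallocate(obj);	// drop the allocation reference
 *	}
 */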
399
400 /*
401 * vm_object_bootstrap:
402 *
403 * Initialize the VM objects module.
404 */
405 __private_extern__ void
406 vm_object_bootstrap(void)
407 {
408 register int i;
409
410 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
411 round_page_32(512*1024),
412 round_page_32(12*1024),
413 "vm objects");
414
415 queue_init(&vm_object_reaper_queue);
416
417 queue_init(&vm_object_cached_list);
418 mutex_init(&vm_object_cached_lock_data, 0);
419
420 vm_object_hash_zone =
421 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
422 round_page_32(512*1024),
423 round_page_32(12*1024),
424 "vm object hash entries");
425
426 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
427 queue_init(&vm_object_hashtable[i]);
428
429 /*
430 * Fill in a template object, for quick initialization
431 */
432
433 /* memq; Lock; init after allocation */
434 vm_object_template.size = 0;
435 vm_object_template.memq_hint = VM_PAGE_NULL;
436 vm_object_template.ref_count = 1;
437 #if TASK_SWAPPER
438 vm_object_template.res_count = 1;
439 #endif /* TASK_SWAPPER */
440 vm_object_template.resident_page_count = 0;
441 vm_object_template.copy = VM_OBJECT_NULL;
442 vm_object_template.shadow = VM_OBJECT_NULL;
443 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
444 vm_object_template.cow_hint = ~(vm_offset_t)0;
445 vm_object_template.true_share = FALSE;
446
447 vm_object_template.pager = MEMORY_OBJECT_NULL;
448 vm_object_template.paging_offset = 0;
449 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
450 /* msr_q; init after allocation */
451
452 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
453 vm_object_template.absent_count = 0;
454 vm_object_template.paging_in_progress = 0;
455
456 /* Begin bitfields */
457 vm_object_template.all_wanted = 0; /* all bits FALSE */
458 vm_object_template.pager_created = FALSE;
459 vm_object_template.pager_initialized = FALSE;
460 vm_object_template.pager_ready = FALSE;
461 vm_object_template.pager_trusted = FALSE;
462 vm_object_template.can_persist = FALSE;
463 vm_object_template.internal = TRUE;
464 vm_object_template.temporary = TRUE;
465 vm_object_template.private = FALSE;
466 vm_object_template.pageout = FALSE;
467 vm_object_template.alive = TRUE;
468 vm_object_template.purgable = VM_OBJECT_NONPURGABLE;
469 vm_object_template.silent_overwrite = FALSE;
470 vm_object_template.advisory_pageout = FALSE;
471 vm_object_template.shadowed = FALSE;
472 vm_object_template.terminating = FALSE;
473 vm_object_template.shadow_severed = FALSE;
474 vm_object_template.phys_contiguous = FALSE;
475 vm_object_template.nophyscache = FALSE;
476 /* End bitfields */
477
478 /* cache bitfields */
479 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
480
481 /* cached_list; init after allocation */
482 vm_object_template.last_alloc = (vm_object_offset_t) 0;
483 vm_object_template.cluster_size = 0;
484 #if MACH_PAGEMAP
485 vm_object_template.existence_map = VM_EXTERNAL_NULL;
486 #endif /* MACH_PAGEMAP */
487 #if MACH_ASSERT
488 vm_object_template.paging_object = VM_OBJECT_NULL;
489 #endif /* MACH_ASSERT */
490
491 /*
492 * Initialize the "kernel object"
493 */
494
495 kernel_object = &kernel_object_store;
496
497 /*
498 * Note that in the following size specifications, we need to add 1 because
499 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
500 */
501
502 #ifdef ppc
503 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
504 kernel_object);
505 #else
506 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
507 kernel_object);
508 #endif
509 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
510
511 /*
512 * Initialize the "submap object". Make it as large as the
513 * kernel object so that no limit is imposed on submap sizes.
514 */
515
516 vm_submap_object = &vm_submap_object_store;
517 #ifdef ppc
518 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
519 vm_submap_object);
520 #else
521 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
522 vm_submap_object);
523 #endif
524 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
525
526 /*
527 * Create an "extra" reference to this object so that we never
528 * try to deallocate it; zfree doesn't like to be called with
529 * non-zone memory.
530 */
531 vm_object_reference(vm_submap_object);
532
533 #if MACH_PAGEMAP
534 vm_external_module_initialize();
535 #endif /* MACH_PAGEMAP */
536 }
537
538 void
539 vm_object_reaper_init(void)
540 {
541 kern_return_t kr;
542 thread_t thread;
543
544 kr = kernel_thread_start_priority(
545 (thread_continue_t) vm_object_reaper_thread,
546 NULL,
547 BASEPRI_PREEMPT - 1,
548 &thread);
549 if (kr != KERN_SUCCESS) {
550 panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
551 }
552 thread_deallocate(thread);
553 }
554
555 __private_extern__ void
556 vm_object_init(void)
557 {
558 /*
559 * Finish initializing the kernel object.
560 */
561 }
562
563 /* remove the typedef below when emergency work-around is taken out */
564 typedef struct vnode_pager {
565 memory_object_t pager;
566 memory_object_t pager_handle; /* pager */
567 memory_object_control_t control_handle; /* memory object's control handle */
568 void *vnode_handle; /* vnode handle */
569 } *vnode_pager_t;
570
571 #define MIGHT_NOT_CACHE_SHADOWS 1
572 #if MIGHT_NOT_CACHE_SHADOWS
573 static int cache_shadows = TRUE;
574 #endif /* MIGHT_NOT_CACHE_SHADOWS */
575
576 /*
577 * vm_object_deallocate:
578 *
579 * Release a reference to the specified object,
580 * gained either through a vm_object_allocate
581 * or a vm_object_reference call. When all references
582 * are gone, storage associated with this object
583 * may be relinquished.
584 *
585 * No object may be locked.
586 */
587 __private_extern__ void
588 vm_object_deallocate(
589 register vm_object_t object)
590 {
591 boolean_t retry_cache_trim = FALSE;
592 vm_object_t shadow = VM_OBJECT_NULL;
593
594 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
595 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
596
597
598 while (object != VM_OBJECT_NULL) {
599
600 /*
601 * The cache holds a reference (uncounted) to
602 * the object; we must lock it before removing
603 * the object.
604 */
605 for (;;) {
606 vm_object_cache_lock();
607
608 /*
609 * if we try to take a regular lock here
610 * we risk deadlocking against someone
611 * holding a lock on this object while
612 * trying to vm_object_deallocate a different
613 * object
614 */
615 if (vm_object_lock_try(object))
616 break;
617 vm_object_cache_unlock();
618 mutex_pause(); /* wait a bit */
619 }
620 assert(object->ref_count > 0);
621
622 /*
623 * If the object has a named reference, and only
624 * that reference would remain, inform the pager
625 * about the last "mapping" reference going away.
626 */
627 if ((object->ref_count == 2) && (object->named)) {
628 memory_object_t pager = object->pager;
629
630 /* Notify the Pager that there are no */
631 /* more mappers for this object */
632
633 if (pager != MEMORY_OBJECT_NULL) {
634 vm_object_unlock(object);
635 vm_object_cache_unlock();
636
637 memory_object_unmap(pager);
638
639 for (;;) {
640 vm_object_cache_lock();
641
642 /*
643 * if we try to take a regular lock here
644 * we risk deadlocking against someone
645 * holding a lock on this object while
646 * trying to vm_object_deallocate a different
647 * object
648 */
649 if (vm_object_lock_try(object))
650 break;
651 vm_object_cache_unlock();
652 mutex_pause(); /* wait a bit */
653 }
654 assert(object->ref_count > 0);
655 }
656 }
657
658 /*
659 * Lose the reference. If other references
660 * remain, then we are done, unless we need
661 * to retry a cache trim.
662 * If it is the last reference, then keep it
663 * until any pending initialization is completed.
664 */
665
666 /* if the object is terminating, it cannot go into */
667 /* the cache and we obviously should not call */
668 /* terminate again. */
669
670 if ((object->ref_count > 1) || object->terminating) {
671 object->ref_count--;
672 vm_object_res_deallocate(object);
673 vm_object_cache_unlock();
674
675 if (object->ref_count == 1 &&
676 object->shadow != VM_OBJECT_NULL) {
677 /*
678 * We don't use this VM object anymore. We
679 * would like to collapse it into its parent(s),
680 * but we don't have any pointers back to these
681 * parent object(s).
682 * But we can try and collapse this object with
683 * its own shadows, in case these are useless
684 * too...
685 */
686 vm_object_collapse(object, 0);
687 }
688
689 vm_object_unlock(object);
690 if (retry_cache_trim &&
691 ((object = vm_object_cache_trim(TRUE)) !=
692 VM_OBJECT_NULL)) {
693 continue;
694 }
695 return;
696 }
697
698 /*
699 * We have to wait for initialization
700 * before destroying or caching the object.
701 */
702
703 if (object->pager_created && ! object->pager_initialized) {
704 assert(! object->can_persist);
705 vm_object_assert_wait(object,
706 VM_OBJECT_EVENT_INITIALIZED,
707 THREAD_UNINT);
708 vm_object_unlock(object);
709 vm_object_cache_unlock();
710 thread_block(THREAD_CONTINUE_NULL);
711 continue;
712 }
713
714 /*
715 * If this object can persist, then enter it in
716 * the cache. Otherwise, terminate it.
717 *
718 * NOTE: Only permanent objects are cached, and
719 * permanent objects cannot have shadows. This
720 * affects the residence counting logic in a minor
721 * way (can do it in-line, mostly).
722 */
723
724 if ((object->can_persist) && (object->alive)) {
725 /*
726 * Now it is safe to decrement reference count,
727 * and to return if reference count is > 0.
728 */
729 if (--object->ref_count > 0) {
730 vm_object_res_deallocate(object);
731 vm_object_unlock(object);
732 vm_object_cache_unlock();
733 if (retry_cache_trim &&
734 ((object = vm_object_cache_trim(TRUE)) !=
735 VM_OBJECT_NULL)) {
736 continue;
737 }
738 return;
739 }
740
741 #if MIGHT_NOT_CACHE_SHADOWS
742 /*
743 * Remove shadow now if we don't
744 * want to cache shadows.
745 */
746 if (! cache_shadows) {
747 shadow = object->shadow;
748 object->shadow = VM_OBJECT_NULL;
749 }
750 #endif /* MIGHT_NOT_CACHE_SHADOWS */
751
752 /*
753 * Enter the object onto the queue of
754 * cached objects, and deactivate
755 * all of its pages.
756 */
757 assert(object->shadow == VM_OBJECT_NULL);
758 VM_OBJ_RES_DECR(object);
759 XPR(XPR_VM_OBJECT,
760 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
761 (integer_t)object,
762 (integer_t)vm_object_cached_list.next,
763 (integer_t)vm_object_cached_list.prev,0,0);
764
765 vm_object_cached_count++;
766 if (vm_object_cached_count > vm_object_cached_high)
767 vm_object_cached_high = vm_object_cached_count;
768 queue_enter(&vm_object_cached_list, object,
769 vm_object_t, cached_list);
770 vm_object_cache_unlock();
771 vm_object_deactivate_all_pages(object);
772 vm_object_unlock(object);
773
774 #if MIGHT_NOT_CACHE_SHADOWS
775 /*
776 * If we have a shadow that we need
777 * to deallocate, do so now, remembering
778 * to trim the cache later.
779 */
780 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
781 object = shadow;
782 retry_cache_trim = TRUE;
783 continue;
784 }
785 #endif /* MIGHT_NOT_CACHE_SHADOWS */
786
787 /*
788 * Trim the cache. If the cache trim
789 * returns with a shadow for us to deallocate,
790 * then remember to retry the cache trim
791 * when we are done deallocating the shadow.
792 * Otherwise, we are done.
793 */
794
795 object = vm_object_cache_trim(TRUE);
796 if (object == VM_OBJECT_NULL) {
797 return;
798 }
799 retry_cache_trim = TRUE;
800
801 } else {
802 /*
803 * This object is not cachable; terminate it.
804 */
805 XPR(XPR_VM_OBJECT,
806 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
807 (integer_t)object, object->resident_page_count,
808 object->paging_in_progress,
809 (void *)current_thread(),object->ref_count);
810
811 VM_OBJ_RES_DECR(object); /* XXX ? */
812 /*
813 * Terminate this object. If it had a shadow,
814 * then deallocate it; otherwise, if we need
815 * to retry a cache trim, do so now; otherwise,
816 * we are done. "pageout" objects have a shadow,
817 * but maintain a "paging reference" rather than
818 * a normal reference.
819 */
820 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
821 if(vm_object_terminate(object) != KERN_SUCCESS) {
822 return;
823 }
824 if (shadow != VM_OBJECT_NULL) {
825 object = shadow;
826 continue;
827 }
828 if (retry_cache_trim &&
829 ((object = vm_object_cache_trim(TRUE)) !=
830 VM_OBJECT_NULL)) {
831 continue;
832 }
833 return;
834 }
835 }
836 assert(! retry_cache_trim);
837 }
838
839 /*
840 * Check to see whether we really need to trim
841 * down the cache. If so, remove an object from
842 * the cache, terminate it, and repeat.
843 *
844 * Called with, and returns with, cache lock unlocked.
845 */
846 vm_object_t
847 vm_object_cache_trim(
848 boolean_t called_from_vm_object_deallocate)
849 {
850 register vm_object_t object = VM_OBJECT_NULL;
851 vm_object_t shadow;
852
853 for (;;) {
854
855 /*
856 * If we no longer need to trim the cache,
857 * then we are done.
858 */
859
860 vm_object_cache_lock();
861 if (vm_object_cached_count <= vm_object_cached_max) {
862 vm_object_cache_unlock();
863 return VM_OBJECT_NULL;
864 }
865
866 /*
867 * We must trim down the cache, so remove
868 * the first object in the cache.
869 */
870 XPR(XPR_VM_OBJECT,
871 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
872 (integer_t)vm_object_cached_list.next,
873 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
874
875 object = (vm_object_t) queue_first(&vm_object_cached_list);
876 if(object == (vm_object_t) &vm_object_cached_list) {
877 /* something's wrong with the calling parameter or */
878 /* the value of vm_object_cached_count, just fix */
879 /* and return */
880 if(vm_object_cached_max < 0)
881 vm_object_cached_max = 0;
882 vm_object_cached_count = 0;
883 vm_object_cache_unlock();
884 return VM_OBJECT_NULL;
885 }
886 vm_object_lock(object);
887 queue_remove(&vm_object_cached_list, object, vm_object_t,
888 cached_list);
889 vm_object_cached_count--;
890
891 /*
892 * Since this object is in the cache, we know
893 * that it is initialized and has no references.
894 * Take a reference to avoid recursive deallocations.
895 */
896
897 assert(object->pager_initialized);
898 assert(object->ref_count == 0);
899 object->ref_count++;
900
901 /*
902 * Terminate the object.
903 * If the object had a shadow, we let vm_object_deallocate
904 * deallocate it. "pageout" objects have a shadow, but
905 * maintain a "paging reference" rather than a normal
906 * reference.
907 * (We are careful here to limit recursion.)
908 */
909 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
910 if(vm_object_terminate(object) != KERN_SUCCESS)
911 continue;
912 if (shadow != VM_OBJECT_NULL) {
913 if (called_from_vm_object_deallocate) {
914 return shadow;
915 } else {
916 vm_object_deallocate(shadow);
917 }
918 }
919 }
920 }
921
922 boolean_t vm_object_terminate_remove_all = FALSE;
923
924 /*
925 * Routine: vm_object_terminate
926 * Purpose:
927 * Free all resources associated with a vm_object.
928 * In/out conditions:
929 * Upon entry, the object must be locked,
930 * and the object must have exactly one reference.
931 *
932 * The shadow object reference is left alone.
933 *
934 * The object must be unlocked if it's found that pages
935 * must be flushed to a backing object. If someone
936 * manages to map the object while it is being flushed,
937 * the object is returned unlocked and unchanged. Otherwise,
938 * upon exit, the cache will be unlocked, and the
939 * object will cease to exist.
940 */
941 static kern_return_t
942 vm_object_terminate(
943 register vm_object_t object)
944 {
945 register vm_page_t p;
946 vm_object_t shadow_object;
947
948 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
949 (integer_t)object, object->ref_count, 0, 0, 0);
950
951 if (!object->pageout && (!object->temporary || object->can_persist)
952 && (object->pager != NULL || object->shadow_severed)) {
953 vm_object_cache_unlock();
954 while (!queue_empty(&object->memq)) {
955 /*
956 * Clear pager_trusted bit so that the pages get yanked
957 * out of the object instead of cleaned in place. This
958 * prevents a deadlock in XMM and makes more sense anyway.
959 */
960 object->pager_trusted = FALSE;
961
962 p = (vm_page_t) queue_first(&object->memq);
963
964 VM_PAGE_CHECK(p);
965
966 if (p->busy || p->cleaning) {
967 if(p->cleaning || p->absent) {
968 vm_object_paging_wait(object, THREAD_UNINT);
969 continue;
970 } else {
971 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
972 }
973 }
974
975 vm_page_lock_queues();
976 p->busy = TRUE;
977 VM_PAGE_QUEUES_REMOVE(p);
978 vm_page_unlock_queues();
979
980 if (p->absent || p->private) {
981
982 /*
983 * For private pages, VM_PAGE_FREE just
984 * leaves the page structure around for
985 * its owner to clean up. For absent
986 * pages, the structure is returned to
987 * the appropriate pool.
988 */
989
990 goto free_page;
991 }
992
993 if (p->fictitious)
994 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
995
996 if (!p->dirty)
997 p->dirty = pmap_is_modified(p->phys_page);
998
999 if ((p->dirty || p->precious) && !p->error && object->alive) {
1000 vm_pageout_cluster(p); /* flush page */
1001 vm_object_paging_wait(object, THREAD_UNINT);
1002 XPR(XPR_VM_OBJECT,
1003 "vm_object_terminate restart, object 0x%X ref %d\n",
1004 (integer_t)object, object->ref_count, 0, 0, 0);
1005 } else {
1006 free_page:
1007 VM_PAGE_FREE(p);
1008 }
1009 }
1010 vm_object_unlock(object);
1011 vm_object_cache_lock();
1012 vm_object_lock(object);
1013 }
1014
1015 /*
1016 * Make sure the object isn't already being terminated
1017 */
1018 if(object->terminating) {
1019 object->ref_count -= 1;
1020 assert(object->ref_count > 0);
1021 vm_object_cache_unlock();
1022 vm_object_unlock(object);
1023 return KERN_FAILURE;
1024 }
1025
1026 /*
1027 * Did somebody get a reference to the object while we were
1028 * cleaning it?
1029 */
1030 if(object->ref_count != 1) {
1031 object->ref_count -= 1;
1032 assert(object->ref_count > 0);
1033 vm_object_res_deallocate(object);
1034 vm_object_cache_unlock();
1035 vm_object_unlock(object);
1036 return KERN_FAILURE;
1037 }
1038
1039 /*
1040 * Make sure no one can look us up now.
1041 */
1042
1043 object->terminating = TRUE;
1044 object->alive = FALSE;
1045 vm_object_remove(object);
1046
1047 /*
1048 * Detach the object from its shadow if we are the shadow's
1049 * copy. The reference we hold on the shadow must be dropped
1050 * by our caller.
1051 */
1052 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1053 !(object->pageout)) {
1054 vm_object_lock(shadow_object);
1055 if (shadow_object->copy == object)
1056 shadow_object->copy = VM_OBJECT_NULL;
1057 vm_object_unlock(shadow_object);
1058 }
1059
1060 if (object->paging_in_progress != 0) {
1061 /*
1062 * There are still some paging_in_progress references
1063 * on this object, meaning that there are some paging
1064 * or other I/O operations in progress for this VM object.
1065 * Such operations take some paging_in_progress references
1066 * up front to ensure that the object doesn't go away, but
1067 * they may also need to acquire a reference on the VM object,
1068 * to map it in kernel space, for example. That means that
1069 * they may end up releasing the last reference on the VM
1070 * object, triggering its termination, while still holding
1071 * paging_in_progress references. Waiting for these
1072 * pending paging_in_progress references to go away here would
1073 * deadlock.
1074 *
1075 * To avoid deadlocking, we'll let the vm_object_reaper_thread
1076 * complete the VM object termination if it still holds
1077 * paging_in_progress references at this point.
1078 *
1079 * No new paging_in_progress should appear now that the
1080 * VM object is "terminating" and not "alive".
1081 */
1082 vm_object_reap_async(object);
1083 vm_object_cache_unlock();
1084 vm_object_unlock(object);
1085 /*
1086 * Return KERN_FAILURE to let the caller know that we
1087 * haven't completed the termination and it can't drop this
1088 * object's reference on its shadow object yet.
1089 * The reaper thread will take care of that once it has
1090 * completed this object's termination.
1091 */
1092 return KERN_FAILURE;
1093 }
1094
1095 /* complete the VM object termination */
1096 vm_object_reap(object);
1097 object = VM_OBJECT_NULL;
1098 /* cache lock and object lock were released by vm_object_reap() */
1099
1100 return KERN_SUCCESS;
1101 }
1102
1103 /*
1104 * vm_object_reap():
1105 *
1106 * Complete the termination of a VM object after it's been marked
1107 * as "terminating" and "!alive" by vm_object_terminate().
1108 *
1109 * The VM object cache and the VM object must be locked by caller.
1110 * The locks will be released on return and the VM object is no longer valid.
1111 */
1112 void
1113 vm_object_reap(
1114 vm_object_t object)
1115 {
1116 memory_object_t pager;
1117 vm_page_t p;
1118
1119 #if DEBUG
1120 mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
1121 mutex_assert(&object->Lock, MA_OWNED);
1122 #endif /* DEBUG */
1123
1124 vm_object_reap_count++;
1125
1126 /*
1127 * The pageout daemon might be playing with our pages.
1128 * Now that the object is dead, it won't touch any more
1129 * pages, but some pages might already be on their way out.
1130 * Hence, we wait until the active paging activities have
1131 * ceased before we break the association with the pager
1132 * itself.
1133 */
1134 while (object->paging_in_progress != 0) {
1135 vm_object_cache_unlock();
1136 vm_object_wait(object,
1137 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1138 THREAD_UNINT);
1139 vm_object_cache_lock();
1140 vm_object_lock(object);
1141 }
1142
1143 assert(object->paging_in_progress == 0);
1144 pager = object->pager;
1145 object->pager = MEMORY_OBJECT_NULL;
1146
1147 if (pager != MEMORY_OBJECT_NULL)
1148 memory_object_control_disable(object->pager_control);
1149 vm_object_cache_unlock();
1150
1151 object->ref_count--;
1152 #if TASK_SWAPPER
1153 assert(object->res_count == 0);
1154 #endif /* TASK_SWAPPER */
1155
1156 assert (object->ref_count == 0);
1157
1158 /*
1159 * Clean or free the pages, as appropriate.
1160 * It is possible for us to find busy/absent pages,
1161 * if some faults on this object were aborted.
1162 */
1163 if (object->pageout) {
1164 assert(object->shadow != VM_OBJECT_NULL);
1165
1166 vm_pageout_object_terminate(object);
1167
1168 } else if ((object->temporary && !object->can_persist) ||
1169 (pager == MEMORY_OBJECT_NULL)) {
1170 while (!queue_empty(&object->memq)) {
1171 p = (vm_page_t) queue_first(&object->memq);
1172
1173 VM_PAGE_CHECK(p);
1174 VM_PAGE_FREE(p);
1175 }
1176 } else if (!queue_empty(&object->memq)) {
1177 panic("vm_object_reap: queue just emptied isn't");
1178 }
1179
1180 assert(object->paging_in_progress == 0);
1181 assert(object->ref_count == 0);
1182
1183 /*
1184 * If the pager has not already been released by
1185 * vm_object_destroy, we need to terminate it and
1186 * release our reference to it here.
1187 */
1188 if (pager != MEMORY_OBJECT_NULL) {
1189 vm_object_unlock(object);
1190 vm_object_release_pager(pager);
1191 vm_object_lock(object);
1192 }
1193
1194 /* kick off anyone waiting on terminating */
1195 object->terminating = FALSE;
1196 vm_object_paging_begin(object);
1197 vm_object_paging_end(object);
1198 vm_object_unlock(object);
1199
1200 #if MACH_PAGEMAP
1201 vm_external_destroy(object->existence_map, object->size);
1202 #endif /* MACH_PAGEMAP */
1203
1204 object->shadow = VM_OBJECT_NULL;
1205
1206 /*
1207 * Free the space for the object.
1208 */
1209 zfree(vm_object_zone, object);
1210 object = VM_OBJECT_NULL;
1211 }
1212
1213 void
1214 vm_object_reap_async(
1215 vm_object_t object)
1216 {
1217 #if DEBUG
1218 mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
1219 mutex_assert(&object->Lock, MA_OWNED);
1220 #endif /* DEBUG */
1221
1222 vm_object_reap_count_async++;
1223
1224 /* enqueue the VM object... */
1225 queue_enter(&vm_object_reaper_queue, object,
1226 vm_object_t, cached_list);
1227 /* ... and wake up the reaper thread */
1228 thread_wakeup((event_t) &vm_object_reaper_queue);
1229 }
1230
1231 void
1232 vm_object_reaper_thread(void)
1233 {
1234 vm_object_t object, shadow_object;
1235
1236 vm_object_cache_lock();
1237
1238 while (!queue_empty(&vm_object_reaper_queue)) {
1239 queue_remove_first(&vm_object_reaper_queue,
1240 object,
1241 vm_object_t,
1242 cached_list);
1243 vm_object_lock(object);
1244 assert(object->terminating);
1245 assert(!object->alive);
1246
1247 shadow_object =
1248 object->pageout ? VM_OBJECT_NULL : object->shadow;
1249
1250 vm_object_reap(object);
1251 /* cache is unlocked and object is no longer valid */
1252 object = VM_OBJECT_NULL;
1253
1254 if (shadow_object != VM_OBJECT_NULL) {
1255 /*
1256 * Drop the reference "object" was holding on
1257 * its shadow object.
1258 */
1259 vm_object_deallocate(shadow_object);
1260 shadow_object = VM_OBJECT_NULL;
1261 }
1262
1263 vm_object_cache_lock();
1264 }
1265
1266 /* wait for more work... */
1267 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
1268 vm_object_cache_unlock();
1269 thread_block((thread_continue_t) vm_object_reaper_thread);
1270 /*NOTREACHED*/
1271 }
1272
1273 /*
1274 * Routine: vm_object_pager_wakeup
1275 * Purpose: Wake up anyone waiting for termination of a pager.
1276 */
1277
1278 static void
1279 vm_object_pager_wakeup(
1280 memory_object_t pager)
1281 {
1282 vm_object_hash_entry_t entry;
1283 boolean_t waiting = FALSE;
1284
1285 /*
1286 * If anyone was waiting for the memory_object_terminate
1287 * to be queued, wake them up now.
1288 */
1289 vm_object_cache_lock();
1290 entry = vm_object_hash_lookup(pager, TRUE);
1291 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1292 waiting = entry->waiting;
1293 vm_object_cache_unlock();
1294 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1295 if (waiting)
1296 thread_wakeup((event_t) pager);
1297 vm_object_hash_entry_free(entry);
1298 }
1299 }
1300
1301 /*
1302 * Routine: vm_object_release_pager
1303 * Purpose: Terminate the pager and, upon completion,
1304 * release our last reference to it.
1305 * This is just like memory_object_terminate, except
1306 * that we wake up anyone blocked in vm_object_enter
1307 * waiting for the termination message to be queued
1308 * before calling memory_object_init.
1309 */
1310 static void
1311 vm_object_release_pager(
1312 memory_object_t pager)
1313 {
1314
1315 /*
1316 * Terminate the pager.
1317 */
1318
1319 (void) memory_object_terminate(pager);
1320
1321 /*
1322 * Wakeup anyone waiting for this terminate
1323 */
1324 vm_object_pager_wakeup(pager);
1325
1326 /*
1327 * Release reference to pager.
1328 */
1329 memory_object_deallocate(pager);
1330 }
1331
1332 /*
1333 * Routine: vm_object_destroy
1334 * Purpose:
1335 * Shut down a VM object, despite the
1336 * presence of address map (or other) references
1337 * to the vm_object.
1338 */
1339 kern_return_t
1340 vm_object_destroy(
1341 vm_object_t object,
1342 __unused kern_return_t reason)
1343 {
1344 memory_object_t old_pager;
1345
1346 if (object == VM_OBJECT_NULL)
1347 return(KERN_SUCCESS);
1348
1349 /*
1350 * Remove the pager association immediately.
1351 *
1352 * This will prevent the memory manager from further
1353 * meddling. [If it wanted to flush data or make
1354 * other changes, it should have done so before performing
1355 * the destroy call.]
1356 */
1357
1358 vm_object_cache_lock();
1359 vm_object_lock(object);
1360 object->can_persist = FALSE;
1361 object->named = FALSE;
1362 object->alive = FALSE;
1363
1364 /*
1365 * Rip out the pager from the vm_object now...
1366 */
1367
1368 vm_object_remove(object);
1369 old_pager = object->pager;
1370 object->pager = MEMORY_OBJECT_NULL;
1371 if (old_pager != MEMORY_OBJECT_NULL)
1372 memory_object_control_disable(object->pager_control);
1373 vm_object_cache_unlock();
1374
1375 /*
1376 * Wait for the existing paging activity (that got
1377 * through before we nulled out the pager) to subside.
1378 */
1379
1380 vm_object_paging_wait(object, THREAD_UNINT);
1381 vm_object_unlock(object);
1382
1383 /*
1384 * Terminate the object now.
1385 */
1386 if (old_pager != MEMORY_OBJECT_NULL) {
1387 vm_object_release_pager(old_pager);
1388
1389 /*
1390 * JMM - Release the caller's reference. This assumes the
1391 * caller had a reference to release, which is a big (but
1392 * currently valid) assumption if this is driven from the
1393 * vnode pager (it is holding a named reference when making
1394 * this call).
1395 */
1396 vm_object_deallocate(object);
1397
1398 }
1399 return(KERN_SUCCESS);
1400 }
1401
1402 /*
1403 * vm_object_deactivate_all_pages
1404 *
1405 * Deactivate all pages in the specified object. (Keep its pages
1406 * in memory even though it is no longer referenced.)
1407 *
1408 * The object must be locked.
1409 */
1410 static void
1411 vm_object_deactivate_all_pages(
1412 register vm_object_t object)
1413 {
1414 register vm_page_t p;
1415
1416 queue_iterate(&object->memq, p, vm_page_t, listq) {
1417 vm_page_lock_queues();
1418 if (!p->busy)
1419 vm_page_deactivate(p);
1420 vm_page_unlock_queues();
1421 }
1422 }
1423
1424 __private_extern__ void
1425 vm_object_deactivate_pages(
1426 vm_object_t object,
1427 vm_object_offset_t offset,
1428 vm_object_size_t size,
1429 boolean_t kill_page)
1430 {
1431 vm_object_t orig_object;
1432 int pages_moved = 0;
1433 int pages_found = 0;
1434
1435 /*
1436 * entered with object lock held, acquire a paging reference to
1437 * prevent the memory_object and control ports from
1438 * being destroyed.
1439 */
1440 orig_object = object;
1441
1442 for (;;) {
1443 register vm_page_t m;
1444 vm_object_offset_t toffset;
1445 vm_object_size_t tsize;
1446
1447 vm_object_paging_begin(object);
1448 vm_page_lock_queues();
1449
1450 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1451
1452 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1453
1454 pages_found++;
1455
1456 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1457
1458 assert(!m->laundry);
1459
1460 m->reference = FALSE;
1461 pmap_clear_reference(m->phys_page);
1462
1463 if ((kill_page) && (object->internal)) {
1464 m->precious = FALSE;
1465 m->dirty = FALSE;
1466 pmap_clear_modify(m->phys_page);
1467 vm_external_state_clr(object->existence_map, offset);
1468 }
1469 VM_PAGE_QUEUES_REMOVE(m);
1470
1471 assert(!m->laundry);
1472 assert(m->object != kernel_object);
1473 assert(m->pageq.next == NULL &&
1474 m->pageq.prev == NULL);
1475 if(m->zero_fill) {
1476 queue_enter_first(
1477 &vm_page_queue_zf,
1478 m, vm_page_t, pageq);
1479 } else {
1480 queue_enter_first(
1481 &vm_page_queue_inactive,
1482 m, vm_page_t, pageq);
1483 }
1484
1485 m->inactive = TRUE;
1486 if (!m->fictitious)
1487 vm_page_inactive_count++;
1488
1489 pages_moved++;
1490 }
1491 }
1492 }
1493 vm_page_unlock_queues();
1494 vm_object_paging_end(object);
1495
1496 if (object->shadow) {
1497 vm_object_t tmp_object;
1498
1499 kill_page = 0;
1500
1501 offset += object->shadow_offset;
1502
1503 tmp_object = object->shadow;
1504 vm_object_lock(tmp_object);
1505
1506 if (object != orig_object)
1507 vm_object_unlock(object);
1508 object = tmp_object;
1509 } else
1510 break;
1511 }
1512 if (object != orig_object)
1513 vm_object_unlock(object);
1514 }
1515
1516 /*
1517 * Routine: vm_object_pmap_protect
1518 *
1519 * Purpose:
1520 * Reduces the permission for all physical
1521 * pages in the specified object range.
1522 *
1523 * If removing write permission only, it is
1524 * sufficient to protect only the pages in
1525 * the top-level object; only those pages may
1526 * have write permission.
1527 *
1528 * If removing all access, we must follow the
1529 * shadow chain from the top-level object to
1530 * remove access to all pages in shadowed objects.
1531 *
1532 * The object must *not* be locked. The object must
1533 * be temporary/internal.
1534 *
1535 * If pmap is not NULL, this routine assumes that
1536 * the only mappings for the pages are in that
1537 * pmap.
1538 */
1539
1540 __private_extern__ void
1541 vm_object_pmap_protect(
1542 register vm_object_t object,
1543 register vm_object_offset_t offset,
1544 vm_object_size_t size,
1545 pmap_t pmap,
1546 vm_map_offset_t pmap_start,
1547 vm_prot_t prot)
1548 {
1549 if (object == VM_OBJECT_NULL)
1550 return;
1551 size = vm_object_round_page(size);
1552 offset = vm_object_trunc_page(offset);
1553
1554 vm_object_lock(object);
1555
1556 assert(object->internal);
1557
1558 while (TRUE) {
1559 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
1560 vm_object_unlock(object);
1561 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1562 return;
1563 }
1564
1565 /* if we are doing large ranges with respect to resident */
1566 /* page count then we should iterate over pages, otherwise */
1567 /* an inverse page look-up will be faster */
1568 if (ptoa_64(object->resident_page_count / 4) < size) {
1569 vm_page_t p;
1570 vm_object_offset_t end;
1571
1572 end = offset + size;
1573
1574 if (pmap != PMAP_NULL) {
1575 queue_iterate(&object->memq, p, vm_page_t, listq) {
1576 if (!p->fictitious &&
1577 (offset <= p->offset) && (p->offset < end)) {
1578 vm_map_offset_t start;
1579
1580 start = pmap_start + p->offset - offset;
1581 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
1582 }
1583 }
1584 } else {
1585 queue_iterate(&object->memq, p, vm_page_t, listq) {
1586 if (!p->fictitious &&
1587 (offset <= p->offset) && (p->offset < end)) {
1588
1589 pmap_page_protect(p->phys_page,
1590 prot & ~p->page_lock);
1591 }
1592 }
1593 }
1594 } else {
1595 vm_page_t p;
1596 vm_object_offset_t end;
1597 vm_object_offset_t target_off;
1598
1599 end = offset + size;
1600
1601 if (pmap != PMAP_NULL) {
1602 for(target_off = offset;
1603 target_off < end;
1604 target_off += PAGE_SIZE) {
1605 p = vm_page_lookup(object, target_off);
1606 if (p != VM_PAGE_NULL) {
1607 vm_offset_t start;
1608 start = pmap_start +
1609 (vm_offset_t)(p->offset - offset);
1610 pmap_protect(pmap, start,
1611 start + PAGE_SIZE, prot);
1612 }
1613 }
1614 } else {
1615 for(target_off = offset;
1616 target_off < end; target_off += PAGE_SIZE) {
1617 p = vm_page_lookup(object, target_off);
1618 if (p != VM_PAGE_NULL) {
1619 pmap_page_protect(p->phys_page,
1620 prot & ~p->page_lock);
1621 }
1622 }
1623 }
1624 }
1625
1626 if (prot == VM_PROT_NONE) {
1627 /*
1628 * Must follow shadow chain to remove access
1629 * to pages in shadowed objects.
1630 */
1631 register vm_object_t next_object;
1632
1633 next_object = object->shadow;
1634 if (next_object != VM_OBJECT_NULL) {
1635 offset += object->shadow_offset;
1636 vm_object_lock(next_object);
1637 vm_object_unlock(object);
1638 object = next_object;
1639 }
1640 else {
1641 /*
1642 * End of chain - we are done.
1643 */
1644 break;
1645 }
1646 }
1647 else {
1648 /*
1649 * Pages in shadowed objects may never have
1650 * write permission - we may stop here.
1651 */
1652 break;
1653 }
1654 }
1655
1656 vm_object_unlock(object);
1657 }
1658
1659 /*
1660 * Routine: vm_object_copy_slowly
1661 *
1662 * Description:
1663 * Copy the specified range of the source
1664 * virtual memory object without using
1665 * protection-based optimizations (such
1666 * as copy-on-write). The pages in the
1667 * region are actually copied.
1668 *
1669 * In/out conditions:
1670 * The caller must hold a reference and a lock
1671 * for the source virtual memory object. The source
1672 * object will be returned *unlocked*.
1673 *
1674 * Results:
1675 * If the copy is completed successfully, KERN_SUCCESS is
1676 * returned. If the caller asserted the interruptible
1677 * argument, and an interruption occurred while waiting
1678 * for a user-generated event, MACH_SEND_INTERRUPTED is
1679 * returned. Other values may be returned to indicate
1680 * hard errors during the copy operation.
1681 *
1682 * A new virtual memory object is returned in a
1683 * parameter (_result_object). The contents of this
1684 * new object, starting at a zero offset, are a copy
1685 * of the source memory region. In the event of
1686 * an error, this parameter will contain the value
1687 * VM_OBJECT_NULL.
1688 */
1689 __private_extern__ kern_return_t
1690 vm_object_copy_slowly(
1691 register vm_object_t src_object,
1692 vm_object_offset_t src_offset,
1693 vm_object_size_t size,
1694 boolean_t interruptible,
1695 vm_object_t *_result_object) /* OUT */
1696 {
1697 vm_object_t new_object;
1698 vm_object_offset_t new_offset;
1699
1700 vm_object_offset_t src_lo_offset = src_offset;
1701 vm_object_offset_t src_hi_offset = src_offset + size;
1702
1703 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1704 src_object, src_offset, size, 0, 0);
1705
1706 if (size == 0) {
1707 vm_object_unlock(src_object);
1708 *_result_object = VM_OBJECT_NULL;
1709 return(KERN_INVALID_ARGUMENT);
1710 }
1711
1712 /*
1713 * Prevent destruction of the source object while we copy.
1714 */
1715
1716 assert(src_object->ref_count > 0);
1717 src_object->ref_count++;
1718 VM_OBJ_RES_INCR(src_object);
1719 vm_object_unlock(src_object);
1720
1721 /*
1722 * Create a new object to hold the copied pages.
1723 * A few notes:
1724 * We fill the new object starting at offset 0,
1725 * regardless of the input offset.
1726 * We don't bother to lock the new object within
1727 * this routine, since we have the only reference.
1728 */
1729
1730 new_object = vm_object_allocate(size);
1731 new_offset = 0;
1732 vm_object_lock(new_object);
1733
1734 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1735
1736 for ( ;
1737 size != 0 ;
1738 src_offset += PAGE_SIZE_64,
1739 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1740 ) {
1741 vm_page_t new_page;
1742 vm_fault_return_t result;
1743
1744 while ((new_page = vm_page_alloc(new_object, new_offset))
1745 == VM_PAGE_NULL) {
1746 if (!vm_page_wait(interruptible)) {
1747 vm_object_unlock(new_object);
1748 vm_object_deallocate(new_object);
1749 vm_object_deallocate(src_object);
1750 *_result_object = VM_OBJECT_NULL;
1751 return(MACH_SEND_INTERRUPTED);
1752 }
1753 }
1754
1755 do {
1756 vm_prot_t prot = VM_PROT_READ;
1757 vm_page_t _result_page;
1758 vm_page_t top_page;
1759 register
1760 vm_page_t result_page;
1761 kern_return_t error_code;
1762
1763 vm_object_lock(src_object);
1764 vm_object_paging_begin(src_object);
1765
1766 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1767 result = vm_fault_page(src_object, src_offset,
1768 VM_PROT_READ, FALSE, interruptible,
1769 src_lo_offset, src_hi_offset,
1770 VM_BEHAVIOR_SEQUENTIAL,
1771 &prot, &_result_page, &top_page,
1772 (int *)0,
1773 &error_code, FALSE, FALSE, NULL, 0);
1774
1775 switch(result) {
1776 case VM_FAULT_SUCCESS:
1777 result_page = _result_page;
1778
1779 /*
1780 * We don't need to hold the object
1781 * lock -- the busy page will be enough.
1782 * [We don't care about picking up any
1783 * new modifications.]
1784 *
1785 * Copy the page to the new object.
1786 *
1787 * POLICY DECISION:
1788 * If result_page is clean,
1789 * we could steal it instead
1790 * of copying.
1791 */
1792
1793 vm_object_unlock(result_page->object);
1794 vm_page_copy(result_page, new_page);
1795
1796 /*
1797 * Let go of both pages (make them
1798 * not busy, perform wakeup, activate).
1799 */
1800
1801 new_page->busy = FALSE;
1802 new_page->dirty = TRUE;
1803 vm_object_lock(result_page->object);
1804 PAGE_WAKEUP_DONE(result_page);
1805
1806 vm_page_lock_queues();
1807 if (!result_page->active &&
1808 !result_page->inactive)
1809 vm_page_activate(result_page);
1810 vm_page_activate(new_page);
1811 vm_page_unlock_queues();
1812
1813 /*
1814 * Release paging references and
1815 * top-level placeholder page, if any.
1816 */
1817
1818 vm_fault_cleanup(result_page->object,
1819 top_page);
1820
1821 break;
1822
1823 case VM_FAULT_RETRY:
1824 break;
1825
1826 case VM_FAULT_FICTITIOUS_SHORTAGE:
1827 vm_page_more_fictitious();
1828 break;
1829
1830 case VM_FAULT_MEMORY_SHORTAGE:
1831 if (vm_page_wait(interruptible))
1832 break;
1833 /* fall thru */
1834
1835 case VM_FAULT_INTERRUPTED:
1836 vm_page_free(new_page);
1837 vm_object_unlock(new_object);
1838 vm_object_deallocate(new_object);
1839 vm_object_deallocate(src_object);
1840 *_result_object = VM_OBJECT_NULL;
1841 return(MACH_SEND_INTERRUPTED);
1842
1843 case VM_FAULT_MEMORY_ERROR:
1844 /*
1845 * A policy choice:
1846 * (a) ignore pages that we can't
1847 * copy
1848 * (b) return the null object if
1849 * any page fails [chosen]
1850 */
1851
1852 vm_page_lock_queues();
1853 vm_page_free(new_page);
1854 vm_page_unlock_queues();
1855 vm_object_unlock(new_object);
1856 vm_object_deallocate(new_object);
1857 vm_object_deallocate(src_object);
1858 *_result_object = VM_OBJECT_NULL;
1859 return(error_code ? error_code:
1860 KERN_MEMORY_ERROR);
1861 }
1862 } while (result != VM_FAULT_SUCCESS);
1863 }
1864
1865 /*
1866 * Lose the extra reference, and return our object.
1867 */
1868
1869 vm_object_unlock(new_object);
1870 vm_object_deallocate(src_object);
1871 *_result_object = new_object;
1872 return(KERN_SUCCESS);
1873 }
1874
1875 /*
1876 * Routine: vm_object_copy_quickly
1877 *
1878 * Purpose:
1879 * Copy the specified range of the source virtual
1880 * memory object, if it can be done without waiting
1881 * for user-generated events.
1882 *
1883 * Results:
1884 * If the copy is successful, the copy is returned in
1885 * the arguments; otherwise, the arguments are not
1886 * affected.
1887 *
1888 * In/out conditions:
1889 * The object should be unlocked on entry and exit.
1890 */
1891
1892 /*ARGSUSED*/
1893 __private_extern__ boolean_t
1894 vm_object_copy_quickly(
1895 vm_object_t *_object, /* INOUT */
1896 __unused vm_object_offset_t offset, /* IN */
1897 __unused vm_object_size_t size, /* IN */
1898 boolean_t *_src_needs_copy, /* OUT */
1899 boolean_t *_dst_needs_copy) /* OUT */
1900 {
1901 vm_object_t object = *_object;
1902 memory_object_copy_strategy_t copy_strategy;
1903
1904 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1905 *_object, offset, size, 0, 0);
1906 if (object == VM_OBJECT_NULL) {
1907 *_src_needs_copy = FALSE;
1908 *_dst_needs_copy = FALSE;
1909 return(TRUE);
1910 }
1911
1912 vm_object_lock(object);
1913
1914 copy_strategy = object->copy_strategy;
1915
1916 switch (copy_strategy) {
1917 case MEMORY_OBJECT_COPY_SYMMETRIC:
1918
1919 /*
1920 * Symmetric copy strategy.
1921 * Make another reference to the object.
1922 * Leave object/offset unchanged.
1923 */
1924
1925 assert(object->ref_count > 0);
1926 object->ref_count++;
1927 vm_object_res_reference(object);
1928 object->shadowed = TRUE;
1929 vm_object_unlock(object);
1930
1931 /*
1932 * Both source and destination must make
1933 * shadows, and the source must be made
1934 * read-only if not already.
1935 */
1936
1937 *_src_needs_copy = TRUE;
1938 *_dst_needs_copy = TRUE;
1939
1940 break;
1941
1942 case MEMORY_OBJECT_COPY_DELAY:
1943 vm_object_unlock(object);
1944 return(FALSE);
1945
1946 default:
1947 vm_object_unlock(object);
1948 return(FALSE);
1949 }
1950 return(TRUE);
1951 }
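/*
 * Illustrative (non-kernel) sketch of what the two out-flags above imply
 * for a hypothetical caller under the symmetric strategy: the object is
 * shared by reference only, and both the source and destination mappings
 * are flagged so that the first write fault on either side creates a
 * private shadow (via vm_object_shadow()).  The struct and function
 * names below are invented for illustration; they are not kernel API.
 */
#if 0	/* illustrative sketch only -- not compiled */
#include <stdbool.h>

struct toy_mapping {
	void	*object;	/* stands in for a vm_object_t reference */
	bool	 needs_copy;	/* write faults must shadow this mapping */
};

static void
toy_share_symmetric(struct toy_mapping *src, struct toy_mapping *dst)
{
	bool src_needs_copy, dst_needs_copy;

	/* vm_object_copy_quickly() analogue: share the same object and */
	/* report that both sides must be treated as copy-on-write.     */
	dst->object = src->object;
	src_needs_copy = true;
	dst_needs_copy = true;

	src->needs_copy = src_needs_copy;
	dst->needs_copy = dst_needs_copy;
}
#endif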
1952
1953 static int copy_call_count = 0;
1954 static int copy_call_sleep_count = 0;
1955 static int copy_call_restart_count = 0;
1956
1957 /*
1958 * Routine: vm_object_copy_call [internal]
1959 *
1960 * Description:
1961 * Copy the source object (src_object), using the
1962 * user-managed copy algorithm.
1963 *
1964 * In/out conditions:
1965 * The source object must be locked on entry. It
1966 * will be *unlocked* on exit.
1967 *
1968 * Results:
1969 * If the copy is successful, KERN_SUCCESS is returned.
1970 * A new object that represents the copied virtual
1971 * memory is returned in a parameter (*_result_object).
1972 * If the return value indicates an error, this parameter
1973 * is not valid.
1974 */
1975 static kern_return_t
1976 vm_object_copy_call(
1977 vm_object_t src_object,
1978 vm_object_offset_t src_offset,
1979 vm_object_size_t size,
1980 vm_object_t *_result_object) /* OUT */
1981 {
1982 kern_return_t kr;
1983 vm_object_t copy;
1984 boolean_t check_ready = FALSE;
1985
1986 /*
1987 * If a copy is already in progress, wait and retry.
1988 *
1989 * XXX
1990 * Consider making this call interruptible, as Mike
1991 * intended it to be.
1992 *
1993 * XXXO
1994 * Need a counter or version or something to allow
1995 * us to use the copy that the currently requesting
1996 * thread is obtaining -- is it worth adding to the
1997 * vm object structure? Depends on how common this case is.
1998 */
1999 copy_call_count++;
2000 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
2001 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
2002 THREAD_UNINT);
2003 copy_call_restart_count++;
2004 }
2005
2006 /*
2007 * Indicate (for the benefit of memory_object_create_copy)
2008 * that we want a copy for src_object. (Note that we cannot
2009 * do a real assert_wait before calling memory_object_copy,
2010 * so we simply set the flag.)
2011 */
2012
2013 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
2014 vm_object_unlock(src_object);
2015
2016 /*
2017 * Ask the memory manager to give us a memory object
2018 * which represents a copy of the src object.
2019 * The memory manager may give us a memory object
2020 * which we already have, or it may give us a
2021 * new memory object. This memory object will arrive
2022 * via memory_object_create_copy.
2023 */
2024
2025 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
2026 if (kr != KERN_SUCCESS) {
2027 return kr;
2028 }
2029
2030 /*
2031 * Wait for the copy to arrive.
2032 */
2033 vm_object_lock(src_object);
2034 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
2035 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
2036 THREAD_UNINT);
2037 copy_call_sleep_count++;
2038 }
2039 Retry:
2040 assert(src_object->copy != VM_OBJECT_NULL);
2041 copy = src_object->copy;
2042 if (!vm_object_lock_try(copy)) {
2043 vm_object_unlock(src_object);
2044 mutex_pause(); /* wait a bit */
2045 vm_object_lock(src_object);
2046 goto Retry;
2047 }
2048 if (copy->size < src_offset+size)
2049 copy->size = src_offset+size;
2050
2051 if (!copy->pager_ready)
2052 check_ready = TRUE;
2053
2054 /*
2055 * Return the copy.
2056 */
2057 *_result_object = copy;
2058 vm_object_unlock(copy);
2059 vm_object_unlock(src_object);
2060
2061 /* Wait for the copy to be ready. */
2062 if (check_ready == TRUE) {
2063 vm_object_lock(copy);
2064 while (!copy->pager_ready) {
2065 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
2066 }
2067 vm_object_unlock(copy);
2068 }
2069
2070 return KERN_SUCCESS;
2071 }
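/*
 * Illustrative user-space sketch of the lock back-off idiom in the Retry
 * loop above (and again in vm_object_copy_delayed()): with one object
 * lock already held, the other object may only be try-locked; on failure
 * the held lock is dropped, the thread pauses briefly, and the whole
 * sequence is restarted rather than risking a deadlock.  pthread mutexes
 * and sched_yield() stand in for the VM object locks and mutex_pause();
 * all names here are hypothetical.
 */
#if 0	/* illustrative sketch only -- not compiled */
#include <pthread.h>
#include <sched.h>

struct toy_object {
	pthread_mutex_t	lock;
};

/*
 * Acquire `second` while already holding `first`, without blocking on
 * `second` (which could deadlock against a thread locking in the
 * opposite order): try-lock, and on contention back off and retry.
 */
static void
toy_lock_out_of_order(struct toy_object *first, struct toy_object *second)
{
	for (;;) {
		if (pthread_mutex_trylock(&second->lock) == 0)
			return;			/* both locks held */
		pthread_mutex_unlock(&first->lock);
		sched_yield();			/* mutex_pause() analogue */
		pthread_mutex_lock(&first->lock);
		/* real code re-validates its state here before retrying */
	}
}
#endif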
2072
2073 static int copy_delayed_lock_collisions = 0;
2074 static int copy_delayed_max_collisions = 0;
2075 static int copy_delayed_lock_contention = 0;
2076 static int copy_delayed_protect_iterate = 0;
2077
2078 /*
2079 * Routine: vm_object_copy_delayed [internal]
2080 *
2081 * Description:
2082 * Copy the specified virtual memory object, using
2083 * the asymmetric copy-on-write algorithm.
2084 *
2085 * In/out conditions:
2086 * The src_object must be locked on entry. It will be unlocked
2087 * on exit - so the caller must also hold a reference to it.
2088 *
2089 * This routine will not block waiting for user-generated
2090 * events. It is not interruptible.
2091 */
2092 __private_extern__ vm_object_t
2093 vm_object_copy_delayed(
2094 vm_object_t src_object,
2095 vm_object_offset_t src_offset,
2096 vm_object_size_t size)
2097 {
2098 vm_object_t new_copy = VM_OBJECT_NULL;
2099 vm_object_t old_copy;
2100 vm_page_t p;
2101 vm_object_size_t copy_size = src_offset + size;
2102
2103 int collisions = 0;
2104 /*
2105 * The user-level memory manager wants to see all of the changes
2106 * to this object, but it has promised not to make any changes on
2107 * its own.
2108 *
2109 * Perform an asymmetric copy-on-write, as follows:
2110 * Create a new object, called a "copy object" to hold
2111 * pages modified by the new mapping (i.e., the copy,
2112 * not the original mapping).
2113 * Record the original object as the backing object for
2114 * the copy object. If the original mapping does not
2115 * change a page, it may be used read-only by the copy.
2116 * Record the copy object in the original object.
2117 * When the original mapping causes a page to be modified,
2118 * it must be copied to a new page that is "pushed" to
2119 * the copy object.
2120 * Mark the new mapping (the copy object) copy-on-write.
2121 * This makes the copy object itself read-only, allowing
2122 * it to be reused if the original mapping makes no
2123 * changes, and simplifying the synchronization required
2124 * in the "push" operation described above.
2125 *
2126 * The copy-on-write is said to be asymmetric because the original
2127 * object is *not* marked copy-on-write. A copied page is pushed
2128 * to the copy object, regardless of which party attempted to modify
2129 * the page.
2130 *
2131 * Repeated asymmetric copy operations may be done. If the
2132 * original object has not been changed since the last copy, its
2133 * copy object can be reused. Otherwise, a new copy object can be
2134 * inserted between the original object and its previous copy
2135 * object. Since any copy object is read-only, this cannot affect
2136 * affect the contents of the previous copy object.
2137 *
2138 * Note that a copy object is higher in the object tree than the
2139 * original object; therefore, use of the copy object recorded in
2140 * the original object must be done carefully, to avoid deadlock.
2141 */
2142
2143 Retry:
2144
2145 /*
2146 * Wait for paging in progress.
2147 */
2148 if (!src_object->true_share)
2149 vm_object_paging_wait(src_object, THREAD_UNINT);
2150
2151 /*
2152 * See whether we can reuse the result of a previous
2153 * copy operation.
2154 */
2155
2156 old_copy = src_object->copy;
2157 if (old_copy != VM_OBJECT_NULL) {
2158 /*
2159 * Try to get the locks (out of order)
2160 */
2161 if (!vm_object_lock_try(old_copy)) {
2162 vm_object_unlock(src_object);
2163 mutex_pause();
2164
2165 /* Heisenberg Rules */
2166 copy_delayed_lock_collisions++;
2167 if (collisions++ == 0)
2168 copy_delayed_lock_contention++;
2169
2170 if (collisions > copy_delayed_max_collisions)
2171 copy_delayed_max_collisions = collisions;
2172
2173 vm_object_lock(src_object);
2174 goto Retry;
2175 }
2176
2177 /*
2178 * Determine whether the old copy object has
2179 * been modified.
2180 */
2181
2182 if (old_copy->resident_page_count == 0 &&
2183 !old_copy->pager_created) {
2184 /*
2185 * It has not been modified.
2186 *
2187 * Return another reference to
2188 * the existing copy-object if
2189 * we can safely grow it (if
2190 * needed).
2191 */
2192
2193 if (old_copy->size < copy_size) {
2194 /*
2195 * We can't perform a delayed copy if any of the
2196 * pages in the extended range are wired (because
2197 * we can't safely take write permission away from
2198 * wired pages). If the pages aren't wired, then
2199 * go ahead and protect them.
2200 */
2201 copy_delayed_protect_iterate++;
2202 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2203 if (!p->fictitious &&
2204 p->offset >= old_copy->size &&
2205 p->offset < copy_size) {
2206 if (p->wire_count > 0) {
2207 vm_object_unlock(old_copy);
2208 vm_object_unlock(src_object);
2209
2210 if (new_copy != VM_OBJECT_NULL) {
2211 vm_object_unlock(new_copy);
2212 vm_object_deallocate(new_copy);
2213 }
2214
2215 return VM_OBJECT_NULL;
2216 } else {
2217 pmap_page_protect(p->phys_page,
2218 (VM_PROT_ALL & ~VM_PROT_WRITE &
2219 ~p->page_lock));
2220 }
2221 }
2222 }
2223 old_copy->size = copy_size;
2224 }
2225
2226 vm_object_reference_locked(old_copy);
2227 vm_object_unlock(old_copy);
2228 vm_object_unlock(src_object);
2229
2230 if (new_copy != VM_OBJECT_NULL) {
2231 vm_object_unlock(new_copy);
2232 vm_object_deallocate(new_copy);
2233 }
2234
2235 return(old_copy);
2236 }
2237
2238 /*
2239 * Adjust the size argument so that the newly-created
2240 * copy object will be large enough to back either the
2241 * old copy object or the new mapping.
2242 */
2243 if (old_copy->size > copy_size)
2244 copy_size = old_copy->size;
2245
2246 if (new_copy == VM_OBJECT_NULL) {
2247 vm_object_unlock(old_copy);
2248 vm_object_unlock(src_object);
2249 new_copy = vm_object_allocate(copy_size);
2250 vm_object_lock(src_object);
2251 vm_object_lock(new_copy);
2252 goto Retry;
2253 }
2254 new_copy->size = copy_size;
2255
2256 /*
2257 * The copy-object is always made large enough to
2258 * completely shadow the original object, since
2259 * it may have several users who want to shadow
2260 * the original object at different points.
2261 */
2262
2263 assert((old_copy->shadow == src_object) &&
2264 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2265
2266 } else if (new_copy == VM_OBJECT_NULL) {
2267 vm_object_unlock(src_object);
2268 new_copy = vm_object_allocate(copy_size);
2269 vm_object_lock(src_object);
2270 vm_object_lock(new_copy);
2271 goto Retry;
2272 }
2273
2274 /*
2275 * We now have the src object locked, and the new copy object
2276 * allocated and locked (and potentially the old copy locked).
2277 * Before we go any further, make sure we can still perform
2278 * a delayed copy, as the situation may have changed.
2279 *
2280 * Specifically, we can't perform a delayed copy if any of the
2281 * pages in the range are wired (because we can't safely take
2282 * write permission away from wired pages). If the pages aren't
2283 * wired, then go ahead and protect them.
2284 */
2285 copy_delayed_protect_iterate++;
2286 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2287 if (!p->fictitious && p->offset < copy_size) {
2288 if (p->wire_count > 0) {
2289 if (old_copy)
2290 vm_object_unlock(old_copy);
2291 vm_object_unlock(src_object);
2292 vm_object_unlock(new_copy);
2293 vm_object_deallocate(new_copy);
2294 return VM_OBJECT_NULL;
2295 } else {
2296 pmap_page_protect(p->phys_page,
2297 (VM_PROT_ALL & ~VM_PROT_WRITE &
2298 ~p->page_lock));
2299 }
2300 }
2301 }
2302
2303 if (old_copy != VM_OBJECT_NULL) {
2304 /*
2305 * Make the old copy-object shadow the new one.
2306 * It will receive no more pages from the original
2307 * object.
2308 */
2309
2310 src_object->ref_count--; /* remove ref. from old_copy */
2311 assert(src_object->ref_count > 0);
2312 old_copy->shadow = new_copy;
2313 assert(new_copy->ref_count > 0);
2314 new_copy->ref_count++; /* for old_copy->shadow ref. */
2315
2316 #if TASK_SWAPPER
2317 if (old_copy->res_count) {
2318 VM_OBJ_RES_INCR(new_copy);
2319 VM_OBJ_RES_DECR(src_object);
2320 }
2321 #endif
2322
2323 vm_object_unlock(old_copy); /* done with old_copy */
2324 }
2325
2326 /*
2327 * Point the new copy at the existing object.
2328 */
2329 new_copy->shadow = src_object;
2330 new_copy->shadow_offset = 0;
2331 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2332 assert(src_object->ref_count > 0);
2333 src_object->ref_count++;
2334 VM_OBJ_RES_INCR(src_object);
2335 src_object->copy = new_copy;
2336 vm_object_unlock(src_object);
2337 vm_object_unlock(new_copy);
2338
2339 XPR(XPR_VM_OBJECT,
2340 "vm_object_copy_delayed: used copy object %X for source %X\n",
2341 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2342
2343 return(new_copy);
2344 }
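/*
 * Illustrative user-space model of the asymmetric copy-on-write scheme
 * described in the comment at the top of this routine: a write through
 * the *original* object first pushes the old page contents into the
 * current copy object, so the copy keeps observing the snapshot taken
 * at copy time, while a copy object that never received a push can be
 * reused by the next copy operation.  The structures and names below
 * are toy stand-ins, not the real vm_object/vm_page definitions.
 */
#if 0	/* illustrative sketch only -- not compiled */
#include <string.h>

#define TOY_PAGES	8
#define TOY_PAGE_SZ	16

struct toy_object {
	char			page[TOY_PAGES][TOY_PAGE_SZ];
	unsigned char		present[TOY_PAGES];	/* page pushed here? */
	struct toy_object	*shadow;		/* backing object    */
};

/* vm_object_copy_delayed() analogue: the new copy object starts empty */
/* and shadows the source; no pages are copied until a write occurs.   */
static void
toy_copy_delayed(struct toy_object *src, struct toy_object *copy)
{
	memset(copy->present, 0, sizeof copy->present);
	copy->shadow = src;
}

/* Write through the original: "push" the old contents to the copy     */
/* object first (if it hasn't been pushed yet), then modify in place.  */
static void
toy_write_original(struct toy_object *src, struct toy_object *copy,
		   int pg, const char data[TOY_PAGE_SZ])
{
	if (copy != NULL && !copy->present[pg]) {
		memcpy(copy->page[pg], src->page[pg], TOY_PAGE_SZ);
		copy->present[pg] = 1;
	}
	memcpy(src->page[pg], data, TOY_PAGE_SZ);
}

/* Read through the copy: use a pushed page if present, otherwise fall */
/* through to the (still unmodified) page in the backing object.       */
static const char *
toy_read_copy(struct toy_object *copy, int pg)
{
	return copy->present[pg] ? copy->page[pg] : copy->shadow->page[pg];
}
#endif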
2345
2346 /*
2347 * Routine: vm_object_copy_strategically
2348 *
2349 * Purpose:
2350 * Perform a copy according to the source object's
2351 * declared strategy. This operation may block,
2352 * and may be interrupted.
2353 */
2354 __private_extern__ kern_return_t
2355 vm_object_copy_strategically(
2356 register vm_object_t src_object,
2357 vm_object_offset_t src_offset,
2358 vm_object_size_t size,
2359 vm_object_t *dst_object, /* OUT */
2360 vm_object_offset_t *dst_offset, /* OUT */
2361 boolean_t *dst_needs_copy) /* OUT */
2362 {
2363 boolean_t result;
2364 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2365 memory_object_copy_strategy_t copy_strategy;
2366
2367 assert(src_object != VM_OBJECT_NULL);
2368
2369 vm_object_lock(src_object);
2370
2371 /*
2372 * The copy strategy is only valid if the memory manager
2373 * is "ready". Internal objects are always ready.
2374 */
2375
2376 while (!src_object->internal && !src_object->pager_ready) {
2377 wait_result_t wait_result;
2378
2379 wait_result = vm_object_sleep( src_object,
2380 VM_OBJECT_EVENT_PAGER_READY,
2381 interruptible);
2382 if (wait_result != THREAD_AWAKENED) {
2383 vm_object_unlock(src_object);
2384 *dst_object = VM_OBJECT_NULL;
2385 *dst_offset = 0;
2386 *dst_needs_copy = FALSE;
2387 return(MACH_SEND_INTERRUPTED);
2388 }
2389 }
2390
2391 copy_strategy = src_object->copy_strategy;
2392
2393 /*
2394 * Use the appropriate copy strategy.
2395 */
2396
2397 switch (copy_strategy) {
2398 case MEMORY_OBJECT_COPY_DELAY:
2399 *dst_object = vm_object_copy_delayed(src_object,
2400 src_offset, size);
2401 if (*dst_object != VM_OBJECT_NULL) {
2402 *dst_offset = src_offset;
2403 *dst_needs_copy = TRUE;
2404 result = KERN_SUCCESS;
2405 break;
2406 }
2407 vm_object_lock(src_object);
2408 /* fall thru when delayed copy not allowed */
2409
2410 case MEMORY_OBJECT_COPY_NONE:
2411 result = vm_object_copy_slowly(src_object, src_offset, size,
2412 interruptible, dst_object);
2413 if (result == KERN_SUCCESS) {
2414 *dst_offset = 0;
2415 *dst_needs_copy = FALSE;
2416 }
2417 break;
2418
2419 case MEMORY_OBJECT_COPY_CALL:
2420 result = vm_object_copy_call(src_object, src_offset, size,
2421 dst_object);
2422 if (result == KERN_SUCCESS) {
2423 *dst_offset = src_offset;
2424 *dst_needs_copy = TRUE;
2425 }
2426 break;
2427
2428 case MEMORY_OBJECT_COPY_SYMMETRIC:
2429 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2430 vm_object_unlock(src_object);
2431 result = KERN_MEMORY_RESTART_COPY;
2432 break;
2433
2434 default:
2435 panic("copy_strategically: bad strategy");
2436 result = KERN_INVALID_ARGUMENT;
2437 }
2438 return(result);
2439 }
2440
2441 /*
2442 * vm_object_shadow:
2443 *
2444 * Create a new object which is backed by the
2445 * specified existing object range. The source
2446 * object reference is deallocated.
2447 *
2448 * The new object and offset into that object
2449 * are returned in the source parameters.
2450 */
2451 boolean_t vm_object_shadow_check = FALSE;
2452
2453 __private_extern__ boolean_t
2454 vm_object_shadow(
2455 vm_object_t *object, /* IN/OUT */
2456 vm_object_offset_t *offset, /* IN/OUT */
2457 vm_object_size_t length)
2458 {
2459 register vm_object_t source;
2460 register vm_object_t result;
2461
2462 source = *object;
2463 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2464
2465 /*
2466 * Determine if we really need a shadow.
2467 */
2468
2469 if (vm_object_shadow_check && source->ref_count == 1 &&
2470 (source->shadow == VM_OBJECT_NULL ||
2471 source->shadow->copy == VM_OBJECT_NULL))
2472 {
2473 source->shadowed = FALSE;
2474 return FALSE;
2475 }
2476
2477 /*
2478 * Allocate a new object with the given length
2479 */
2480
2481 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2482 panic("vm_object_shadow: no object for shadowing");
2483
2484 /*
2485 * The new object shadows the source object, adding
2486 * a reference to it. Our caller changes his reference
2487 * to point to the new object, removing a reference to
2488 * the source object. Net result: no change of reference
2489 * count.
2490 */
2491 result->shadow = source;
2492
2493 /*
2494 * Store the offset into the source object,
2495 * and fix up the offset into the new object.
2496 */
2497
2498 result->shadow_offset = *offset;
2499
2500 /*
2501 * Return the new object and offset
2502 */
2503
2504 *offset = 0;
2505 *object = result;
2506 return TRUE;
2507 }
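/*
 * Illustrative sketch of the structure vm_object_shadow() sets up: the
 * new (front) object records its backing object and the offset into it,
 * and a page miss in the front object falls through to the backing
 * object at (offset + shadow_offset), continuing down the chain.  The
 * types below are toy stand-ins, not the kernel's vm_object.
 */
#if 0	/* illustrative sketch only -- not compiled */
#include <stddef.h>

struct toy_object {
	struct toy_object	*shadow;	/* backing object, or NULL  */
	unsigned long		shadow_offset;	/* offset into the shadow   */
	/* toy resident-page lookup: non-NULL if the page is present */
	void			*(*page_lookup)(struct toy_object *,
						unsigned long off);
};

/* Resolve a page the way a fault would: front object first, then each */
/* backing object with the accumulated shadow offsets applied.         */
static void *
toy_shadow_chain_lookup(struct toy_object *obj, unsigned long off)
{
	while (obj != NULL) {
		void *page = obj->page_lookup(obj, off);
		if (page != NULL)
			return page;
		off += obj->shadow_offset;
		obj = obj->shadow;
	}
	return NULL;	/* bottom of the chain: pager or zero-fill */
}
#endif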
2508
2509 /*
2510 * The relationship between vm_object structures and
2511 * the memory_object requires careful synchronization.
2512 *
2513 * All associations are created by memory_object_create_named
2514 * for external pagers and vm_object_pager_create for internal
2515 * objects as follows:
2516 *
2517 * pager: the memory_object itself, supplied by
2518 * the user requesting a mapping (or the kernel,
2519 * when initializing internal objects); the
2520 * kernel simulates holding send rights by keeping
2521 * a port reference;
2522 *
2523 * pager_request:
2524 * the memory object control port,
2525 * created by the kernel; the kernel holds
2526 * receive (and ownership) rights to this
2527 * port, but no other references.
2528 *
2529 * When initialization is complete, the "initialized" field
2530 * is asserted. Other mappings using a particular memory object,
2531 * and any references to the vm_object gained through the
2532 * port association must wait for this initialization to occur.
2533 *
2534 * In order to allow the memory manager to set attributes before
2535 * requests (notably virtual copy operations, but also data or
2536 * unlock requests) are made, a "ready" attribute is made available.
2537 * Only the memory manager may affect the value of this attribute.
2538 * Its value does not affect critical kernel functions, such as
2539 * internal object initialization or destruction. [Furthermore,
2540 * memory objects created by the kernel are assumed to be ready
2541 * immediately; the default memory manager need not explicitly
2542 * set the "ready" attribute.]
2543 *
2544 * [Both the "initialized" and "ready" attribute wait conditions
2545 * use the "pager" field as the wait event.]
2546 *
2547 * The port associations can be broken down by any of the
2548 * following routines:
2549 * vm_object_terminate:
2550 * No references to the vm_object remain, and
2551 * the object cannot (or will not) be cached.
2552 * This is the normal case, and is done even
2553 * though one of the other cases has already been
2554 * done.
2555 * memory_object_destroy:
2556 * The memory manager has requested that the
2557 * kernel relinquish references to the memory
2558 * object. [The memory manager may not want to
2559 * destroy the memory object, but may wish to
2560 * refuse or tear down existing memory mappings.]
2561 *
2562 * Each routine that breaks an association must break all of
2563 * them at once. At some later time, that routine must clear
2564 * the pager field and release the memory object references.
2565 * [Furthermore, each routine must cope with the simultaneous
2566 * or previous operations of the others.]
2567 *
2568 * In addition to the lock on the object, the vm_object_cache_lock
2569 * governs the associations. References gained through the
2570 * association require use of the cache lock.
2571 *
2572 * Because the pager field may be cleared spontaneously, it
2573 * cannot be used to determine whether a memory object has
2574 * ever been associated with a particular vm_object. [This
2575 * knowledge is important to the shadow object mechanism.]
2576 * For this reason, an additional "created" attribute is
2577 * provided.
2578 *
2579 * During various paging operations, the pager reference found in the
2580 * vm_object must be valid. To prevent this from being released,
2581 * (other than being removed, i.e., made null), routines may use
2582 * the vm_object_paging_begin/end routines [actually, macros].
2583 * The implementation uses the "paging_in_progress" and "wanted" fields.
2584 * [Operations that alter the validity of the pager values include the
2585 * termination routines and vm_object_collapse.]
2586 */
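/*
 * Illustrative user-space sketch of the two-stage handshake described
 * above: every new user of the object waits for "initialized" (set by
 * whichever thread completed the pager setup), while operations that
 * depend on manager-supplied attributes additionally wait for "ready"
 * (set by the memory manager, or immediately for internal objects).
 * pthread primitives stand in for vm_object_sleep()/vm_object_wakeup();
 * the names below are hypothetical, not kernel API.
 */
#if 0	/* illustrative sketch only -- not compiled */
#include <pthread.h>
#include <stdbool.h>

struct toy_object {
	pthread_mutex_t	lock;
	pthread_cond_t	event;
	bool		pager_initialized;	/* kernel setup complete   */
	bool		pager_ready;		/* manager set attributes  */
};

/* Wait until the given flag becomes true (both waits share one event). */
static void
toy_wait_for(struct toy_object *o, bool *flag)
{
	pthread_mutex_lock(&o->lock);
	while (!*flag)
		pthread_cond_wait(&o->event, &o->lock);
	pthread_mutex_unlock(&o->lock);
}

/* Used by the initializing thread and by the memory manager, e.g.      */
/* toy_assert_flag(o, &o->pager_initialized).                           */
static void
toy_assert_flag(struct toy_object *o, bool *flag)
{
	pthread_mutex_lock(&o->lock);
	*flag = true;
	pthread_cond_broadcast(&o->event);
	pthread_mutex_unlock(&o->lock);
}
#endif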
2587
2588 #if 0
2589 static void vm_object_abort_activity(
2590 vm_object_t object);
2591
2592 /*
2593 * Routine: vm_object_abort_activity [internal use only]
2594 * Purpose:
2595 * Abort paging requests pending on this object.
2596 * In/out conditions:
2597 * The object is locked on entry and exit.
2598 */
2599 static void
2600 vm_object_abort_activity(
2601 vm_object_t object)
2602 {
2603 register
2604 vm_page_t p;
2605 vm_page_t next;
2606
2607 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
2608 (integer_t)object, 0, 0, 0, 0);
2609
2610 /*
2611 * Abort all activity that would be waiting
2612 * for a result on this memory object.
2613 *
2614 * We could also choose to destroy all pages
2615 * that we have in memory for this object, but
2616 * we don't.
2617 */
2618
2619 p = (vm_page_t) queue_first(&object->memq);
2620 while (!queue_end(&object->memq, (queue_entry_t) p)) {
2621 next = (vm_page_t) queue_next(&p->listq);
2622
2623 /*
2624 * If it's being paged in, destroy it.
2625 * If an unlock has been requested, start it again.
2626 */
2627
2628 if (p->busy && p->absent) {
2629 VM_PAGE_FREE(p);
2630 }
2631 else {
2632 if (p->unlock_request != VM_PROT_NONE)
2633 p->unlock_request = VM_PROT_NONE;
2634 PAGE_WAKEUP(p);
2635 }
2636
2637 p = next;
2638 }
2639
2640 /*
2641 * Wake up threads waiting for the memory object to
2642 * become ready.
2643 */
2644
2645 object->pager_ready = TRUE;
2646 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2647 }
2648
2649 /*
2650 * Routine: vm_object_pager_dead
2651 *
2652 * Purpose:
2653 * A port is being destroyed, and the IPC kobject code
2654 * can't tell if it represents a pager port or not.
2655 * So this function is called each time it sees a port
2656 * die.
2657 * THIS IS HORRIBLY INEFFICIENT. We should only call
2658 * this routine if we had requested a notification on
2659 * the port.
2660 */
2661
2662 __private_extern__ void
2663 vm_object_pager_dead(
2664 ipc_port_t pager)
2665 {
2666 vm_object_t object;
2667 vm_object_hash_entry_t entry;
2668
2669 /*
2670 * Perform essentially the same operations as in vm_object_lookup,
2671 * except that this time we look up based on the memory_object
2672 * port, not the control port.
2673 */
2674 vm_object_cache_lock();
2675 entry = vm_object_hash_lookup(pager, FALSE);
2676 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2677 entry->object == VM_OBJECT_NULL) {
2678 vm_object_cache_unlock();
2679 return;
2680 }
2681
2682 object = entry->object;
2683 entry->object = VM_OBJECT_NULL;
2684
2685 vm_object_lock(object);
2686 if (object->ref_count == 0) {
2687 XPR(XPR_VM_OBJECT_CACHE,
2688 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2689 (integer_t)object,
2690 (integer_t)vm_object_cached_list.next,
2691 (integer_t)vm_object_cached_list.prev, 0,0);
2692
2693 queue_remove(&vm_object_cached_list, object,
2694 vm_object_t, cached_list);
2695 vm_object_cached_count--;
2696 }
2697 object->ref_count++;
2698 vm_object_res_reference(object);
2699
2700 object->can_persist = FALSE;
2701
2702 assert(object->pager == pager);
2703
2704 /*
2705 * Remove the pager association.
2706 *
2707 * Note that the memory_object itself is dead, so
2708 * we don't bother with it.
2709 */
2710
2711 object->pager = MEMORY_OBJECT_NULL;
2712
2713 vm_object_unlock(object);
2714 vm_object_cache_unlock();
2715
2716 vm_object_pager_wakeup(pager);
2717
2718 /*
2719 * Release the pager reference. Note that there's no
2720 * point in trying the memory_object_terminate call
2721 * because the memory_object itself is dead. Also
2722 * release the memory_object_control reference, since
2723 * the pager didn't do that either.
2724 */
2725
2726 memory_object_deallocate(pager);
2727 memory_object_control_deallocate(object->pager_request);
2728
2729
2730 /*
2731 * Restart pending page requests
2732 */
2733 vm_object_lock(object);
2734 vm_object_abort_activity(object);
2735 vm_object_unlock(object);
2736
2737 /*
2738 * Lose the object reference.
2739 */
2740
2741 vm_object_deallocate(object);
2742 }
2743 #endif
2744
2745 /*
2746 * Routine: vm_object_enter
2747 * Purpose:
2748 * Find a VM object corresponding to the given
2749 * pager; if no such object exists, create one,
2750 * and initialize the pager.
2751 */
2752 vm_object_t
2753 vm_object_enter(
2754 memory_object_t pager,
2755 vm_object_size_t size,
2756 boolean_t internal,
2757 boolean_t init,
2758 boolean_t named)
2759 {
2760 register vm_object_t object;
2761 vm_object_t new_object;
2762 boolean_t must_init;
2763 vm_object_hash_entry_t entry, new_entry;
2764
2765 if (pager == MEMORY_OBJECT_NULL)
2766 return(vm_object_allocate(size));
2767
2768 new_object = VM_OBJECT_NULL;
2769 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2770 must_init = init;
2771
2772 /*
2773 * Look for an object associated with this port.
2774 */
2775
2776 vm_object_cache_lock();
2777 do {
2778 entry = vm_object_hash_lookup(pager, FALSE);
2779
2780 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2781 if (new_object == VM_OBJECT_NULL) {
2782 /*
2783 * We must unlock to create a new object;
2784 * if we do so, we must try the lookup again.
2785 */
2786 vm_object_cache_unlock();
2787 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2788 new_entry = vm_object_hash_entry_alloc(pager);
2789 new_object = vm_object_allocate(size);
2790 vm_object_cache_lock();
2791 } else {
2792 /*
2793 * Lookup failed twice, and we have something
2794 * to insert; set the object.
2795 */
2796 vm_object_hash_insert(new_entry);
2797 entry = new_entry;
2798 entry->object = new_object;
2799 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2800 new_object = VM_OBJECT_NULL;
2801 must_init = TRUE;
2802 }
2803 } else if (entry->object == VM_OBJECT_NULL) {
2804 /*
2805 * If a previous object is being terminated,
2806 * we must wait for the termination message
2807 * to be queued (and lookup the entry again).
2808 */
2809 entry->waiting = TRUE;
2810 entry = VM_OBJECT_HASH_ENTRY_NULL;
2811 assert_wait((event_t) pager, THREAD_UNINT);
2812 vm_object_cache_unlock();
2813 thread_block(THREAD_CONTINUE_NULL);
2814 vm_object_cache_lock();
2815 }
2816 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
2817
2818 object = entry->object;
2819 assert(object != VM_OBJECT_NULL);
2820
2821 if (!must_init) {
2822 vm_object_lock(object);
2823 assert(!internal || object->internal);
2824 if (named) {
2825 assert(!object->named);
2826 object->named = TRUE;
2827 }
2828 if (object->ref_count == 0) {
2829 XPR(XPR_VM_OBJECT_CACHE,
2830 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2831 (integer_t)object,
2832 (integer_t)vm_object_cached_list.next,
2833 (integer_t)vm_object_cached_list.prev, 0,0);
2834 queue_remove(&vm_object_cached_list, object,
2835 vm_object_t, cached_list);
2836 vm_object_cached_count--;
2837 }
2838 object->ref_count++;
2839 vm_object_res_reference(object);
2840 vm_object_unlock(object);
2841
2842 VM_STAT(hits++);
2843 }
2844 assert(object->ref_count > 0);
2845
2846 VM_STAT(lookups++);
2847
2848 vm_object_cache_unlock();
2849
2850 XPR(XPR_VM_OBJECT,
2851 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2852 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2853
2854 /*
2855 * If we raced to create a vm_object but lost, let's
2856 * throw away ours.
2857 */
2858
2859 if (new_object != VM_OBJECT_NULL)
2860 vm_object_deallocate(new_object);
2861
2862 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2863 vm_object_hash_entry_free(new_entry);
2864
2865 if (must_init) {
2866 memory_object_control_t control;
2867
2868 /*
2869 * Allocate request port.
2870 */
2871
2872 control = memory_object_control_allocate(object);
2873 assert (control != MEMORY_OBJECT_CONTROL_NULL);
2874
2875 vm_object_lock(object);
2876 assert(object != kernel_object);
2877
2878 /*
2879 * Copy the reference we were given.
2880 */
2881
2882 memory_object_reference(pager);
2883 object->pager_created = TRUE;
2884 object->pager = pager;
2885 object->internal = internal;
2886 object->pager_trusted = internal;
2887 if (!internal) {
2888 /* copy strategy invalid until set by memory manager */
2889 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2890 }
2891 object->pager_control = control;
2892 object->pager_ready = FALSE;
2893
2894 vm_object_unlock(object);
2895
2896 /*
2897 * Let the pager know we're using it.
2898 */
2899
2900 (void) memory_object_init(pager,
2901 object->pager_control,
2902 PAGE_SIZE);
2903
2904 vm_object_lock(object);
2905 if (named)
2906 object->named = TRUE;
2907 if (internal) {
2908 object->pager_ready = TRUE;
2909 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2910 }
2911
2912 object->pager_initialized = TRUE;
2913 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2914 } else {
2915 vm_object_lock(object);
2916 }
2917
2918 /*
2919 * [At this point, the object must be locked]
2920 */
2921
2922 /*
2923 * Wait for the work above to be done by the first
2924 * thread to map this object.
2925 */
2926
2927 while (!object->pager_initialized) {
2928 vm_object_sleep(object,
2929 VM_OBJECT_EVENT_INITIALIZED,
2930 THREAD_UNINT);
2931 }
2932 vm_object_unlock(object);
2933
2934 XPR(XPR_VM_OBJECT,
2935 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2936 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2937 return(object);
2938 }
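/*
 * Illustrative sketch of the lookup/allocate/re-lookup pattern used
 * above: because allocation may block, the cache lock is dropped before
 * allocating, and the lookup must then be repeated in case another
 * thread inserted an entry for the same pager in the meantime; the loser
 * of the race simply discards its freshly allocated object.  The
 * one-slot "cache" below is a toy stand-in for the kernel's hash table.
 */
#if 0	/* illustrative sketch only -- not compiled */
#include <pthread.h>
#include <stdlib.h>

struct toy_entry {
	const void	*key;
	void		*object;
};

static pthread_mutex_t	toy_cache_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_entry	toy_cache;	/* one slot stands in for the hash */

static struct toy_entry *
toy_lookup(const void *key)
{
	return (toy_cache.key == key) ? &toy_cache : NULL;
}

static void *
toy_find_or_create(const void *key, size_t size)
{
	struct toy_entry	*entry;
	void			*new_object = NULL;

	pthread_mutex_lock(&toy_cache_lock);
	for (;;) {
		entry = toy_lookup(key);
		if (entry != NULL)
			break;
		if (new_object == NULL) {
			/* must not allocate while holding the cache lock */
			pthread_mutex_unlock(&toy_cache_lock);
			new_object = malloc(size);
			pthread_mutex_lock(&toy_cache_lock);
			continue;	/* re-check: someone may have raced us */
		}
		/* still absent on the second look: publish ours */
		toy_cache.key = key;
		toy_cache.object = new_object;
		new_object = NULL;
	}
	pthread_mutex_unlock(&toy_cache_lock);

	if (new_object != NULL)
		free(new_object);	/* lost the race; throw ours away */
	return entry->object;
}
#endif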
2939
2940 /*
2941 * Routine: vm_object_pager_create
2942 * Purpose:
2943 * Create a memory object for an internal object.
2944 * In/out conditions:
2945 * The object is locked on entry and exit;
2946 * it may be unlocked within this call.
2947 * Limitations:
2948 * Only one thread may be performing a
2949 * vm_object_pager_create on an object at
2950 * a time. Presumably, only the pageout
2951 * daemon will be using this routine.
2952 */
2953
2954 void
2955 vm_object_pager_create(
2956 register vm_object_t object)
2957 {
2958 memory_object_t pager;
2959 vm_object_hash_entry_t entry;
2960 #if MACH_PAGEMAP
2961 vm_object_size_t size;
2962 vm_external_map_t map;
2963 #endif /* MACH_PAGEMAP */
2964
2965 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2966 (integer_t)object, 0,0,0,0);
2967
2968 assert(object != kernel_object);
2969
2970 if (memory_manager_default_check() != KERN_SUCCESS)
2971 return;
2972
2973 /*
2974 * Prevent collapse or termination by holding a paging reference
2975 */
2976
2977 vm_object_paging_begin(object);
2978 if (object->pager_created) {
2979 /*
2980 * Someone else got to it first...
2981 * wait for them to finish initializing the ports
2982 */
2983 while (!object->pager_initialized) {
2984 vm_object_sleep(object,
2985 VM_OBJECT_EVENT_INITIALIZED,
2986 THREAD_UNINT);
2987 }
2988 vm_object_paging_end(object);
2989 return;
2990 }
2991
2992 /*
2993 * Indicate that a memory object has been assigned
2994 * before dropping the lock, to prevent a race.
2995 */
2996
2997 object->pager_created = TRUE;
2998 object->paging_offset = 0;
2999
3000 #if MACH_PAGEMAP
3001 size = object->size;
3002 #endif /* MACH_PAGEMAP */
3003 vm_object_unlock(object);
3004
3005 #if MACH_PAGEMAP
3006 map = vm_external_create(size);
3007 vm_object_lock(object);
3008 assert(object->size == size);
3009 object->existence_map = map;
3010 vm_object_unlock(object);
3011 #endif /* MACH_PAGEMAP */
3012
3013 /*
3014 * Create the [internal] pager, and associate it with this object.
3015 *
3016 * We make the association here so that vm_object_enter()
3017 * can look up the object to complete initializing it. No
3018 * user will ever map this object.
3019 */
3020 {
3021 memory_object_default_t dmm;
3022 vm_size_t cluster_size;
3023
3024 /* acquire a reference for the default memory manager */
3025 dmm = memory_manager_default_reference(&cluster_size);
3026 assert(cluster_size >= PAGE_SIZE);
3027
3028 object->cluster_size = cluster_size; /* XXX ??? */
3029 assert(object->temporary);
3030
3031 /* create our new memory object */
3032 (void) memory_object_create(dmm, object->size, &pager);
3033
3034 memory_object_default_deallocate(dmm);
3035 }
3036
3037 entry = vm_object_hash_entry_alloc(pager);
3038
3039 vm_object_cache_lock();
3040 vm_object_hash_insert(entry);
3041
3042 entry->object = object;
3043 vm_object_cache_unlock();
3044
3045 /*
3046 * A reference was returned by
3047 * memory_object_create(), and it is
3048 * copied by vm_object_enter().
3049 */
3050
3051 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
3052 panic("vm_object_pager_create: mismatch");
3053
3054 /*
3055 * Drop the reference we were passed.
3056 */
3057 memory_object_deallocate(pager);
3058
3059 vm_object_lock(object);
3060
3061 /*
3062 * Release the paging reference
3063 */
3064 vm_object_paging_end(object);
3065 }
3066
3067 /*
3068 * Routine: vm_object_remove
3069 * Purpose:
3070 * Eliminate the pager/object association
3071 * for this pager.
3072 * Conditions:
3073 * The object cache must be locked.
3074 */
3075 __private_extern__ void
3076 vm_object_remove(
3077 vm_object_t object)
3078 {
3079 memory_object_t pager;
3080
3081 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
3082 vm_object_hash_entry_t entry;
3083
3084 entry = vm_object_hash_lookup(pager, FALSE);
3085 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
3086 entry->object = VM_OBJECT_NULL;
3087 }
3088
3089 }
3090
3091 /*
3092 * Global variables for vm_object_collapse():
3093 *
3094 * Counts for normal collapses and bypasses.
3095 * Debugging variables, to watch or disable collapse.
3096 */
3097 static long object_collapses = 0;
3098 static long object_bypasses = 0;
3099
3100 static boolean_t vm_object_collapse_allowed = TRUE;
3101 static boolean_t vm_object_bypass_allowed = TRUE;
3102
3103 static int vm_external_discarded;
3104 static int vm_external_collapsed;
3105
3106 unsigned long vm_object_collapse_encrypted = 0;
3107
3108 /*
3109 * Routine: vm_object_do_collapse
3110 * Purpose:
3111 * Collapse an object with the object backing it.
3112 * Pages in the backing object are moved into the
3113 * parent, and the backing object is deallocated.
3114 * Conditions:
3115 * Both objects and the cache are locked; the page
3116 * queues are unlocked.
3117 *
3118 */
3119 static void
3120 vm_object_do_collapse(
3121 vm_object_t object,
3122 vm_object_t backing_object)
3123 {
3124 vm_page_t p, pp;
3125 vm_object_offset_t new_offset, backing_offset;
3126 vm_object_size_t size;
3127
3128 backing_offset = object->shadow_offset;
3129 size = object->size;
3130
3131 /*
3132 * Move all in-memory pages from backing_object
3133 * to the parent. Pages that have been paged out
3134 * will be overwritten by any of the parent's
3135 * pages that shadow them.
3136 */
3137
3138 while (!queue_empty(&backing_object->memq)) {
3139
3140 p = (vm_page_t) queue_first(&backing_object->memq);
3141
3142 new_offset = (p->offset - backing_offset);
3143
3144 assert(!p->busy || p->absent);
3145
3146 /*
3147 * If the parent has a page here, or if
3148 * this page falls outside the parent,
3149 * dispose of it.
3150 *
3151 * Otherwise, move it as planned.
3152 */
3153
3154 if (p->offset < backing_offset || new_offset >= size) {
3155 VM_PAGE_FREE(p);
3156 } else {
3157 /*
3158 * ENCRYPTED SWAP:
3159 * The encryption key includes the "pager" and the
3160 * "paging_offset". These might not be the same in
3161 * the new object, so we can't just move an encrypted
3162 * page from one object to the other. We can't just
3163 * decrypt the page here either, because that would drop
3164 * the object lock.
3165 * The caller should check for encrypted pages before
3166 * attempting to collapse.
3167 */
3168 ASSERT_PAGE_DECRYPTED(p);
3169
3170 pp = vm_page_lookup(object, new_offset);
3171 if (pp == VM_PAGE_NULL) {
3172
3173 /*
3174 * Parent now has no page.
3175 * Move the backing object's page up.
3176 */
3177
3178 vm_page_rename(p, object, new_offset);
3179 #if MACH_PAGEMAP
3180 } else if (pp->absent) {
3181
3182 /*
3183 * Parent has an absent page...
3184 * it's not being paged in, so
3185 * it must really be missing from
3186 * the parent.
3187 *
3188 * Throw out the absent page...
3189 * any faults looking for that
3190 * page will restart with the new
3191 * one.
3192 */
3193
3194 VM_PAGE_FREE(pp);
3195 vm_page_rename(p, object, new_offset);
3196 #endif /* MACH_PAGEMAP */
3197 } else {
3198 assert(! pp->absent);
3199
3200 /*
3201 * Parent object has a real page.
3202 * Throw away the backing object's
3203 * page.
3204 */
3205 VM_PAGE_FREE(p);
3206 }
3207 }
3208 }
3209
3210 #if !MACH_PAGEMAP
3211 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL
3212 || (!backing_object->pager_created
3213 && backing_object->pager == MEMORY_OBJECT_NULL));
3214 #else
3215 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
3216 #endif /* !MACH_PAGEMAP */
3217
3218 if (backing_object->pager != MEMORY_OBJECT_NULL) {
3219 vm_object_hash_entry_t entry;
3220
3221 /*
3222 * Move the pager from backing_object to object.
3223 *
3224 * XXX We're only using part of the paging space
3225 * for keeps now... we ought to discard the
3226 * unused portion.
3227 */
3228
3229 assert(!object->paging_in_progress);
3230 object->pager = backing_object->pager;
3231 entry = vm_object_hash_lookup(object->pager, FALSE);
3232 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3233 entry->object = object;
3234 object->pager_created = backing_object->pager_created;
3235 object->pager_control = backing_object->pager_control;
3236 object->pager_ready = backing_object->pager_ready;
3237 object->pager_initialized = backing_object->pager_initialized;
3238 object->cluster_size = backing_object->cluster_size;
3239 object->paging_offset =
3240 backing_object->paging_offset + backing_offset;
3241 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
3242 memory_object_control_collapse(object->pager_control,
3243 object);
3244 }
3245 }
3246
3247 vm_object_cache_unlock();
3248
3249 #if MACH_PAGEMAP
3250 /*
3251 * If the shadow offset is 0, use the existence map from
3252 * the backing object if there is one. If the shadow offset is
3253 * not zero, toss it.
3254 *
3255 * XXX - If the shadow offset is not 0 then a bit copy is needed
3256 * if the map is to be salvaged. For now, we just toss the
3257 * old map, giving the collapsed object no map. This means that
3258 * the pager is invoked for zero fill pages. If analysis shows
3259 * that this happens frequently and is a performance hit, then
3260 * this code should be fixed to salvage the map.
3261 */
3262 assert(object->existence_map == VM_EXTERNAL_NULL);
3263 if (backing_offset || (size != backing_object->size)) {
3264 vm_external_discarded++;
3265 vm_external_destroy(backing_object->existence_map,
3266 backing_object->size);
3267 }
3268 else {
3269 vm_external_collapsed++;
3270 object->existence_map = backing_object->existence_map;
3271 }
3272 backing_object->existence_map = VM_EXTERNAL_NULL;
3273 #endif /* MACH_PAGEMAP */
3274
3275 /*
3276 * Object now shadows whatever backing_object did.
3277 * Note that the reference to backing_object->shadow
3278 * moves from within backing_object to within object.
3279 */
3280
3281 assert(!object->phys_contiguous);
3282 assert(!backing_object->phys_contiguous);
3283 object->shadow = backing_object->shadow;
3284 if (object->shadow) {
3285 object->shadow_offset += backing_object->shadow_offset;
3286 } else {
3287 /* no shadow, therefore no shadow offset... */
3288 object->shadow_offset = 0;
3289 }
3290 assert((object->shadow == VM_OBJECT_NULL) ||
3291 (object->shadow->copy != backing_object));
3292
3293 /*
3294 * Discard backing_object.
3295 *
3296 * Since the backing object has no pages, no
3297 * pager left, and no object references within it,
3298 * all that is necessary is to dispose of it.
3299 */
3300
3301 assert((backing_object->ref_count == 1) &&
3302 (backing_object->resident_page_count == 0) &&
3303 (backing_object->paging_in_progress == 0));
3304
3305 backing_object->alive = FALSE;
3306 vm_object_unlock(backing_object);
3307
3308 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3309 (integer_t)backing_object, 0,0,0,0);
3310
3311 zfree(vm_object_zone, backing_object);
3312
3313 object_collapses++;
3314 }
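/*
 * Illustrative sketch of the page-migration loop above: each resident
 * page of the backing object either falls outside the parent's window,
 * is already shadowed by a parent page (it is discarded in both cases),
 * or is renamed into the parent at (offset - backing_offset).  The
 * fixed-size arrays below are toy stand-ins for the real page queues.
 */
#if 0	/* illustrative sketch only -- not compiled */
#define TOY_PAGES	16

struct toy_object {
	int		present[TOY_PAGES];	/* resident page at slot? */
	int		data[TOY_PAGES];	/* toy page contents      */
	unsigned	size;			/* object size, in pages  */
};

static void
toy_do_collapse(struct toy_object *parent, struct toy_object *backing,
		unsigned backing_offset)	/* in pages */
{
	unsigned off, new_off;

	for (off = 0; off < TOY_PAGES; off++) {
		if (!backing->present[off])
			continue;
		backing->present[off] = 0;	/* page leaves the backing */

		if (off < backing_offset)
			continue;		/* outside parent: discard */
		new_off = off - backing_offset;
		if (new_off >= parent->size || parent->present[new_off])
			continue;		/* out of range or shadowed */

		/* vm_page_rename() analogue: move the page to the parent */
		parent->present[new_off] = 1;
		parent->data[new_off] = backing->data[off];
	}
}
#endif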
3315
3316 static void
3317 vm_object_do_bypass(
3318 vm_object_t object,
3319 vm_object_t backing_object)
3320 {
3321 /*
3322 * Make the parent shadow the next object
3323 * in the chain.
3324 */
3325
3326 #if TASK_SWAPPER
3327 /*
3328 * Do object reference in-line to
3329 * conditionally increment shadow's
3330 * residence count. If object is not
3331 * resident, leave residence count
3332 * on shadow alone.
3333 */
3334 if (backing_object->shadow != VM_OBJECT_NULL) {
3335 vm_object_lock(backing_object->shadow);
3336 backing_object->shadow->ref_count++;
3337 if (object->res_count != 0)
3338 vm_object_res_reference(backing_object->shadow);
3339 vm_object_unlock(backing_object->shadow);
3340 }
3341 #else /* TASK_SWAPPER */
3342 vm_object_reference(backing_object->shadow);
3343 #endif /* TASK_SWAPPER */
3344
3345 assert(!object->phys_contiguous);
3346 assert(!backing_object->phys_contiguous);
3347 object->shadow = backing_object->shadow;
3348 if (object->shadow) {
3349 object->shadow_offset += backing_object->shadow_offset;
3350 } else {
3351 /* no shadow, therefore no shadow offset... */
3352 object->shadow_offset = 0;
3353 }
3354
3355 /*
3356 * Backing object might have had a copy pointer
3357 * to us. If it did, clear it.
3358 */
3359 if (backing_object->copy == object) {
3360 backing_object->copy = VM_OBJECT_NULL;
3361 }
3362
3363 /*
3364 * Drop the reference count on backing_object.
3365 #if TASK_SWAPPER
3366 * Since its ref_count was at least 2, it
3367 * will not vanish; so we don't need to call
3368 * vm_object_deallocate.
3369 * [FBDP: that doesn't seem to be true any more]
3370 *
3371 * The res_count on the backing object is
3372 * conditionally decremented. It's possible
3373 * (via vm_pageout_scan) to get here with
3374 * a "swapped" object, which has a 0 res_count,
3375 * in which case, the backing object res_count
3376 * is already down by one.
3377 #else
3378 * Don't call vm_object_deallocate unless
3379 * ref_count drops to zero.
3380 *
3381 * The ref_count can drop to zero here if the
3382 * backing object could be bypassed but not
3383 * collapsed, such as when the backing object
3384 * is temporary and cachable.
3385 #endif
3386 */
3387 if (backing_object->ref_count > 1) {
3388 backing_object->ref_count--;
3389 #if TASK_SWAPPER
3390 if (object->res_count != 0)
3391 vm_object_res_deallocate(backing_object);
3392 assert(backing_object->ref_count > 0);
3393 #endif /* TASK_SWAPPER */
3394 vm_object_unlock(backing_object);
3395 } else {
3396
3397 /*
3398 * Drop locks so that we can deallocate
3399 * the backing object.
3400 */
3401
3402 #if TASK_SWAPPER
3403 if (object->res_count == 0) {
3404 /* XXX get a reference for the deallocate below */
3405 vm_object_res_reference(backing_object);
3406 }
3407 #endif /* TASK_SWAPPER */
3408 vm_object_unlock(object);
3409 vm_object_unlock(backing_object);
3410 vm_object_deallocate(backing_object);
3411
3412 /*
3413 * Relock object. We don't have to reverify
3414 * its state since vm_object_collapse will
3415 * do that for us as it starts at the
3416 * top of its loop.
3417 */
3418
3419 vm_object_lock(object);
3420 }
3421
3422 object_bypasses++;
3423 }
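/*
 * Illustrative sketch of the bypass above: the parent stops shadowing
 * backing_object and instead shadows whatever backing_object shadowed,
 * with the two shadow offsets composed so that lookups land on the same
 * pages as before.  Toy types only; reference counting is elided.
 */
#if 0	/* illustrative sketch only -- not compiled */
#include <stddef.h>

struct toy_object {
	struct toy_object	*shadow;
	unsigned long		shadow_offset;
};

static void
toy_do_bypass(struct toy_object *object)
{
	struct toy_object *backing = object->shadow;

	if (backing == NULL)
		return;
	object->shadow = backing->shadow;
	if (object->shadow != NULL)
		object->shadow_offset += backing->shadow_offset;
	else
		object->shadow_offset = 0;	/* no shadow, no offset */
}
#endif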
3424
3425
3426 /*
3427 * vm_object_collapse:
3428 *
3429 * Perform an object collapse or an object bypass if appropriate.
3430 * The real work of collapsing and bypassing is performed in
3431 * the routines vm_object_do_collapse and vm_object_do_bypass.
3432 *
3433 * Requires that the object be locked and the page queues be unlocked.
3434 *
3435 */
3436 static unsigned long vm_object_collapse_calls = 0;
3437 static unsigned long vm_object_collapse_objects = 0;
3438 static unsigned long vm_object_collapse_do_collapse = 0;
3439 static unsigned long vm_object_collapse_do_bypass = 0;
3440 __private_extern__ void
3441 vm_object_collapse(
3442 register vm_object_t object,
3443 register vm_object_offset_t hint_offset)
3444 {
3445 register vm_object_t backing_object;
3446 register unsigned int rcount;
3447 register unsigned int size;
3448 vm_object_offset_t collapse_min_offset;
3449 vm_object_offset_t collapse_max_offset;
3450 vm_page_t page;
3451 vm_object_t original_object;
3452
3453 vm_object_collapse_calls++;
3454
3455 if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {
3456 return;
3457 }
3458
3459 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3460 (integer_t)object, 0,0,0,0);
3461
3462 if (object == VM_OBJECT_NULL)
3463 return;
3464
3465 original_object = object;
3466
3467 while (TRUE) {
3468 vm_object_collapse_objects++;
3469 /*
3470 * Verify that the conditions are right for either
3471 * collapse or bypass:
3472 */
3473
3474 /*
3475 * There is a backing object, and
3476 */
3477
3478 backing_object = object->shadow;
3479 if (backing_object == VM_OBJECT_NULL) {
3480 if (object != original_object) {
3481 vm_object_unlock(object);
3482 }
3483 return;
3484 }
3485
3486 /*
3487 * No pages in the object are currently
3488 * being paged out, and
3489 */
3490 if (object->paging_in_progress != 0 ||
3491 object->absent_count != 0) {
3492 /* try and collapse the rest of the shadow chain */
3493 vm_object_lock(backing_object);
3494 if (object != original_object) {
3495 vm_object_unlock(object);
3496 }
3497 object = backing_object;
3498 continue;
3499 }
3500
3501 vm_object_lock(backing_object);
3502
3503 /*
3504 * ...
3505 * The backing object is not read_only,
3506 * and no pages in the backing object are
3507 * currently being paged out.
3508 * The backing object is internal.
3509 *
3510 */
3511
3512 if (!backing_object->internal ||
3513 backing_object->paging_in_progress != 0) {
3514 /* try and collapse the rest of the shadow chain */
3515 if (object != original_object) {
3516 vm_object_unlock(object);
3517 }
3518 object = backing_object;
3519 continue;
3520 }
3521
3522 /*
3523 * The backing object can't be a copy-object:
3524 * the shadow_offset for the copy-object must stay
3525 * as 0. Furthermore (for the 'we have all the
3526 * pages' case), if we bypass backing_object and
3527 * just shadow the next object in the chain, old
3528 * pages from that object would then have to be copied
3529 * BOTH into the (former) backing_object and into the
3530 * parent object.
3531 */
3532 if (backing_object->shadow != VM_OBJECT_NULL &&
3533 backing_object->shadow->copy == backing_object) {
3534 /* try and collapse the rest of the shadow chain */
3535 if (object != original_object) {
3536 vm_object_unlock(object);
3537 }
3538 object = backing_object;
3539 continue;
3540 }
3541
3542 /*
3543 * We can now try to either collapse the backing
3544 * object (if the parent is the only reference to
3545 * it) or (perhaps) remove the parent's reference
3546 * to it.
3547 *
3548 * If there is exactly one reference to the backing
3549 * object, we may be able to collapse it into the
3550 * parent.
3551 *
3552 * If MACH_PAGEMAP is defined:
3553 * The parent must not have a pager created for it,
3554 * since collapsing a backing_object dumps new pages
3555 * into the parent that its pager doesn't know about
3556 * (and the collapse code can't merge the existence
3557 * maps).
3558 * Otherwise:
3559 * As long as one of the objects is still not known
3560 * to the pager, we can collapse them.
3561 */
3562 if (backing_object->ref_count == 1 &&
3563 (!object->pager_created
3564 #if !MACH_PAGEMAP
3565 || !backing_object->pager_created
3566 #endif /*!MACH_PAGEMAP */
3567 ) && vm_object_collapse_allowed) {
3568
3569 XPR(XPR_VM_OBJECT,
3570 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
3571 (integer_t)backing_object, (integer_t)object,
3572 (integer_t)backing_object->pager,
3573 (integer_t)backing_object->pager_control, 0);
3574
3575 /*
3576 * We need the cache lock for collapsing,
3577 * but we must not deadlock.
3578 */
3579
3580 if (! vm_object_cache_lock_try()) {
3581 if (object != original_object) {
3582 vm_object_unlock(object);
3583 }
3584 vm_object_unlock(backing_object);
3585 return;
3586 }
3587
3588 /*
3589 * ENCRYPTED SWAP
3590 * We can't collapse the object if it contains
3591 * any encrypted page, because the encryption key
3592 * includes the <object,offset> info. We can't
3593 * drop the object lock in vm_object_do_collapse()
3594 * so we can't decrypt the page there either.
3595 */
3596 if (vm_pages_encrypted) {
3597 collapse_min_offset = object->shadow_offset;
3598 collapse_max_offset =
3599 object->shadow_offset + object->size;
3600 queue_iterate(&backing_object->memq,
3601 page, vm_page_t, listq) {
3602 if (page->encrypted &&
3603 (page->offset >=
3604 collapse_min_offset) &&
3605 (page->offset <
3606 collapse_max_offset)) {
3607 /*
3608 * We found an encrypted page
3609 * in the backing object,
3610 * within the range covered
3611 * by the parent object: we
3612 * cannot collapse them.
3613 */
3614 vm_object_collapse_encrypted++;
3615 vm_object_cache_unlock();
3616 goto try_bypass;
3617 }
3618 }
3619 }
3620
3621 /*
3622 * Collapse the object with its backing
3623 * object, and try again with the object's
3624 * new backing object.
3625 */
3626
3627 vm_object_do_collapse(object, backing_object);
3628 vm_object_collapse_do_collapse++;
3629 continue;
3630 }
3631
3632 try_bypass:
3633 /*
3634 * Collapsing the backing object was not possible
3635 * or permitted, so let's try bypassing it.
3636 */
3637
3638 if (! vm_object_bypass_allowed) {
3639 /* try and collapse the rest of the shadow chain */
3640 if (object != original_object) {
3641 vm_object_unlock(object);
3642 }
3643 object = backing_object;
3644 continue;
3645 }
3646
3647
3648 /*
3649 * If the object doesn't have all its pages present,
3650 * we have to make sure no pages in the backing object
3651 * "show through" before bypassing it.
3652 */
3653 size = atop(object->size);
3654 rcount = object->resident_page_count;
3655 if (rcount != size) {
3656 vm_object_offset_t offset;
3657 vm_object_offset_t backing_offset;
3658 unsigned int backing_rcount;
3659 unsigned int lookups = 0;
3660
3661 /*
3662 * If the backing object has a pager but no pagemap,
3663 * then we cannot bypass it, because we don't know
3664 * what pages it has.
3665 */
3666 if (backing_object->pager_created
3667 #if MACH_PAGEMAP
3668 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3669 #endif /* MACH_PAGEMAP */
3670 ) {
3671 /* try and collapse the rest of the shadow chain */
3672 if (object != original_object) {
3673 vm_object_unlock(object);
3674 }
3675 object = backing_object;
3676 continue;
3677 }
3678
3679 /*
3680 * If the object has a pager but no pagemap,
3681 * then we cannot bypass it, because we don't know
3682 * what pages it has.
3683 */
3684 if (object->pager_created
3685 #if MACH_PAGEMAP
3686 && (object->existence_map == VM_EXTERNAL_NULL)
3687 #endif /* MACH_PAGEMAP */
3688 ) {
3689 /* try and collapse the rest of the shadow chain */
3690 if (object != original_object) {
3691 vm_object_unlock(object);
3692 }
3693 object = backing_object;
3694 continue;
3695 }
3696
3697 /*
3698 * If all of the pages in the backing object are
3699 * shadowed by the parent object, the parent
3700 * object no longer has to shadow the backing
3701 * object; it can shadow the next one in the
3702 * chain.
3703 *
3704 * If the backing object has existence info,
3705 * we must examine its existence info
3706 * as well.
3707 *
3708 */
3709
3710 backing_offset = object->shadow_offset;
3711 backing_rcount = backing_object->resident_page_count;
3712
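			/*
			 * EXISTS_IN_OBJECT(obj, off, rc) evaluates true if a
			 * page at offset "off" is known to exist in "obj",
			 * either through the existence map or as a resident
			 * page.  Note its side effects: each resident-page
			 * probe bumps the local "lookups" throttle counter,
			 * and a resident hit decrements the caller's residual
			 * resident count "rc", so later probes can skip the
			 * page lookup once every resident page has been
			 * accounted for.
			 */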
3713 #define EXISTS_IN_OBJECT(obj, off, rc) \
3714 (vm_external_state_get((obj)->existence_map, \
3715 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
3716 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
3717
3718 /*
3719 * Check the hint location first
3720 * (since it is often the quickest way out of here).
3721 */
3722 if (object->cow_hint != ~(vm_offset_t)0)
3723 hint_offset = (vm_object_offset_t)object->cow_hint;
3724 else
3725 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
3726 (hint_offset - 8 * PAGE_SIZE_64) : 0;
3727
3728 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
3729 backing_offset, backing_rcount) &&
3730 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
3731 /* dependency right at the hint */
3732 object->cow_hint = (vm_offset_t)hint_offset;
3733 /* try and collapse the rest of the shadow chain */
3734 if (object != original_object) {
3735 vm_object_unlock(object);
3736 }
3737 object = backing_object;
3738 continue;
3739 }
3740
3741 /*
3742 * If the object's window onto the backing_object
3743 * is large compared to the number of resident
3744 * pages in the backing object, it makes sense to
3745 * walk the backing_object's resident pages first.
3746 *
3747 * NOTE: Pages may be in both the existence map and
3748 * resident. So, we can't permanently decrement
3749 * the rcount here because the second loop may
3750 * find the same pages in the backing object's
3751 * existence map that we found here and we
3752 * would double-decrement the rcount.
3753 *
3754 */
3755 if (backing_rcount && size >
3756 ((backing_object->existence_map) ?
3757 backing_rcount : (backing_rcount >> 1))) {
3758 unsigned int rc = rcount;
3759 vm_page_t p;
3760
3761 backing_rcount = backing_object->resident_page_count;
3762 p = (vm_page_t)queue_first(&backing_object->memq);
3763 do {
3764 /* Until we get more than one lookup lock */
3765 if (lookups > 256) {
3766 lookups = 0;
3767 delay(1);
3768 }
3769
3770 offset = (p->offset - backing_offset);
3771 if (offset < object->size &&
3772 offset != hint_offset &&
3773 !EXISTS_IN_OBJECT(object, offset, rc)) {
3774 /* found a dependency */
3775 object->cow_hint = (vm_offset_t)offset;
3776 break;
3777 }
3778 p = (vm_page_t) queue_next(&p->listq);
3779
3780 } while (--backing_rcount);
3781 if (backing_rcount != 0 ) {
3782 /* try and collapse the rest of the shadow chain */
3783 if (object != original_object) {
3784 vm_object_unlock(object);
3785 }
3786 object = backing_object;
3787 continue;
3788 }
3789 }
3790
3791 /*
3792 * Walk through the offsets looking for pages in the
3793 * backing object that show through to the object.
3794 */
3795 if (backing_rcount || backing_object->existence_map) {
3796 offset = hint_offset;
3797
3798 while((offset =
3799 (offset + PAGE_SIZE_64 < object->size) ?
3800 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
3801
3802 /* Until we get more than one lookup lock */
3803 if (lookups > 256) {
3804 lookups = 0;
3805 delay(1);
3806 }
3807
3808 if (EXISTS_IN_OBJECT(backing_object, offset +
3809 backing_offset, backing_rcount) &&
3810 !EXISTS_IN_OBJECT(object, offset, rcount)) {
3811 /* found a dependency */
3812 object->cow_hint = (vm_offset_t)offset;
3813 break;
3814 }
3815 }
3816 if (offset != hint_offset) {
3817 /* try and collapse the rest of the shadow chain */
3818 if (object != original_object) {
3819 vm_object_unlock(object);
3820 }
3821 object = backing_object;
3822 continue;
3823 }
3824 }
3825 }
3826
3827 /* reset the offset hint for any objects deeper in the chain */
3828 object->cow_hint = (vm_offset_t)0;
3829
3830 /*
3831 * All interesting pages in the backing object
3832 * already live in the parent or its pager.
3833 * Thus we can bypass the backing object.
3834 */
3835
3836 vm_object_do_bypass(object, backing_object);
3837 vm_object_collapse_do_bypass++;
3838
3839 /*
3840 * Try again with this object's new backing object.
3841 */
3842
3843 continue;
3844 }
3845
3846 if (object != original_object) {
3847 vm_object_unlock(object);
3848 }
3849 }
3850
3851 /*
3852 * Routine: vm_object_page_remove: [internal]
3853 * Purpose:
3854 * Removes all physical pages in the specified
3855 * object range from the object's list of pages.
3856 *
3857 * In/out conditions:
3858 * The object must be locked.
3859 * The object must not have paging_in_progress, usually
3860 * guaranteed by not having a pager.
3861 */
3862 unsigned int vm_object_page_remove_lookup = 0;
3863 unsigned int vm_object_page_remove_iterate = 0;
3864
3865 __private_extern__ void
3866 vm_object_page_remove(
3867 register vm_object_t object,
3868 register vm_object_offset_t start,
3869 register vm_object_offset_t end)
3870 {
3871 register vm_page_t p, next;
3872
3873 /*
3874 * One and two page removals are most popular.
3875 * The factor of 16 here is somewhat arbitrary.
3876 * It balances vm_object_lookup vs iteration.
3877 */
3878
3879 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
3880 vm_object_page_remove_lookup++;
3881
3882 for (; start < end; start += PAGE_SIZE_64) {
3883 p = vm_page_lookup(object, start);
3884 if (p != VM_PAGE_NULL) {
3885 assert(!p->cleaning && !p->pageout);
3886 if (!p->fictitious)
3887 pmap_disconnect(p->phys_page);
3888 VM_PAGE_FREE(p);
3889 }
3890 }
3891 } else {
3892 vm_object_page_remove_iterate++;
3893
3894 p = (vm_page_t) queue_first(&object->memq);
3895 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3896 next = (vm_page_t) queue_next(&p->listq);
3897 if ((start <= p->offset) && (p->offset < end)) {
3898 assert(!p->cleaning && !p->pageout);
3899 if (!p->fictitious)
3900 pmap_disconnect(p->phys_page);
3901 VM_PAGE_FREE(p);
3902 }
3903 p = next;
3904 }
3905 }
3906 }
3907
3908
3909 /*
3910 * Routine: vm_object_coalesce
3911 * Function: Coalesces two objects backing up adjoining
3912 * regions of memory into a single object.
3913 *
3914 * returns TRUE if objects were combined.
3915 *
3916 * NOTE: Only works at the moment if the second object is NULL -
3917 * if it's not, which object do we lock first?
3918 *
3919 * Parameters:
3920 * prev_object First object to coalesce
3921 * prev_offset Offset into prev_object
3922 * next_object Second object to coalesce
3923 * next_offset Offset into next_object
3924 *
3925 * prev_size Size of reference to prev_object
3926 * next_size Size of reference to next_object
3927 *
3928 * Conditions:
3929 * The object(s) must *not* be locked. The map must be locked
3930 * to preserve the reference to the object(s).
3931 */
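/*
 *	Illustrative sketch (assumed caller context, not part of this file):
 *	the VM map code can use this to grow the previous entry's anonymous
 *	object when allocating memory immediately after it, roughly:
 *
 *		if (vm_object_coalesce(prev_object, VM_OBJECT_NULL,
 *				       prev_offset, (vm_object_offset_t) 0,
 *				       prev_size, next_size)) {
 *			... extend the previous map entry over the new range ...
 *		}
 *
 *	A FALSE return means the regions could not be combined and the caller
 *	must use a separate object for the new range.
 */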
3932 static int vm_object_coalesce_count = 0;
3933
3934 __private_extern__ boolean_t
3935 vm_object_coalesce(
3936 register vm_object_t prev_object,
3937 vm_object_t next_object,
3938 vm_object_offset_t prev_offset,
3939 __unused vm_object_offset_t next_offset,
3940 vm_object_size_t prev_size,
3941 vm_object_size_t next_size)
3942 {
3943 vm_object_size_t newsize;
3944
3945 #ifdef lint
3946 next_offset++;
3947 #endif /* lint */
3948
3949 if (next_object != VM_OBJECT_NULL) {
3950 return(FALSE);
3951 }
3952
3953 if (prev_object == VM_OBJECT_NULL) {
3954 return(TRUE);
3955 }
3956
3957 XPR(XPR_VM_OBJECT,
3958 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3959 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3960
3961 vm_object_lock(prev_object);
3962
3963 /*
3964 * Try to collapse the object first
3965 */
3966 vm_object_collapse(prev_object, prev_offset);
3967
3968 /*
3969 * Can't coalesce if pages not mapped to
3970 * prev_entry may be in use in any way:
3971 * . more than one reference
3972 * . paged out
3973 * . shadows another object
3974 * . has a copy elsewhere
3975 * . is purgable
3976 * . paging references (pages might be in page-list)
3977 */
3978
3979 if ((prev_object->ref_count > 1) ||
3980 prev_object->pager_created ||
3981 (prev_object->shadow != VM_OBJECT_NULL) ||
3982 (prev_object->copy != VM_OBJECT_NULL) ||
3983 (prev_object->true_share != FALSE) ||
3984 (prev_object->purgable != VM_OBJECT_NONPURGABLE) ||
3985 (prev_object->paging_in_progress != 0)) {
3986 vm_object_unlock(prev_object);
3987 return(FALSE);
3988 }
3989
3990 vm_object_coalesce_count++;
3991
3992 /*
3993 * Remove any pages that may still be in the object from
3994 * a previous deallocation.
3995 */
3996 vm_object_page_remove(prev_object,
3997 prev_offset + prev_size,
3998 prev_offset + prev_size + next_size);
3999
4000 /*
4001 * Extend the object if necessary.
4002 */
4003 newsize = prev_offset + prev_size + next_size;
4004 if (newsize > prev_object->size) {
4005 #if MACH_PAGEMAP
4006 /*
4007 * We cannot extend an object that has existence info,
4008 * since the existence info might then fail to cover
4009 * the entire object.
4010 *
4011 * This assertion must be true because the object
4012 * has no pager, and we only create existence info
4013 * for objects with pagers.
4014 */
4015 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
4016 #endif /* MACH_PAGEMAP */
4017 prev_object->size = newsize;
4018 }
4019
4020 vm_object_unlock(prev_object);
4021 return(TRUE);
4022 }
4023
4024 /*
4025 * Attach a set of physical pages to an object, so that they can
4026 * be mapped by mapping the object. Typically used to map IO memory.
4027 *
4028 * The mapping function and its private data are used to obtain the
4029 * physical addresses for each page to be mapped.
4030 */
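/*
 *	Illustrative sketch (hypothetical map_fn and caller, not part of this
 *	file): for a physically contiguous region, the mapping function can
 *	simply offset a base physical address:
 *
 *		static vm_object_offset_t
 *		contig_map_fn(void *data, vm_object_offset_t offset)
 *		{
 *			return *(vm_object_offset_t *) data + offset;
 *		}
 *
 *		vm_object_page_map(object, 0, region_size,
 *				   contig_map_fn, (void *) &base_paddr);
 */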
4031 void
4032 vm_object_page_map(
4033 vm_object_t object,
4034 vm_object_offset_t offset,
4035 vm_object_size_t size,
4036 vm_object_offset_t (*map_fn)(void *map_fn_data,
4037 vm_object_offset_t offset),
4038 void *map_fn_data) /* private to map_fn */
4039 {
4040 int num_pages;
4041 int i;
4042 vm_page_t m;
4043 vm_page_t old_page;
4044 vm_object_offset_t addr;
4045
4046 num_pages = atop_64(size);
4047
4048 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
4049
4050 addr = (*map_fn)(map_fn_data, offset);
4051
4052 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
4053 vm_page_more_fictitious();
4054
4055 vm_object_lock(object);
4056 if ((old_page = vm_page_lookup(object, offset))
4057 != VM_PAGE_NULL)
4058 {
4059 vm_page_lock_queues();
4060 vm_page_free(old_page);
4061 vm_page_unlock_queues();
4062 }
4063
4064 vm_page_init(m, addr);
4065 /* private normally requires lock_queues but since we */
4066 /* are initializing the page, it's not necessary here */
4067 m->private = TRUE; /* don't free page */
4068 m->wire_count = 1;
4069 vm_page_insert(m, object, offset);
4070
4071 PAGE_WAKEUP_DONE(m);
4072 vm_object_unlock(object);
4073 }
4074 }
4075
4076 #include <mach_kdb.h>
4077
4078 #if MACH_KDB
4079 #include <ddb/db_output.h>
4080 #include <vm/vm_print.h>
4081
4082 #define printf kdbprintf
4083
4084 extern boolean_t vm_object_cached(
4085 vm_object_t object);
4086
4087 extern void print_bitstring(
4088 char byte);
4089
4090 boolean_t vm_object_print_pages = FALSE;
4091
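/*
 *	Illustrative note (not from the original): bits are printed least-
 *	significant first, so print_bitstring(0x05) produces "10100000".
 */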
4092 void
4093 print_bitstring(
4094 char byte)
4095 {
4096 printf("%c%c%c%c%c%c%c%c",
4097 ((byte & (1 << 0)) ? '1' : '0'),
4098 ((byte & (1 << 1)) ? '1' : '0'),
4099 ((byte & (1 << 2)) ? '1' : '0'),
4100 ((byte & (1 << 3)) ? '1' : '0'),
4101 ((byte & (1 << 4)) ? '1' : '0'),
4102 ((byte & (1 << 5)) ? '1' : '0'),
4103 ((byte & (1 << 6)) ? '1' : '0'),
4104 ((byte & (1 << 7)) ? '1' : '0'));
4105 }
4106
4107 boolean_t
4108 vm_object_cached(
4109 register vm_object_t object)
4110 {
4111 register vm_object_t o;
4112
4113 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
4114 if (object == o) {
4115 return TRUE;
4116 }
4117 }
4118 return FALSE;
4119 }
4120
4121 #if MACH_PAGEMAP
4122 /*
4123 * vm_external_print: [ debug ]
4124 */
4125 void
4126 vm_external_print(
4127 vm_external_map_t emap,
4128 vm_size_t size)
4129 {
4130 if (emap == VM_EXTERNAL_NULL) {
4131 printf("0 ");
4132 } else {
4133 vm_size_t existence_size = stob(size);
4134 printf("{ size=%d, map=[", existence_size);
4135 if (existence_size > 0) {
4136 print_bitstring(emap[0]);
4137 }
4138 if (existence_size > 1) {
4139 print_bitstring(emap[1]);
4140 }
4141 if (existence_size > 2) {
4142 printf("...");
4143 print_bitstring(emap[existence_size-1]);
4144 }
4145 printf("] }\n");
4146 }
4147 return;
4148 }
4149 #endif /* MACH_PAGEMAP */
4150
4151 int
4152 vm_follow_object(
4153 vm_object_t object)
4154 {
4155 int count = 0;
4156 int orig_db_indent = db_indent;
4157
4158 while (TRUE) {
4159 if (object == VM_OBJECT_NULL) {
4160 db_indent = orig_db_indent;
4161 return count;
4162 }
4163
4164 count += 1;
4165
4166 iprintf("object 0x%x", object);
4167 printf(", shadow=0x%x", object->shadow);
4168 printf(", copy=0x%x", object->copy);
4169 printf(", pager=0x%x", object->pager);
4170 printf(", ref=%d\n", object->ref_count);
4171
4172 db_indent += 2;
4173 object = object->shadow;
4174 }
4175
4176 }
4177
4178 /*
4179 * vm_object_print: [ debug ]
4180 */
4181 void
4182 vm_object_print(
4183 db_addr_t db_addr,
4184 __unused boolean_t have_addr,
4185 __unused int arg_count,
4186 __unused char *modif)
4187 {
4188 vm_object_t object;
4189 register vm_page_t p;
4190 const char *s;
4191
4192 register int count;
4193
4194 object = (vm_object_t) (long) db_addr;
4195 if (object == VM_OBJECT_NULL)
4196 return;
4197
4198 iprintf("object 0x%x\n", object);
4199
4200 db_indent += 2;
4201
4202 iprintf("size=0x%x", object->size);
4203 printf(", cluster=0x%x", object->cluster_size);
4204 printf(", memq_hint=%p", object->memq_hint);
4205 printf(", ref_count=%d\n", object->ref_count);
4206 iprintf("");
4207 #if TASK_SWAPPER
4208 printf("res_count=%d, ", object->res_count);
4209 #endif /* TASK_SWAPPER */
4210 printf("resident_page_count=%d\n", object->resident_page_count);
4211
4212 iprintf("shadow=0x%x", object->shadow);
4213 if (object->shadow) {
4214 register int i = 0;
4215 vm_object_t shadow = object;
4216 while((shadow = shadow->shadow))
4217 i++;
4218 printf(" (depth %d)", i);
4219 }
4220 printf(", copy=0x%x", object->copy);
4221 printf(", shadow_offset=0x%x", object->shadow_offset);
4222 printf(", last_alloc=0x%x\n", object->last_alloc);
4223
4224 iprintf("pager=0x%x", object->pager);
4225 printf(", paging_offset=0x%x", object->paging_offset);
4226 printf(", pager_control=0x%x\n", object->pager_control);
4227
4228 iprintf("copy_strategy=%d[", object->copy_strategy);
4229 switch (object->copy_strategy) {
4230 case MEMORY_OBJECT_COPY_NONE:
4231 printf("copy_none");
4232 break;
4233
4234 case MEMORY_OBJECT_COPY_CALL:
4235 printf("copy_call");
4236 break;
4237
4238 case MEMORY_OBJECT_COPY_DELAY:
4239 printf("copy_delay");
4240 break;
4241
4242 case MEMORY_OBJECT_COPY_SYMMETRIC:
4243 printf("copy_symmetric");
4244 break;
4245
4246 case MEMORY_OBJECT_COPY_INVALID:
4247 printf("copy_invalid");
4248 break;
4249
4250 default:
4251 printf("?");
4252 }
4253 printf("]");
4254 printf(", absent_count=%d\n", object->absent_count);
4255
4256 iprintf("all_wanted=0x%x<", object->all_wanted);
4257 s = "";
4258 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
4259 printf("%sinit", s);
4260 s = ",";
4261 }
4262 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
4263 printf("%sready", s);
4264 s = ",";
4265 }
4266 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
4267 printf("%spaging", s);
4268 s = ",";
4269 }
4270 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
4271 printf("%sabsent", s);
4272 s = ",";
4273 }
4274 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
4275 printf("%slock", s);
4276 s = ",";
4277 }
4278 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
4279 printf("%suncaching", s);
4280 s = ",";
4281 }
4282 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
4283 printf("%scopy_call", s);
4284 s = ",";
4285 }
4286 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
4287 printf("%scaching", s);
4288 s = ",";
4289 }
4290 printf(">");
4291 printf(", paging_in_progress=%d\n", object->paging_in_progress);
4292
4293 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
4294 (object->pager_created ? "" : "!"),
4295 (object->pager_initialized ? "" : "!"),
4296 (object->pager_ready ? "" : "!"),
4297 (object->can_persist ? "" : "!"),
4298 (object->pager_trusted ? "" : "!"),
4299 (object->pageout ? "" : "!"),
4300 (object->internal ? "internal" : "external"),
4301 (object->temporary ? "temporary" : "permanent"));
4302 iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n",
4303 (object->alive ? "" : "!"),
4304 ((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"),
4305 ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"),
4306 ((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"),
4307 (object->shadowed ? "" : "!"),
4308 (vm_object_cached(object) ? "" : "!"),
4309 (object->private ? "" : "!"));
4310 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
4311 (object->advisory_pageout ? "" : "!"),
4312 (object->silent_overwrite ? "" : "!"));
4313
4314 #if MACH_PAGEMAP
4315 iprintf("existence_map=");
4316 vm_external_print(object->existence_map, object->size);
4317 #endif /* MACH_PAGEMAP */
4318 #if MACH_ASSERT
4319 iprintf("paging_object=0x%x\n", object->paging_object);
4320 #endif /* MACH_ASSERT */
4321
4322 if (vm_object_print_pages) {
4323 count = 0;
4324 p = (vm_page_t) queue_first(&object->memq);
4325 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4326 if (count == 0) {
4327 iprintf("memory:=");
4328 } else if (count == 2) {
4329 printf("\n");
4330 iprintf(" ...");
4331 count = 0;
4332 } else {
4333 printf(",");
4334 }
4335 count++;
4336
4337 printf("(off=0x%llX,page=%p)", p->offset, p);
4338 p = (vm_page_t) queue_next(&p->listq);
4339 }
4340 if (count != 0) {
4341 printf("\n");
4342 }
4343 }
4344 db_indent -= 2;
4345 }
4346
4347
4348 /*
4349 * vm_object_find [ debug ]
4350 *
4351 * Find all tasks which reference the given vm_object.
4352 */
4353
4354 boolean_t vm_object_find(vm_object_t object);
4355 boolean_t vm_object_print_verbose = FALSE;
4356
4357 boolean_t
4358 vm_object_find(
4359 vm_object_t object)
4360 {
4361 task_t task;
4362 vm_map_t map;
4363 vm_map_entry_t entry;
4364 processor_set_t pset = &default_pset;
4365 boolean_t found = FALSE;
4366
4367 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
4368 map = task->map;
4369 for (entry = vm_map_first_entry(map);
4370 entry && entry != vm_map_to_entry(map);
4371 entry = entry->vme_next) {
4372
4373 vm_object_t obj;
4374
4375 /*
4376 * For the time being skip submaps,
4377 * only the kernel can have submaps,
4378 * and unless we are interested in
4379 * kernel objects, we can simply skip
4380 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
4381 * for a full solution.
4382 */
4383 if (entry->is_sub_map)
4384 continue;
4385 if (entry)
4386 obj = entry->object.vm_object;
4387 else
4388 continue;
4389
4390 while (obj != VM_OBJECT_NULL) {
4391 if (obj == object) {
4392 if (!found) {
4393 printf("TASK\t\tMAP\t\tENTRY\n");
4394 found = TRUE;
4395 }
4396 printf("0x%x\t0x%x\t0x%x\n",
4397 task, map, entry);
4398 }
4399 obj = obj->shadow;
4400 }
4401 }
4402 }
4403
4404 return(found);
4405 }
4406
4407 #endif /* MACH_KDB */
4408
4409 kern_return_t
4410 vm_object_populate_with_private(
4411 vm_object_t object,
4412 vm_object_offset_t offset,
4413 ppnum_t phys_page,
4414 vm_size_t size)
4415 {
4416 ppnum_t base_page;
4417 vm_object_offset_t base_offset;
4418
4419
4420 if(!object->private)
4421 return KERN_FAILURE;
4422
4423 base_page = phys_page;
4424
4425 vm_object_lock(object);
4426 if(!object->phys_contiguous) {
4427 vm_page_t m;
4428 if((base_offset = trunc_page_64(offset)) != offset) {
4429 vm_object_unlock(object);
4430 return KERN_FAILURE;
4431 }
4432 base_offset += object->paging_offset;
4433 while(size) {
4434 m = vm_page_lookup(object, base_offset);
4435 if(m != VM_PAGE_NULL) {
4436 if(m->fictitious) {
4437 vm_page_lock_queues();
4438 m->fictitious = FALSE;
4439 m->private = TRUE;
4440 m->phys_page = base_page;
4441 if(!m->busy) {
4442 m->busy = TRUE;
4443 }
4444 if(!m->absent) {
4445 m->absent = TRUE;
4446 object->absent_count++;
4447 }
4448 m->list_req_pending = TRUE;
4449 vm_page_unlock_queues();
4450 } else if (m->phys_page != base_page) {
4451 /* pmap call to clear old mapping */
4452 pmap_disconnect(m->phys_page);
4453 m->phys_page = base_page;
4454 }
4455
4456 /*
4457 * ENCRYPTED SWAP:
4458 * We're not pointing to the same
4459 * physical page any longer and the
4460 * contents of the new one are not
4461 * supposed to be encrypted.
4462 * XXX What happens to the original
4463 * physical page? Is it lost?
4464 */
4465 m->encrypted = FALSE;
4466
4467 } else {
4468 while ((m = vm_page_grab_fictitious())
4469 == VM_PAGE_NULL)
4470 vm_page_more_fictitious();
4471 vm_page_lock_queues();
4472 m->fictitious = FALSE;
4473 m->private = TRUE;
4474 m->phys_page = base_page;
4475 m->list_req_pending = TRUE;
4476 m->absent = TRUE;
4477 m->unusual = TRUE;
4478 object->absent_count++;
4479 vm_page_unlock_queues();
4480 vm_page_insert(m, object, base_offset);
4481 }
4482 base_page++; /* Go to the next physical page */
4483 base_offset += PAGE_SIZE;
4484 size -= PAGE_SIZE;
4485 }
4486 } else {
4487 /* NOTE: we should check the original settings here */
4488 /* if we have a size > zero a pmap call should be made */
4489 /* to disable the range */
4490
4491 /* pmap_? */
4492
4493 /* shadows on contiguous memory are not allowed */
4494 /* we therefore can use the offset field */
4495 object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
4496 object->size = size;
4497 }
4498 vm_object_unlock(object);
4499 return KERN_SUCCESS;
4500 }
4501
4502 /*
4503 * memory_object_free_from_cache:
4504 *
4505 * Walk the vm_object cache list, removing and freeing vm_objects
4506 * which are backed by the pager identified by the caller (pager_id).
4507 * Remove up to "count" objects, if that many are available
4508 * in the cache.
4509 *
4510 * Walk the list at most once, return the number of vm_objects
4511 * actually freed.
4512 */
4513
4514 __private_extern__ kern_return_t
4515 memory_object_free_from_cache(
4516 __unused host_t host,
4517 int *pager_id,
4518 int *count)
4519 {
4520
4521 int object_released = 0;
4522
4523 register vm_object_t object = VM_OBJECT_NULL;
4524 vm_object_t shadow;
4525
4526 /*
4527 if(host == HOST_NULL)
4528 return(KERN_INVALID_ARGUMENT);
4529 */
4530
4531 try_again:
4532 vm_object_cache_lock();
4533
4534 queue_iterate(&vm_object_cached_list, object,
4535 vm_object_t, cached_list) {
4536 if (object->pager && (pager_id == object->pager->pager)) {
4537 vm_object_lock(object);
4538 queue_remove(&vm_object_cached_list, object,
4539 vm_object_t, cached_list);
4540 vm_object_cached_count--;
4541
4542 /*
4543 * Since this object is in the cache, we know
4544 * that it is initialized and has only a pager's
4545 * (implicit) reference. Take a reference to avoid
4546 * recursive deallocations.
4547 */
4548
4549 assert(object->pager_initialized);
4550 assert(object->ref_count == 0);
4551 object->ref_count++;
4552
4553 /*
4554 * Terminate the object.
4555 * If the object had a shadow, we let
4556 * vm_object_deallocate deallocate it.
4557 * "pageout" objects have a shadow, but
4558 * maintain a "paging reference" rather
4559 * than a normal reference.
4560 * (We are careful here to limit recursion.)
4561 */
4562 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4563 if ((vm_object_terminate(object) == KERN_SUCCESS)
4564 && (shadow != VM_OBJECT_NULL)) {
4565 vm_object_deallocate(shadow);
4566 }
4567
4568 if(object_released++ == *count)
4569 return KERN_SUCCESS;
4570 goto try_again;
4571 }
4572 }
4573 vm_object_cache_unlock();
4574 *count = object_released;
4575 return KERN_SUCCESS;
4576 }
4577
4578
4579
4580 kern_return_t
4581 memory_object_create_named(
4582 memory_object_t pager,
4583 memory_object_offset_t size,
4584 memory_object_control_t *control)
4585 {
4586 vm_object_t object;
4587 vm_object_hash_entry_t entry;
4588
4589 *control = MEMORY_OBJECT_CONTROL_NULL;
4590 if (pager == MEMORY_OBJECT_NULL)
4591 return KERN_INVALID_ARGUMENT;
4592
4593 vm_object_cache_lock();
4594 entry = vm_object_hash_lookup(pager, FALSE);
4595 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4596 (entry->object != VM_OBJECT_NULL)) {
4597 if (entry->object->named == TRUE)
4598 panic("memory_object_create_named: caller already holds the right");
}
4599
4600 vm_object_cache_unlock();
4601 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4602 == VM_OBJECT_NULL) {
4603 return(KERN_INVALID_OBJECT);
4604 }
4605
4606 /* wait for object (if any) to be ready */
4607 if (object != VM_OBJECT_NULL) {
4608 vm_object_lock(object);
4609 object->named = TRUE;
4610 while (!object->pager_ready) {
4611 vm_object_sleep(object,
4612 VM_OBJECT_EVENT_PAGER_READY,
4613 THREAD_UNINT);
4614 }
4615 *control = object->pager_control;
4616 vm_object_unlock(object);
4617 }
4618 return (KERN_SUCCESS);
4619 }
4620
4621
4622 /*
4623 * Routine: memory_object_recover_named [user interface]
4624 * Purpose:
4625 * Attempt to recover a named reference for a VM object.
4626 * VM will verify that the object has not already started
4627 * down the termination path, and if it has, will optionally
4628 * wait for that to finish.
4629 * Returns:
4630 * KERN_SUCCESS - we recovered a named reference on the object
4631 * KERN_FAILURE - we could not recover a reference (object dead)
4632 * KERN_INVALID_ARGUMENT - bad memory object control
4633 */
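/*
 *	Illustrative sketch (hypothetical caller): recover a named reference,
 *	waiting out any termination already in progress:
 *
 *		kr = memory_object_recover_named(control, TRUE);
 *		if (kr == KERN_FAILURE)
 *			... the object is dead; no reference was recovered ...
 */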
4634 kern_return_t
4635 memory_object_recover_named(
4636 memory_object_control_t control,
4637 boolean_t wait_on_terminating)
4638 {
4639 vm_object_t object;
4640
4641 vm_object_cache_lock();
4642 object = memory_object_control_to_vm_object(control);
4643 if (object == VM_OBJECT_NULL) {
4644 vm_object_cache_unlock();
4645 return (KERN_INVALID_ARGUMENT);
4646 }
4647
4648 restart:
4649 vm_object_lock(object);
4650
4651 if (object->terminating && wait_on_terminating) {
4652 vm_object_cache_unlock();
4653 vm_object_wait(object,
4654 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4655 THREAD_UNINT);
4656 vm_object_cache_lock();
4657 goto restart;
4658 }
4659
4660 if (!object->alive) {
4661 vm_object_cache_unlock();
4662 vm_object_unlock(object);
4663 return KERN_FAILURE;
4664 }
4665
4666 if (object->named == TRUE) {
4667 vm_object_cache_unlock();
4668 vm_object_unlock(object);
4669 return KERN_SUCCESS;
4670 }
4671
4672 if((object->ref_count == 0) && (!object->terminating)){
4673 queue_remove(&vm_object_cached_list, object,
4674 vm_object_t, cached_list);
4675 vm_object_cached_count--;
4676 XPR(XPR_VM_OBJECT_CACHE,
4677 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4678 (integer_t)object,
4679 (integer_t)vm_object_cached_list.next,
4680 (integer_t)vm_object_cached_list.prev, 0,0);
4681 }
4682
4683 vm_object_cache_unlock();
4684
4685 object->named = TRUE;
4686 object->ref_count++;
4687 vm_object_res_reference(object);
4688 while (!object->pager_ready) {
4689 vm_object_sleep(object,
4690 VM_OBJECT_EVENT_PAGER_READY,
4691 THREAD_UNINT);
4692 }
4693 vm_object_unlock(object);
4694 return (KERN_SUCCESS);
4695 }
4696
4697
4698 /*
4699 * vm_object_release_name:
4700 *
4701 * Enforces the name semantic on a memory_object reference count decrement.
4702 * This routine should not be called unless the caller holds a name
4703 * reference gained through memory_object_create_named.
4704 *
4705 * If the TERMINATE_IDLE flag is set, the call will return if the
4706 * reference count is not 1, i.e. if the object is not idle with the
4707 * name being the only remaining reference.
4708 * If the decision is made to proceed, the named flag is set to
4709 * false and the reference count is decremented. If the RESPECT_CACHE
4710 * flag is set and the reference count has gone to zero, the
4711 * memory_object is checked to see if it is cacheable; otherwise, when
4712 * the reference count reaches zero, it is simply terminated.
4713 */
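/*
 *	Illustrative sketch (hypothetical caller): drop a name reference, but
 *	only let the object be terminated if it is otherwise idle, and respect
 *	its cacheability:
 *
 *		kr = vm_object_release_name(object,
 *					    MEMORY_OBJECT_TERMINATE_IDLE |
 *					    MEMORY_OBJECT_RESPECT_CACHE);
 *		if (kr == KERN_FAILURE)
 *			... object still has other references or is terminating ...
 */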
4714
4715 __private_extern__ kern_return_t
4716 vm_object_release_name(
4717 vm_object_t object,
4718 int flags)
4719 {
4720 vm_object_t shadow;
4721 boolean_t original_object = TRUE;
4722
4723 while (object != VM_OBJECT_NULL) {
4724
4725 /*
4726 * The cache holds a reference (uncounted) to
4727 * the object. We must lock it before removing
4728 * the object.
4729 *
4730 */
4731
4732 vm_object_cache_lock();
4733 vm_object_lock(object);
4734 assert(object->alive);
4735 if(original_object)
4736 assert(object->named);
4737 assert(object->ref_count > 0);
4738
4739 /*
4740 * We have to wait for initialization before
4741 * destroying or caching the object.
4742 */
4743
4744 if (object->pager_created && !object->pager_initialized) {
4745 assert(!object->can_persist);
4746 vm_object_assert_wait(object,
4747 VM_OBJECT_EVENT_INITIALIZED,
4748 THREAD_UNINT);
4749 vm_object_unlock(object);
4750 vm_object_cache_unlock();
4751 thread_block(THREAD_CONTINUE_NULL);
4752 continue;
4753 }
4754
4755 if (((object->ref_count > 1)
4756 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4757 || (object->terminating)) {
4758 vm_object_unlock(object);
4759 vm_object_cache_unlock();
4760 return KERN_FAILURE;
4761 } else {
4762 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4763 vm_object_unlock(object);
4764 vm_object_cache_unlock();
4765 return KERN_SUCCESS;
4766 }
4767 }
4768
4769 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4770 (object->ref_count == 1)) {
4771 if(original_object)
4772 object->named = FALSE;
4773 vm_object_unlock(object);
4774 vm_object_cache_unlock();
4775 /* let vm_object_deallocate push this thing into */
4776 /* the cache, if that is where it is bound */
4777 vm_object_deallocate(object);
4778 return KERN_SUCCESS;
4779 }
4780 VM_OBJ_RES_DECR(object);
4781 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4782 if(object->ref_count == 1) {
4783 if(vm_object_terminate(object) != KERN_SUCCESS) {
4784 if(original_object) {
4785 return KERN_FAILURE;
4786 } else {
4787 return KERN_SUCCESS;
4788 }
4789 }
4790 if (shadow != VM_OBJECT_NULL) {
4791 original_object = FALSE;
4792 object = shadow;
4793 continue;
4794 }
4795 return KERN_SUCCESS;
4796 } else {
4797 object->ref_count--;
4798 assert(object->ref_count > 0);
4799 if(original_object)
4800 object->named = FALSE;
4801 vm_object_unlock(object);
4802 vm_object_cache_unlock();
4803 return KERN_SUCCESS;
4804 }
4805 }
4806 /*NOTREACHED*/
4807 assert(0);
4808 return KERN_FAILURE;
4809 }
4810
4811
4812 __private_extern__ kern_return_t
4813 vm_object_lock_request(
4814 vm_object_t object,
4815 vm_object_offset_t offset,
4816 vm_object_size_t size,
4817 memory_object_return_t should_return,
4818 int flags,
4819 vm_prot_t prot)
4820 {
4821 __unused boolean_t should_flush;
4822
4823 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
4824
4825 XPR(XPR_MEMORY_OBJECT,
4826 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4827 (integer_t)object, offset, size,
4828 (((should_return&1)<<1)|should_flush), prot);
4829
4830 /*
4831 * Check for bogus arguments.
4832 */
4833 if (object == VM_OBJECT_NULL)
4834 return (KERN_INVALID_ARGUMENT);
4835
4836 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4837 return (KERN_INVALID_ARGUMENT);
4838
4839 size = round_page_64(size);
4840
4841 /*
4842 * Lock the object, and acquire a paging reference to
4843 * prevent the memory_object reference from being released.
4844 */
4845 vm_object_lock(object);
4846 vm_object_paging_begin(object);
4847
4848 (void)vm_object_update(object,
4849 offset, size, NULL, NULL, should_return, flags, prot);
4850
4851 vm_object_paging_end(object);
4852 vm_object_unlock(object);
4853
4854 return (KERN_SUCCESS);
4855 }
4856
4857 /*
4858 * Empty a purgable object by grabbing the physical pages assigned to it and
4859 * putting them on the free queue without writing them to backing store, etc.
4860 * When the pages are next touched they will be demand zero-fill pages. We
4861 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
4862 * skip referenced/dirty pages, pages on the active queue, etc. We're more
4863 * than happy to grab these since this is a purgable object. We mark the
4864 * object as "empty" after reaping its pages.
4865 *
4866 * On entry the object and page queues are locked, the object must be a
4867 * purgable object with no delayed copies pending.
4868 */
4869 unsigned int
4870 vm_object_purge(vm_object_t object)
4871 {
4872 vm_page_t p, next;
4873 unsigned int num_purged_pages;
4874 vm_page_t local_freeq;
4875 unsigned long local_freed;
4876 int purge_loop_quota;
4877 /* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
4878 #define PURGE_BATCH_FREE_LIMIT 50
4879 /* release page queues lock every PURGE_LOOP_QUOTA iterations */
4880 #define PURGE_LOOP_QUOTA 100
4881
4882 num_purged_pages = 0;
4883 if (object->purgable == VM_OBJECT_NONPURGABLE)
4884 return num_purged_pages;
4885
4886 object->purgable = VM_OBJECT_PURGABLE_EMPTY;
4887
4888 assert(object->copy == VM_OBJECT_NULL);
4889 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4890 purge_loop_quota = PURGE_LOOP_QUOTA;
4891
4892 local_freeq = VM_PAGE_NULL;
4893 local_freed = 0;
4894
4895 /*
4896 * Go through the object's resident pages and try and discard them.
4897 */
4898 next = (vm_page_t)queue_first(&object->memq);
4899 while (!queue_end(&object->memq, (queue_entry_t)next)) {
4900 p = next;
4901 next = (vm_page_t)queue_next(&next->listq);
4902
4903 if (purge_loop_quota-- == 0) {
4904 /*
4905 * Avoid holding the page queues lock for too long.
4906 * Let someone else take it for a while if needed.
4907 * Keep holding the object's lock to guarantee that
4908 * the object's page list doesn't change under us
4909 * while we yield.
4910 */
4911 if (local_freeq != VM_PAGE_NULL) {
4912 /*
4913 * Flush our queue of pages to free.
4914 */
4915 vm_page_free_list(local_freeq);
4916 local_freeq = VM_PAGE_NULL;
4917 local_freed = 0;
4918 }
4919 vm_page_unlock_queues();
4920 mutex_pause();
4921 vm_page_lock_queues();
4922
4923 /* resume with the current page and a new quota */
4924 purge_loop_quota = PURGE_LOOP_QUOTA;
4925 }
4926
4927
4928 if (p->busy || p->cleaning || p->laundry ||
4929 p->list_req_pending) {
4930 /* page is being acted upon, so don't mess with it */
4931 continue;
4932 }
4933 if (p->wire_count) {
4934 /* don't discard a wired page */
4935 continue;
4936 }
4937
4938 if (p->tabled) {
4939 /* clean up the object/offset table */
4940 vm_page_remove(p);
4941 }
4942 if (p->absent) {
4943 /* update the object's count of absent pages */
4944 vm_object_absent_release(object);
4945 }
4946
4947 /* we can discard this page */
4948
4949 /* advertise that this page is in a transition state */
4950 p->busy = TRUE;
4951
4952 if (p->no_isync == TRUE) {
4953 /* the page hasn't been mapped yet */
4954 /* (optimization to delay the i-cache sync) */
4955 } else {
4956 /* unmap the page */
4957 int refmod_state;
4958
4959 refmod_state = pmap_disconnect(p->phys_page);
4960 if (refmod_state & VM_MEM_MODIFIED) {
4961 p->dirty = TRUE;
4962 }
4963 }
4964
4965 if (p->dirty || p->precious) {
4966 /* we saved the cost of cleaning this page ! */
4967 num_purged_pages++;
4968 vm_page_purged_count++;
4969 }
4970
4971 /* remove page from active or inactive queue... */
4972 VM_PAGE_QUEUES_REMOVE(p);
4973
4974 /* ... and put it on our queue of pages to free */
4975 assert(!p->laundry);
4976 assert(p->object != kernel_object);
4977 assert(p->pageq.next == NULL &&
4978 p->pageq.prev == NULL);
4979 p->pageq.next = (queue_entry_t) local_freeq;
4980 local_freeq = p;
4981 if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
4982 /* flush our queue of pages to free */
4983 vm_page_free_list(local_freeq);
4984 local_freeq = VM_PAGE_NULL;
4985 local_freed = 0;
4986 }
4987 }
4988
4989 /* flush our local queue of pages to free one last time */
4990 if (local_freeq != VM_PAGE_NULL) {
4991 vm_page_free_list(local_freeq);
4992 local_freeq = VM_PAGE_NULL;
4993 local_freed = 0;
4994 }
4995
4996 return num_purged_pages;
4997 }
4998
4999 /*
5000 * vm_object_purgable_control() allows the caller to control and investigate the
5001 * state of a purgable object. A purgable object is created via a call to
5002 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgable object will
5003 * never be coalesced with any other object -- even other purgable objects --
5004 * and will thus always remain a distinct object. A purgable object has
5005 * special semantics when its reference count is exactly 1. If its reference
5006 * count is greater than 1, then a purgable object will behave like a normal
5007 * object and attempts to use this interface will result in an error return
5008 * of KERN_INVALID_ARGUMENT.
5009 *
5010 * A purgable object may be put into a "volatile" state which will make the
5011 * object's pages eligible for being reclaimed without paging to backing
5012 * store if the system runs low on memory. If the pages in a volatile
5013 * purgable object are reclaimed, the purgable object is said to have been
5014 * "emptied." When a purgable object is emptied the system will reclaim as
5015 * many pages from the object as it can in a convenient manner (pages already
5016 * en route to backing store or busy for other reasons are left as is). When
5017 * a purgable object is made volatile, its pages will generally be reclaimed
5018 * before other pages in the application's working set. This semantic is
5019 * generally used by applications which can recreate the data in the object
5020 * faster than it can be paged in. One such example might be media assets
5021 * which can be reread from a much faster RAID volume.
5022 *
5023 * A purgable object may be designated as "non-volatile" which means it will
5024 * behave like all other objects in the system with pages being written to and
5025 * read from backing store as needed to satisfy system memory needs. If the
5026 * object was emptied before the object was made non-volatile, that fact will
5027 * be returned as the old state of the purgable object (see
5028 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
5029 * were reclaimed as part of emptying the object will be refaulted in as
5030 * zero-fill on demand. It is up to the application to note that an object
5031 * was emptied and recreate the object's contents if necessary. When a
5032 * purgable object is made non-volatile, its pages will generally not be paged
5033 * out to backing store in the immediate future. A purgable object may also
5034 * be manually emptied.
5035 *
5036 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
5037 * volatile purgable object may be queried at any time. This information may
5038 * be used as a control input to let the application know when the system is
5039 * experiencing memory pressure and is reclaiming memory.
5040 *
5041 * The specified address may be any address within the purgable object. If
5042 * the specified address does not represent any object in the target task's
5043 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
5044 * object containing the specified address is not a purgable object, then
5045 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
5046 * returned.
5047 *
5048 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
5049 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
5050 * state is used to set the new state of the purgable object and return its
5051 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgable
5052 * object is returned in the parameter state.
5053 *
5054 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
5055 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
5056 * the non-volatile, volatile and volatile/empty states described above.
5057 * Setting the state of a purgable object to VM_PURGABLE_EMPTY will
5058 * immediately reclaim as many pages in the object as can be conveniently
5059 * collected (some may have already been written to backing store or be
5060 * otherwise busy).
5061 *
5062 * The process of making a purgable object non-volatile and determining its
5063 * previous state is atomic. Thus, if a purgable object is made
5064 * VM_PURGABLE_NONVOLATILE and the old state is returned as
5065 * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are
5066 * completely intact and will remain so until the object is made volatile
5067 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
5068 * was reclaimed while it was in a volatile state and its previous contents
5069 * have been lost.
5070 */
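/*
 *	Illustrative user-level sketch (assumed to use the Mach interfaces
 *	described above; not part of this file): create a purgable region,
 *	mark it volatile while idle, and later check whether it was emptied:
 *
 *		vm_address_t addr = 0;
 *		int state;
 *
 *		vm_allocate(mach_task_self(), &addr, size,
 *			    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
 *		... fill the region with recreatable data ...
 *
 *		state = VM_PURGABLE_VOLATILE;
 *		vm_purgable_control(mach_task_self(), addr,
 *				    VM_PURGABLE_SET_STATE, &state);
 *
 *		... later, before using the data again ...
 *		state = VM_PURGABLE_NONVOLATILE;
 *		vm_purgable_control(mach_task_self(), addr,
 *				    VM_PURGABLE_SET_STATE, &state);
 *		if (state == VM_PURGABLE_EMPTY)
 *			... contents were reclaimed; recreate them ...
 */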
5071 /*
5072 * The object must be locked.
5073 */
5074 kern_return_t
5075 vm_object_purgable_control(
5076 vm_object_t object,
5077 vm_purgable_t control,
5078 int *state)
5079 {
5080 int old_state;
5081 vm_page_t p;
5082
5083 if (object == VM_OBJECT_NULL) {
5084 /*
5085 * Object must already be present or it can't be purgable.
5086 */
5087 return KERN_INVALID_ARGUMENT;
5088 }
5089
5090 /*
5091 * Get current state of the purgable object.
5092 */
5093 switch (object->purgable) {
5094 case VM_OBJECT_NONPURGABLE:
5095 return KERN_INVALID_ARGUMENT;
5096
5097 case VM_OBJECT_PURGABLE_NONVOLATILE:
5098 old_state = VM_PURGABLE_NONVOLATILE;
5099 break;
5100
5101 case VM_OBJECT_PURGABLE_VOLATILE:
5102 old_state = VM_PURGABLE_VOLATILE;
5103 break;
5104
5105 case VM_OBJECT_PURGABLE_EMPTY:
5106 old_state = VM_PURGABLE_EMPTY;
5107 break;
5108
5109 default:
5110 old_state = VM_PURGABLE_NONVOLATILE;
5111 panic("Bad state (%d) for purgable object!\n",
5112 object->purgable);
5113 /*NOTREACHED*/
5114 }
5115
5116 /* purgable objects can't have delayed copies - now or in the future */
5117 assert(object->copy == VM_OBJECT_NULL);
5118 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5119
5120 /*
5121 * Execute the desired operation.
5122 */
5123 if (control == VM_PURGABLE_GET_STATE) {
5124 *state = old_state;
5125 return KERN_SUCCESS;
5126 }
5127
5128 switch (*state) {
5129 case VM_PURGABLE_NONVOLATILE:
5130 vm_page_lock_queues();
5131 if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) {
5132 assert(vm_page_purgeable_count >=
5133 object->resident_page_count);
5134 vm_page_purgeable_count -= object->resident_page_count;
5135 }
5136
5137 object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;
5138
5139 /*
5140 * If the object wasn't emptied, then mark all pages of the
5141 * object as referenced in order to give them a complete turn
5142 * of the virtual memory "clock" before becoming candidates
5143 * for paging out (if the system is suffering from memory
5144 * pressure). We don't really need to set the pmap reference
5145 * bits (which would be expensive) since the software copies
5146 * are believed if they're set to true ...
5147 */
5148 if (old_state != VM_PURGABLE_EMPTY) {
5149 for (p = (vm_page_t)queue_first(&object->memq);
5150 !queue_end(&object->memq, (queue_entry_t)p);
5151 p = (vm_page_t)queue_next(&p->listq))
5152 p->reference = TRUE;
5153 }
5154
5155 vm_page_unlock_queues();
5156
5157 break;
5158
5159 case VM_PURGABLE_VOLATILE:
5160 vm_page_lock_queues();
5161
5162 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5163 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5164 vm_page_purgeable_count += object->resident_page_count;
5165 }
5166
5167 object->purgable = VM_OBJECT_PURGABLE_VOLATILE;
5168
5169 /*
5170 * We want the newly volatile purgable object to be a
5171 * candidate for the pageout scan before other pages in the
5172 * application if the system is suffering from memory
5173 * pressure. To do this, we move a page of the object from
5174 * the active queue onto the inactive queue in order to
5175 * promote the object for early reclaim. We only need to move
5176 * a single page since the pageout scan will reap the entire
5177 * purgable object if it finds a single page in a volatile
5178 * state. Obviously we don't do this if there are no pages
5179 * associated with the object or we find a page of the object
5180 * already on the inactive queue.
5181 */
5182 for (p = (vm_page_t)queue_first(&object->memq);
5183 !queue_end(&object->memq, (queue_entry_t)p);
5184 p = (vm_page_t)queue_next(&p->listq)) {
5185 if (p->inactive) {
5186 /* already a page on the inactive queue */
5187 break;
5188 }
5189 if (p->active && !p->busy) {
5190 /* found one we can move */
5191 vm_page_deactivate(p);
5192 break;
5193 }
5194 }
5195 vm_page_unlock_queues();
5196
5197 break;
5198
5199
5200 case VM_PURGABLE_EMPTY:
5201 vm_page_lock_queues();
5202 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5203 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5204 vm_page_purgeable_count += object->resident_page_count;
5205 }
5206 (void) vm_object_purge(object);
5207 vm_page_unlock_queues();
5208 break;
5209
5210 }
5211 *state = old_state;
5212
5213 return KERN_SUCCESS;
5214 }
5215
5216 #if TASK_SWAPPER
5217 /*
5218 * vm_object_res_deallocate
5219 *
5220 * (recursively) decrement residence counts on vm objects and their shadows.
5221 * Called from vm_object_deallocate and when swapping out an object.
5222 *
5223 * The object is locked, and remains locked throughout the function,
5224 * even as we iterate down the shadow chain. Locks on intermediate objects
5225 * will be dropped, but not the original object.
5226 *
5227 * NOTE: this function used to use recursion, rather than iteration.
5228 */
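/*
 *	Illustrative note (not from the original): for a shadow chain
 *	A -> B -> C, dropping A's last residence reference deactivates A's
 *	pages and then decrements B's res_count; if that also hits zero the
 *	walk continues to C.  A stays locked the whole time, while the locks
 *	on B and C are dropped as the iteration moves past them.
 */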
5229
5230 __private_extern__ void
5231 vm_object_res_deallocate(
5232 vm_object_t object)
5233 {
5234 vm_object_t orig_object = object;
5235 /*
5236 * Object is locked so it can be called directly
5237 * from vm_object_deallocate. Original object is never
5238 * unlocked.
5239 */
5240 assert(object->res_count > 0);
5241 while (--object->res_count == 0) {
5242 assert(object->ref_count >= object->res_count);
5243 vm_object_deactivate_all_pages(object);
5244 /* iterate on shadow, if present */
5245 if (object->shadow != VM_OBJECT_NULL) {
5246 vm_object_t tmp_object = object->shadow;
5247 vm_object_lock(tmp_object);
5248 if (object != orig_object)
5249 vm_object_unlock(object);
5250 object = tmp_object;
5251 assert(object->res_count > 0);
5252 } else
5253 break;
5254 }
5255 if (object != orig_object)
5256 vm_object_unlock(object);
5257 }
5258
5259 /*
5260 * vm_object_res_reference
5261 *
5262 * Internal function to increment residence count on a vm object
5263 * and its shadows. It is called only from vm_object_reference, and
5264 * when swapping in a vm object, via vm_map_swap.
5265 *
5266 * The object is locked, and remains locked throughout the function,
5267 * even as we iterate down the shadow chain. Locks on intermediate objects
5268 * will be dropped, but not the original object.
5269 *
5270 * NOTE: this function used to use recursion, rather than iteration.
5271 */
5272
5273 __private_extern__ void
5274 vm_object_res_reference(
5275 vm_object_t object)
5276 {
5277 vm_object_t orig_object = object;
5278 /*
5279 * Object is locked, so this can be called directly
5280 * from vm_object_reference. This lock is never released.
5281 */
5282 while ((++object->res_count == 1) &&
5283 (object->shadow != VM_OBJECT_NULL)) {
5284 vm_object_t tmp_object = object->shadow;
5285
5286 assert(object->ref_count >= object->res_count);
5287 vm_object_lock(tmp_object);
5288 if (object != orig_object)
5289 vm_object_unlock(object);
5290 object = tmp_object;
5291 }
5292 if (object != orig_object)
5293 vm_object_unlock(object);
5294 assert(orig_object->ref_count >= orig_object->res_count);
5295 }
5296 #endif /* TASK_SWAPPER */
5297
5298 /*
5299 * vm_object_reference:
5300 *
5301 * Gets another reference to the given object.
5302 */
5303 #ifdef vm_object_reference
5304 #undef vm_object_reference
5305 #endif
5306 __private_extern__ void
5307 vm_object_reference(
5308 register vm_object_t object)
5309 {
5310 if (object == VM_OBJECT_NULL)
5311 return;
5312
5313 vm_object_lock(object);
5314 assert(object->ref_count > 0);
5315 vm_object_reference_locked(object);
5316 vm_object_unlock(object);
5317 }
5318
5319 #ifdef MACH_BSD
5320 /*
5321 * Scale the vm_object_cache
5322 * This is required to make sure that the vm_object_cache is big
5323 * enough to effectively cache the mapped file.
5324 * This is really important with UBC as all the regular file vnodes
5325 * have a memory object associated with them. Having this cache too
5326 * small results in rapid reclaim of vnodes and hurts performance a LOT!
5327 *
5328 * This is also needed as number of vnodes can be dynamically scaled.
5329 */
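/*
 *	Illustrative sketch (hypothetical caller and variable names): when the
 *	BSD layer rescales its vnode limit, it can rescale this cache to match:
 *
 *		(void) adjust_vm_object_cache(old_max, new_vnode_limit);
 */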
5330 kern_return_t
5331 adjust_vm_object_cache(
5332 __unused vm_size_t oval,
5333 vm_size_t nval)
5334 {
5335 vm_object_cached_max = nval;
5336 vm_object_cache_trim(FALSE);
5337 return (KERN_SUCCESS);
5338 }
5339 #endif /* MACH_BSD */
5340
5341
5342 /*
5343 * vm_object_transpose
5344 *
5345 * This routine takes two VM objects of the same size and exchanges
5346 * their backing store.
5347 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
5348 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
5349 *
5350 * The VM objects must not be locked by caller.
5351 */
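/*
 *	Illustrative sketch (hypothetical caller): once both objects have been
 *	quiesced as described above, the exchange itself is a single call:
 *
 *		kr = vm_object_transpose(object1, object2, size);
 *		if (kr != KERN_SUCCESS)
 *			... nothing was exchanged; the objects are unchanged ...
 */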
5352 kern_return_t
5353 vm_object_transpose(
5354 vm_object_t object1,
5355 vm_object_t object2,
5356 vm_object_size_t transpose_size)
5357 {
5358 vm_object_t tmp_object;
5359 kern_return_t retval;
5360 boolean_t object1_locked, object2_locked;
5361 boolean_t object1_paging, object2_paging;
5362 vm_page_t page;
5363 vm_object_offset_t page_offset;
5364
5365 tmp_object = VM_OBJECT_NULL;
5366 object1_locked = FALSE; object2_locked = FALSE;
5367 object1_paging = FALSE; object2_paging = FALSE;
5368
5369 if (object1 == object2 ||
5370 object1 == VM_OBJECT_NULL ||
5371 object2 == VM_OBJECT_NULL) {
5372 /*
5373 * If the 2 VM objects are the same, there's
5374 * no point in exchanging their backing store.
5375 */
5376 retval = KERN_INVALID_VALUE;
5377 goto done;
5378 }
5379
5380 vm_object_lock(object1);
5381 object1_locked = TRUE;
5382 if (object1->copy || object1->shadow || object1->shadowed ||
5383 object1->purgable != VM_OBJECT_NONPURGABLE) {
5384 /*
5385 * We don't deal with copy or shadow objects (yet).
5386 */
5387 retval = KERN_INVALID_VALUE;
5388 goto done;
5389 }
5390 /*
5391 * Since we're about to mess with the object's backing store,
5392 * mark it as "paging_in_progress". Note that this is not enough
5393 * to prevent any paging activity on this object, so the caller should
5394 * have "quiesced" the objects beforehand, via a UPL operation with
5395 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
5396 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
5397 */
5398 vm_object_paging_begin(object1);
5399 object1_paging = TRUE;
5400 vm_object_unlock(object1);
5401 object1_locked = FALSE;
5402
5403 /*
5404 * Same as above for the 2nd object...
5405 */
5406 vm_object_lock(object2);
5407 object2_locked = TRUE;
5408 if (object2->copy || object2->shadow || object2->shadowed ||
5409 object2->purgable != VM_OBJECT_NONPURGABLE) {
5410 retval = KERN_INVALID_VALUE;
5411 goto done;
5412 }
5413 vm_object_paging_begin(object2);
5414 object2_paging = TRUE;
5415 vm_object_unlock(object2);
5416 object2_locked = FALSE;
5417
5418 /*
5419 * Allocate a temporary VM object to hold object1's contents
5420 * while we copy object2 to object1.
5421 */
5422 tmp_object = vm_object_allocate(transpose_size);
5423 vm_object_lock(tmp_object);
5424 vm_object_paging_begin(tmp_object);
5425 tmp_object->can_persist = FALSE;
5426
5427 /*
5428 * Since we need to lock both objects at the same time,
5429 * make sure we always lock them in the same order to
5430 * avoid deadlocks.
5431 */
5432 if (object1 < object2) {
5433 vm_object_lock(object1);
5434 vm_object_lock(object2);
5435 } else {
5436 vm_object_lock(object2);
5437 vm_object_lock(object1);
5438 }
5439 object1_locked = TRUE;
5440 object2_locked = TRUE;
5441
5442 if (object1->size != object2->size ||
5443 object1->size != transpose_size) {
5444 /*
5445 * If the 2 objects don't have the same size, we can't
5446 * exchange their backing stores or one would overflow.
5447 * If their size doesn't match the caller's
5448 * "transpose_size", we can't do it either because the
5449 * transpose operation will affect the entire span of
5450 * the objects.
5451 */
5452 retval = KERN_INVALID_VALUE;
5453 goto done;
5454 }
5455
5456
5457 /*
5458 * Transpose the lists of resident pages.
5459 */
5460 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
5461 /*
5462 * No pages in object1, just transfer pages
5463 * from object2 to object1. No need to go through
5464 * an intermediate object.
5465 */
5466 while (!queue_empty(&object2->memq)) {
5467 page = (vm_page_t) queue_first(&object2->memq);
5468 vm_page_rename(page, object1, page->offset);
5469 }
5470 assert(queue_empty(&object2->memq));
5471 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
5472 /*
5473 * No pages in object2, just transfer pages
5474 * from object1 to object2. No need to go through
5475 * an intermediate object.
5476 */
5477 while (!queue_empty(&object1->memq)) {
5478 page = (vm_page_t) queue_first(&object1->memq);
5479 vm_page_rename(page, object2, page->offset);
5480 }
5481 assert(queue_empty(&object1->memq));
5482 } else {
5483 /* transfer object1's pages to tmp_object */
5484 vm_page_lock_queues();
5485 while (!queue_empty(&object1->memq)) {
5486 page = (vm_page_t) queue_first(&object1->memq);
5487 page_offset = page->offset;
5488 vm_page_remove(page);
5489 page->offset = page_offset;
5490 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
5491 }
5492 vm_page_unlock_queues();
5493 assert(queue_empty(&object1->memq));
5494 /* transfer object2's pages to object1 */
5495 while (!queue_empty(&object2->memq)) {
5496 page = (vm_page_t) queue_first(&object2->memq);
5497 vm_page_rename(page, object1, page->offset);
5498 }
5499 assert(queue_empty(&object2->memq));
5500 /* transfer tmp_object's pages to object1 */
5501 while (!queue_empty(&tmp_object->memq)) {
5502 page = (vm_page_t) queue_first(&tmp_object->memq);
5503 queue_remove(&tmp_object->memq, page,
5504 vm_page_t, listq);
5505 vm_page_insert(page, object2, page->offset);
5506 }
5507 assert(queue_empty(&tmp_object->memq));
5508 }
5509
5510 /* no need to transpose the size: they should be identical */
5511 assert(object1->size == object2->size);
5512
5513 #define __TRANSPOSE_FIELD(field) \
5514 MACRO_BEGIN \
5515 tmp_object->field = object1->field; \
5516 object1->field = object2->field; \
5517 object2->field = tmp_object->field; \
5518 MACRO_END
5519
5520 assert(!object1->copy);
5521 assert(!object2->copy);
5522
5523 assert(!object1->shadow);
5524 assert(!object2->shadow);
5525
5526 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
5527 __TRANSPOSE_FIELD(pager);
5528 __TRANSPOSE_FIELD(paging_offset);
5529
5530 __TRANSPOSE_FIELD(pager_control);
5531 /* update the memory_objects' pointers back to the VM objects */
5532 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5533 memory_object_control_collapse(object1->pager_control,
5534 object1);
5535 }
5536 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5537 memory_object_control_collapse(object2->pager_control,
5538 object2);
5539 }
5540
5541 __TRANSPOSE_FIELD(absent_count);
5542
5543 assert(object1->paging_in_progress);
5544 assert(object2->paging_in_progress);
5545
5546 __TRANSPOSE_FIELD(pager_created);
5547 __TRANSPOSE_FIELD(pager_initialized);
5548 __TRANSPOSE_FIELD(pager_ready);
5549 __TRANSPOSE_FIELD(pager_trusted);
5550 __TRANSPOSE_FIELD(internal);
5551 __TRANSPOSE_FIELD(temporary);
5552 __TRANSPOSE_FIELD(private);
5553 __TRANSPOSE_FIELD(pageout);
5554 __TRANSPOSE_FIELD(true_share);
5555 __TRANSPOSE_FIELD(phys_contiguous);
5556 __TRANSPOSE_FIELD(nophyscache);
5557 __TRANSPOSE_FIELD(last_alloc);
5558 __TRANSPOSE_FIELD(sequential);
5559 __TRANSPOSE_FIELD(cluster_size);
5560 __TRANSPOSE_FIELD(existence_map);
5561 __TRANSPOSE_FIELD(cow_hint);
5562 __TRANSPOSE_FIELD(wimg_bits);
5563
5564 #undef __TRANSPOSE_FIELD
5565
5566 retval = KERN_SUCCESS;
5567
5568 done:
5569 /*
5570 * Cleanup.
5571 */
5572 if (tmp_object != VM_OBJECT_NULL) {
5573 vm_object_paging_end(tmp_object);
5574 vm_object_unlock(tmp_object);
5575 /*
5576 * Re-initialize the temporary object to avoid
5577 * deallocating a real pager.
5578 */
5579 _vm_object_allocate(transpose_size, tmp_object);
5580 vm_object_deallocate(tmp_object);
5581 tmp_object = VM_OBJECT_NULL;
5582 }
5583
5584 if (object1_locked) {
5585 vm_object_unlock(object1);
5586 object1_locked = FALSE;
5587 }
5588 if (object2_locked) {
5589 vm_object_unlock(object2);
5590 object2_locked = FALSE;
5591 }
5592 if (object1_paging) {
5593 vm_object_lock(object1);
5594 vm_object_paging_end(object1);
5595 vm_object_unlock(object1);
5596 object1_paging = FALSE;
5597 }
5598 if (object2_paging) {
5599 vm_object_lock(object2);
5600 vm_object_paging_end(object2);
5601 vm_object_unlock(object2);
5602 object2_paging = FALSE;
5603 }
5604
5605 return retval;
5606 }