]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_object.c
xnu-344.49.tar.gz
[apple/xnu.git] / osfmk / vm / vm_object.c
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * @OSF_COPYRIGHT@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53 /*
54 */
55 /*
56 * File: vm/vm_object.c
57 * Author: Avadis Tevanian, Jr., Michael Wayne Young
58 *
59 * Virtual memory object module.
60 */
61
62 #ifdef MACH_BSD
63 /* remove as part of component support merge */
64 extern int vnode_pager_workaround;
65 #endif
66
67 #include <mach_pagemap.h>
68 #include <task_swapper.h>
69
70 #include <mach/mach_types.h>
71 #include <mach/memory_object.h>
72 #include <mach/memory_object_default.h>
73 #include <mach/memory_object_control_server.h>
74 #include <mach/vm_param.h>
75 #include <ipc/ipc_port.h>
76 #include <kern/assert.h>
77 #include <kern/lock.h>
78 #include <kern/queue.h>
79 #include <kern/xpr.h>
80 #include <kern/zalloc.h>
81 #include <kern/host.h>
82 #include <kern/host_statistics.h>
83 #include <kern/processor.h>
84 #include <vm/memory_object.h>
85 #include <vm/vm_fault.h>
86 #include <vm/vm_map.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_page.h>
89 #include <vm/vm_pageout.h>
90 #include <kern/misc_protos.h>
91
92
93
94 /*
95 * Virtual memory objects maintain the actual data
96 * associated with allocated virtual memory. A given
97 * page of memory exists within exactly one object.
98 *
99 * An object is only deallocated when all "references"
100 * are given up.
101 *
102 * Associated with each object is a list of all resident
103 * memory pages belonging to that object; this list is
104 * maintained by the "vm_page" module, but locked by the object's
105 * lock.
106 *
107 * Each object also records the memory object reference
108 * that is used by the kernel to request and write
109 * back data (the memory object, field "pager"), etc...
110 *
111 * Virtual memory objects are allocated to provide
112 * zero-filled memory (vm_allocate) or map a user-defined
113 * memory object into a virtual address space (vm_map).
114 *
115 * Virtual memory objects that refer to a user-defined
116 * memory object are called "permanent", because all changes
117 * made in virtual memory are reflected back to the
118 * memory manager, which may then store it permanently.
119 * Other virtual memory objects are called "temporary",
120 * meaning that changes need be written back only when
121 * necessary to reclaim pages, and that storage associated
122 * with the object can be discarded once it is no longer
123 * mapped.
124 *
125 * A permanent memory object may be mapped into more
126 * than one virtual address space. Moreover, two threads
127 * may attempt to make the first mapping of a memory
128 * object concurrently. Only one thread is allowed to
129 * complete this mapping; all others wait until the
130 * "pager_initialized" field is asserted, indicating
131 * that the first thread has initialized all of the
132 * necessary fields in the virtual memory object structure.
133 *
134 * The kernel relies on a *default memory manager* to
135 * provide backing storage for the zero-filled virtual
136 * memory objects. The pager memory objects associated
137 * with these temporary virtual memory objects are only
138 * requested from the default memory manager when it
139 * becomes necessary. Virtual memory objects
140 * that depend on the default memory manager are called
141 * "internal". The "pager_created" field is provided to
142 * indicate whether these ports have ever been allocated.
143 *
144 * The kernel may also create virtual memory objects to
145 * hold changed pages after a copy-on-write operation.
146 * In this case, the virtual memory object (and its
147 * backing storage -- its memory object) only contain
148 * those pages that have been changed. The "shadow"
149 * field refers to the virtual memory object that contains
150 * the remainder of the contents. The "shadow_offset"
151 * field indicates where in the "shadow" these contents begin.
152 * The "copy" field refers to a virtual memory object
153 * to which changed pages must be copied before changing
154 * this object, in order to implement another form
155 * of copy-on-write optimization.
156 *
157 * The virtual memory object structure also records
158 * the attributes associated with its memory object.
159 * The "pager_ready", "can_persist" and "copy_strategy"
160 * fields represent those attributes. The "cached_list"
161 * field is used in the implementation of the persistence
162 * attribute.
163 *
164 * ZZZ Continue this comment.
165 */
166
167 /* Forward declarations for internal functions. */
168 static void _vm_object_allocate(
169 vm_object_size_t size,
170 vm_object_t object);
171
172 static kern_return_t vm_object_terminate(
173 vm_object_t object);
174
175 extern void vm_object_remove(
176 vm_object_t object);
177
178 static vm_object_t vm_object_cache_trim(
179 boolean_t called_from_vm_object_deallocate);
180
181 static void vm_object_deactivate_all_pages(
182 vm_object_t object);
183
184 static void vm_object_abort_activity(
185 vm_object_t object);
186
187 static kern_return_t vm_object_copy_call(
188 vm_object_t src_object,
189 vm_object_offset_t src_offset,
190 vm_object_size_t size,
191 vm_object_t *_result_object);
192
193 static void vm_object_do_collapse(
194 vm_object_t object,
195 vm_object_t backing_object);
196
197 static void vm_object_do_bypass(
198 vm_object_t object,
199 vm_object_t backing_object);
200
201 static void vm_object_release_pager(
202 memory_object_t pager);
203
204 static zone_t vm_object_zone; /* vm backing store zone */
205
206 /*
207 * All wired-down kernel memory belongs to a single virtual
208 * memory object (kernel_object) to avoid wasting data structures.
209 */
210 static struct vm_object kernel_object_store;
211 __private_extern__ vm_object_t kernel_object = &kernel_object_store;
212
213 /*
214 * The submap object is used as a placeholder for vm_map_submap
215 * operations. The object is declared in vm_map.c because it
216 * is exported by the vm_map module. The storage is declared
217 * here because it must be initialized here.
218 */
219 static struct vm_object vm_submap_object_store;
220
221 /*
222 * Virtual memory objects are initialized from
223 * a template (see vm_object_allocate).
224 *
225 * When adding a new field to the virtual memory
226 * object structure, be sure to add initialization
227 * (see _vm_object_allocate()).
228 */
229 static struct vm_object vm_object_template;
230
231 /*
232 * Virtual memory objects that are not referenced by
233 * any address maps, but that are allowed to persist
234 * (an attribute specified by the associated memory manager),
235 * are kept in a queue (vm_object_cached_list).
236 *
237 * When an object from this queue is referenced again,
238 * for example to make another address space mapping,
239 * it must be removed from the queue. That is, the
240 * queue contains *only* objects with zero references.
241 *
242 * The kernel may choose to terminate objects from this
243 * queue in order to reclaim storage. The current policy
244 * is to permit a fixed maximum number of unreferenced
245 * objects (vm_object_cached_max).
246 *
247 * A mutex (accessed by routines
248 * vm_object_cache_{lock,lock_try,unlock}) governs the
249 * object cache. It must be held when objects are
250 * added to or removed from the cache (in vm_object_terminate).
251 * The routines that acquire a reference to a virtual
252 * memory object based on one of the memory object ports
253 * must also lock the cache.
254 *
255 * Ideally, the object cache should be more isolated
256 * from the reference mechanism, so that the lock need
257 * not be held to make simple references.
258 */
259 static queue_head_t vm_object_cached_list;
260 static int vm_object_cached_count=0;
261 static int vm_object_cached_high; /* highest # cached objects */
262 static int vm_object_cached_max = 512; /* may be patched*/
263
264 static decl_mutex_data(,vm_object_cached_lock_data)
265
266 #define vm_object_cache_lock() \
267 mutex_lock(&vm_object_cached_lock_data)
268 #define vm_object_cache_lock_try() \
269 mutex_try(&vm_object_cached_lock_data)
270 #define vm_object_cache_unlock() \
271 mutex_unlock(&vm_object_cached_lock_data)
272
273 #define VM_OBJECT_HASH_COUNT 1024
274 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
275 static struct zone *vm_object_hash_zone;
276
277 struct vm_object_hash_entry {
278 queue_chain_t hash_link; /* hash chain link */
279 memory_object_t pager; /* pager we represent */
280 vm_object_t object; /* corresponding object */
281 boolean_t waiting; /* someone waiting for
282 * termination */
283 };
284
285 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
286 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
287
/*
 *	vm_object_hash maps a pager handle to a bucket index in
 *	vm_object_hashtable: the handle is converted to an unsigned
 *	value, its low VM_OBJECT_HASH_SHIFT bits are discarded, and
 *	the result is reduced modulo VM_OBJECT_HASH_COUNT.
 *
 *	The argument is parenthesized so that the cast applies to the
 *	whole argument expression, not just to its first operand.
 */
#define VM_OBJECT_HASH_SHIFT	8
#define vm_object_hash(pager) \
	((((unsigned)(pager)) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
291
292 /*
293 * vm_object_hash_lookup looks up a pager in the hashtable
294 * and returns the corresponding entry, with optional removal.
295 */
296
297 static vm_object_hash_entry_t
298 vm_object_hash_lookup(
299 memory_object_t pager,
300 boolean_t remove_entry)
301 {
302 register queue_t bucket;
303 register vm_object_hash_entry_t entry;
304
305 bucket = &vm_object_hashtable[vm_object_hash(pager)];
306
307 entry = (vm_object_hash_entry_t)queue_first(bucket);
308 while (!queue_end(bucket, (queue_entry_t)entry)) {
309 if (entry->pager == pager && !remove_entry)
310 return(entry);
311 else if (entry->pager == pager) {
312 queue_remove(bucket, entry,
313 vm_object_hash_entry_t, hash_link);
314 return(entry);
315 }
316
317 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
318 }
319
320 return(VM_OBJECT_HASH_ENTRY_NULL);
321 }
322
323 /*
324 * vm_object_hash_enter enters the specified
325 * pager / cache object association in the hashtable.
326 */
327
328 static void
329 vm_object_hash_insert(
330 vm_object_hash_entry_t entry)
331 {
332 register queue_t bucket;
333
334 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
335
336 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
337 }
338
339 static vm_object_hash_entry_t
340 vm_object_hash_entry_alloc(
341 memory_object_t pager)
342 {
343 vm_object_hash_entry_t entry;
344
345 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
346 entry->pager = pager;
347 entry->object = VM_OBJECT_NULL;
348 entry->waiting = FALSE;
349
350 return(entry);
351 }
352
353 void
354 vm_object_hash_entry_free(
355 vm_object_hash_entry_t entry)
356 {
357 zfree(vm_object_hash_zone, (vm_offset_t)entry);
358 }
359
360 /*
361 * vm_object_allocate:
362 *
363 * Returns a new object with the given size.
364 */
365
366 static void
367 _vm_object_allocate(
368 vm_object_size_t size,
369 vm_object_t object)
370 {
371 XPR(XPR_VM_OBJECT,
372 "vm_object_allocate, object 0x%X size 0x%X\n",
373 (integer_t)object, size, 0,0,0);
374
375 *object = vm_object_template;
376 queue_init(&object->memq);
377 queue_init(&object->msr_q);
378 #ifdef UBC_DEBUG
379 queue_init(&object->uplq);
380 #endif /* UBC_DEBUG */
381 vm_object_lock_init(object);
382 object->size = size;
383 }
384
385 __private_extern__ vm_object_t
386 vm_object_allocate(
387 vm_object_size_t size)
388 {
389 register vm_object_t object;
390
391 object = (vm_object_t) zalloc(vm_object_zone);
392
393 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
394
395 if (object != VM_OBJECT_NULL)
396 _vm_object_allocate(size, object);
397
398 return object;
399 }
400
401 /*
402 * vm_object_bootstrap:
403 *
404 * Initialize the VM objects module.
405 */
406 __private_extern__ void
407 vm_object_bootstrap(void)
408 {
409 register i;
410
411 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
412 round_page(512*1024),
413 round_page(12*1024),
414 "vm objects");
415
416 queue_init(&vm_object_cached_list);
417 mutex_init(&vm_object_cached_lock_data, ETAP_VM_OBJ_CACHE);
418
419 vm_object_hash_zone =
420 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
421 round_page(512*1024),
422 round_page(12*1024),
423 "vm object hash entries");
424
425 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
426 queue_init(&vm_object_hashtable[i]);
427
428 /*
429 * Fill in a template object, for quick initialization
430 */
431
432 /* memq; Lock; init after allocation */
433 vm_object_template.size = 0;
434 vm_object_template.frozen_size = 0;
435 vm_object_template.ref_count = 1;
436 #if TASK_SWAPPER
437 vm_object_template.res_count = 1;
438 #endif /* TASK_SWAPPER */
439 vm_object_template.resident_page_count = 0;
440 vm_object_template.copy = VM_OBJECT_NULL;
441 vm_object_template.shadow = VM_OBJECT_NULL;
442 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
443 vm_object_template.cow_hint = 0;
444 vm_object_template.true_share = FALSE;
445
446 vm_object_template.pager = MEMORY_OBJECT_NULL;
447 vm_object_template.paging_offset = 0;
448 vm_object_template.pager_request = PAGER_REQUEST_NULL;
449 /* msr_q; init after allocation */
450
451 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
452 vm_object_template.absent_count = 0;
453 vm_object_template.paging_in_progress = 0;
454
455 /* Begin bitfields */
456 vm_object_template.all_wanted = 0; /* all bits FALSE */
457 vm_object_template.pager_created = FALSE;
458 vm_object_template.pager_initialized = FALSE;
459 vm_object_template.pager_ready = FALSE;
460 vm_object_template.pager_trusted = FALSE;
461 vm_object_template.can_persist = FALSE;
462 vm_object_template.internal = TRUE;
463 vm_object_template.temporary = TRUE;
464 vm_object_template.private = FALSE;
465 vm_object_template.pageout = FALSE;
466 vm_object_template.alive = TRUE;
467 vm_object_template.lock_in_progress = FALSE;
468 vm_object_template.lock_restart = FALSE;
469 vm_object_template.silent_overwrite = FALSE;
470 vm_object_template.advisory_pageout = FALSE;
471 vm_object_template.shadowed = FALSE;
472 vm_object_template.terminating = FALSE;
473 vm_object_template.shadow_severed = FALSE;
474 vm_object_template.phys_contiguous = FALSE;
475 vm_object_template.nophyscache = FALSE;
476 /* End bitfields */
477
478 /* cache bitfields */
479 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
480
481 /* cached_list; init after allocation */
482 vm_object_template.last_alloc = (vm_object_offset_t) 0;
483 vm_object_template.cluster_size = 0;
484 #if MACH_PAGEMAP
485 vm_object_template.existence_map = VM_EXTERNAL_NULL;
486 #endif /* MACH_PAGEMAP */
487 #if MACH_ASSERT
488 vm_object_template.paging_object = VM_OBJECT_NULL;
489 #endif /* MACH_ASSERT */
490
491 /*
492 * Initialize the "kernel object"
493 */
494
495 kernel_object = &kernel_object_store;
496
497 /*
498 * Note that in the following size specifications, we need to add 1 because
499 * VM_MAX_KERNEL_ADDRESS is a maximum address, not a size.
500 */
501 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
502 kernel_object);
503
504 /*
505 * Initialize the "submap object". Make it as large as the
506 * kernel object so that no limit is imposed on submap sizes.
507 */
508
509 vm_submap_object = &vm_submap_object_store;
510 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
511 vm_submap_object);
512 /*
513 * Create an "extra" reference to this object so that we never
514 * try to deallocate it; zfree doesn't like to be called with
515 * non-zone memory.
516 */
517 vm_object_reference(vm_submap_object);
518
519 #if MACH_PAGEMAP
520 vm_external_module_initialize();
521 #endif /* MACH_PAGEMAP */
522 }
523
524 __private_extern__ void
525 vm_object_init(void)
526 {
527 /*
528 * Finish initializing the kernel object.
529 */
530 }
531
532 /* remove the typedef below when emergency work-around is taken out */
533 typedef struct vnode_pager {
534 memory_object_t pager;
535 memory_object_t pager_handle; /* pager */
536 memory_object_control_t control_handle; /* memory object's control handle */
537 void *vnode_handle; /* vnode handle */
538 } *vnode_pager_t;
539
540 #define MIGHT_NOT_CACHE_SHADOWS 1
541 #if MIGHT_NOT_CACHE_SHADOWS
542 static int cache_shadows = TRUE;
543 #endif /* MIGHT_NOT_CACHE_SHADOWS */
544
545 /*
546 * vm_object_deallocate:
547 *
548 * Release a reference to the specified object,
549 * gained either through a vm_object_allocate
550 * or a vm_object_reference call. When all references
551 * are gone, storage associated with this object
552 * may be relinquished.
553 *
554 * No object may be locked.
555 */
556 __private_extern__ void
557 vm_object_deallocate(
558 register vm_object_t object)
559 {
560 boolean_t retry_cache_trim = FALSE;
561 vm_object_t shadow;
562
563 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
564 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
565
566
567 while (object != VM_OBJECT_NULL) {
568
569 /*
570 * The cache holds a reference (uncounted) to
571 * the object; we must lock it before removing
572 * the object.
573 */
574
575 vm_object_cache_lock();
576 vm_object_lock(object);
577
578 assert(object->ref_count > 0);
579
580 /*
581 * If the object has a named reference, and only
582 * that reference would remain, inform the pager
583 * about the last "mapping" reference going away.
584 */
585 if ((object->ref_count == 2) && (object->named)) {
586 memory_object_t pager = object->pager;
587
588 /* Notify the Pager that there are no */
589 /* more mappers for this object */
590
591 if (pager != MEMORY_OBJECT_NULL) {
592 vm_object_unlock(object);
593 vm_object_cache_unlock();
594
595 memory_object_unmap(pager);
596
597 vm_object_cache_lock();
598 vm_object_lock(object);
599 assert(object->ref_count > 0);
600 }
601 }
602
603 /*
604 * Lose the reference. If other references
605 * remain, then we are done, unless we need
606 * to retry a cache trim.
607 * If it is the last reference, then keep it
608 * until any pending initialization is completed.
609 */
610
611 /* if the object is terminating, it cannot go into */
612 /* the cache and we obviously should not call */
613 /* terminate again. */
614
615 if ((object->ref_count > 1) || object->terminating) {
616 object->ref_count--;
617 vm_object_res_deallocate(object);
618 vm_object_unlock(object);
619 vm_object_cache_unlock();
620 if (retry_cache_trim &&
621 ((object = vm_object_cache_trim(TRUE)) !=
622 VM_OBJECT_NULL)) {
623 continue;
624 }
625 return;
626 }
627
628 /*
629 * We have to wait for initialization
630 * before destroying or caching the object.
631 */
632
633 if (object->pager_created && ! object->pager_initialized) {
634 assert(! object->can_persist);
635 vm_object_assert_wait(object,
636 VM_OBJECT_EVENT_INITIALIZED,
637 THREAD_UNINT);
638 vm_object_unlock(object);
639 vm_object_cache_unlock();
640 thread_block(THREAD_CONTINUE_NULL);
641 continue;
642 }
643
644 /*
645 * If this object can persist, then enter it in
646 * the cache. Otherwise, terminate it.
647 *
648 * NOTE: Only permanent objects are cached, and
649 * permanent objects cannot have shadows. This
650 * affects the residence counting logic in a minor
651 * way (can do it in-line, mostly).
652 */
653
654 if ((object->can_persist) && (object->alive)) {
655 /*
656 * Now it is safe to decrement reference count,
657 * and to return if reference count is > 0.
658 */
659 if (--object->ref_count > 0) {
660 vm_object_res_deallocate(object);
661 vm_object_unlock(object);
662 vm_object_cache_unlock();
663 if (retry_cache_trim &&
664 ((object = vm_object_cache_trim(TRUE)) !=
665 VM_OBJECT_NULL)) {
666 continue;
667 }
668 return;
669 }
670
671 #if MIGHT_NOT_CACHE_SHADOWS
672 /*
673 * Remove shadow now if we don't
674 * want to cache shadows.
675 */
676 if (! cache_shadows) {
677 shadow = object->shadow;
678 object->shadow = VM_OBJECT_NULL;
679 }
680 #endif /* MIGHT_NOT_CACHE_SHADOWS */
681
682 /*
683 * Enter the object onto the queue of
684 * cached objects, and deactivate
685 * all of its pages.
686 */
687 assert(object->shadow == VM_OBJECT_NULL);
688 VM_OBJ_RES_DECR(object);
689 XPR(XPR_VM_OBJECT,
690 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
691 (integer_t)object,
692 (integer_t)vm_object_cached_list.next,
693 (integer_t)vm_object_cached_list.prev,0,0);
694
695 vm_object_cached_count++;
696 if (vm_object_cached_count > vm_object_cached_high)
697 vm_object_cached_high = vm_object_cached_count;
698 queue_enter(&vm_object_cached_list, object,
699 vm_object_t, cached_list);
700 vm_object_cache_unlock();
701 vm_object_deactivate_all_pages(object);
702 vm_object_unlock(object);
703
704 #if MIGHT_NOT_CACHE_SHADOWS
705 /*
706 * If we have a shadow that we need
707 * to deallocate, do so now, remembering
708 * to trim the cache later.
709 */
710 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
711 object = shadow;
712 retry_cache_trim = TRUE;
713 continue;
714 }
715 #endif /* MIGHT_NOT_CACHE_SHADOWS */
716
717 /*
718 * Trim the cache. If the cache trim
719 * returns with a shadow for us to deallocate,
720 * then remember to retry the cache trim
721 * when we are done deallocating the shadow.
722 * Otherwise, we are done.
723 */
724
725 object = vm_object_cache_trim(TRUE);
726 if (object == VM_OBJECT_NULL) {
727 return;
728 }
729 retry_cache_trim = TRUE;
730
731 } else {
732 /*
733 * This object is not cachable; terminate it.
734 */
735 XPR(XPR_VM_OBJECT,
736 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%lX ref %d\n",
737 (integer_t)object, object->resident_page_count,
738 object->paging_in_progress,
739 (natural_t)current_thread(),object->ref_count);
740
741 VM_OBJ_RES_DECR(object); /* XXX ? */
742 /*
743 * Terminate this object. If it had a shadow,
744 * then deallocate it; otherwise, if we need
745 * to retry a cache trim, do so now; otherwise,
746 * we are done. "pageout" objects have a shadow,
747 * but maintain a "paging reference" rather than
748 * a normal reference.
749 */
750 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
751 if(vm_object_terminate(object) != KERN_SUCCESS) {
752 return;
753 }
754 if (shadow != VM_OBJECT_NULL) {
755 object = shadow;
756 continue;
757 }
758 if (retry_cache_trim &&
759 ((object = vm_object_cache_trim(TRUE)) !=
760 VM_OBJECT_NULL)) {
761 continue;
762 }
763 return;
764 }
765 }
766 assert(! retry_cache_trim);
767 }
768
769 /*
770 * Check to see whether we really need to trim
771 * down the cache. If so, remove an object from
772 * the cache, terminate it, and repeat.
773 *
774 * Called with, and returns with, cache lock unlocked.
775 */
776 vm_object_t
777 vm_object_cache_trim(
778 boolean_t called_from_vm_object_deallocate)
779 {
780 register vm_object_t object = VM_OBJECT_NULL;
781 vm_object_t shadow;
782
783 for (;;) {
784
785 /*
786 * If we no longer need to trim the cache,
787 * then we are done.
788 */
789
790 vm_object_cache_lock();
791 if (vm_object_cached_count <= vm_object_cached_max) {
792 vm_object_cache_unlock();
793 return VM_OBJECT_NULL;
794 }
795
796 /*
797 * We must trim down the cache, so remove
798 * the first object in the cache.
799 */
800 XPR(XPR_VM_OBJECT,
801 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
802 (integer_t)vm_object_cached_list.next,
803 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
804
805 object = (vm_object_t) queue_first(&vm_object_cached_list);
806 if(object == (vm_object_t) &vm_object_cached_list) {
807 /* something's wrong with the calling parameter or */
808 /* the value of vm_object_cached_count, just fix */
809 /* and return */
810 if(vm_object_cached_max < 0)
811 vm_object_cached_max = 0;
812 vm_object_cached_count = 0;
813 vm_object_cache_unlock();
814 return VM_OBJECT_NULL;
815 }
816 vm_object_lock(object);
817 queue_remove(&vm_object_cached_list, object, vm_object_t,
818 cached_list);
819 vm_object_cached_count--;
820
821 /*
822 * Since this object is in the cache, we know
823 * that it is initialized and has no references.
824 * Take a reference to avoid recursive deallocations.
825 */
826
827 assert(object->pager_initialized);
828 assert(object->ref_count == 0);
829 object->ref_count++;
830
831 /*
832 * Terminate the object.
833 * If the object had a shadow, we let vm_object_deallocate
834 * deallocate it. "pageout" objects have a shadow, but
835 * maintain a "paging reference" rather than a normal
836 * reference.
837 * (We are careful here to limit recursion.)
838 */
839 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
840 if(vm_object_terminate(object) != KERN_SUCCESS)
841 continue;
842 if (shadow != VM_OBJECT_NULL) {
843 if (called_from_vm_object_deallocate) {
844 return shadow;
845 } else {
846 vm_object_deallocate(shadow);
847 }
848 }
849 }
850 }
851
852 boolean_t vm_object_terminate_remove_all = FALSE;
853
854 /*
855 * Routine: vm_object_terminate
856 * Purpose:
857 * Free all resources associated with a vm_object.
858 * In/out conditions:
859 * Upon entry, the object must be locked,
860 * and the object must have exactly one reference.
861 *
862 * The shadow object reference is left alone.
863 *
864 * The object must be unlocked if its found that pages
865 * must be flushed to a backing object. If someone
866 * manages to map the object while it is being flushed
867 * the object is returned unlocked and unchanged. Otherwise,
868 * upon exit, the cache will be unlocked, and the
869 * object will cease to exist.
870 */
871 static kern_return_t
872 vm_object_terminate(
873 register vm_object_t object)
874 {
875 memory_object_t pager;
876 register vm_page_t p;
877 vm_object_t shadow_object;
878
879 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
880 (integer_t)object, object->ref_count, 0, 0, 0);
881
882 if (!object->pageout && (!object->temporary || object->can_persist)
883 && (object->pager != NULL || object->shadow_severed)) {
884 vm_object_cache_unlock();
885 while (!queue_empty(&object->memq)) {
886 /*
887 * Clear pager_trusted bit so that the pages get yanked
888 * out of the object instead of cleaned in place. This
889 * prevents a deadlock in XMM and makes more sense anyway.
890 */
891 object->pager_trusted = FALSE;
892
893 p = (vm_page_t) queue_first(&object->memq);
894
895 VM_PAGE_CHECK(p);
896
897 if (p->busy || p->cleaning) {
898 if(p->cleaning || p->absent) {
899 vm_object_paging_wait(object, THREAD_UNINT);
900 continue;
901 } else {
902 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
903 }
904 }
905
906 vm_page_lock_queues();
907 VM_PAGE_QUEUES_REMOVE(p);
908 vm_page_unlock_queues();
909
910 if (p->absent || p->private) {
911
912 /*
913 * For private pages, VM_PAGE_FREE just
914 * leaves the page structure around for
915 * its owner to clean up. For absent
916 * pages, the structure is returned to
917 * the appropriate pool.
918 */
919
920 goto free_page;
921 }
922
923 if (p->fictitious)
924 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
925
926 if (!p->dirty)
927 p->dirty = pmap_is_modified(p->phys_addr);
928
929 if ((p->dirty || p->precious) && !p->error && object->alive) {
930 p->busy = TRUE;
931 vm_object_paging_begin(object);
932 /* protect the object from re-use/caching while it */
933 /* is unlocked */
934 vm_object_unlock(object);
935 vm_pageout_cluster(p); /* flush page */
936 vm_object_lock(object);
937 vm_object_paging_wait(object, THREAD_UNINT);
938 XPR(XPR_VM_OBJECT,
939 "vm_object_terminate restart, object 0x%X ref %d\n",
940 (integer_t)object, object->ref_count, 0, 0, 0);
941 } else {
942 free_page:
943 VM_PAGE_FREE(p);
944 }
945 }
946 vm_object_unlock(object);
947 vm_object_cache_lock();
948 vm_object_lock(object);
949 }
950
951 /*
952 * Make sure the object isn't already being terminated
953 */
954 if(object->terminating) {
955 object->ref_count -= 1;
956 assert(object->ref_count > 0);
957 vm_object_cache_unlock();
958 vm_object_unlock(object);
959 return KERN_FAILURE;
960 }
961
962 /*
963 * Did somebody get a reference to the object while we were
964 * cleaning it?
965 */
966 if(object->ref_count != 1) {
967 object->ref_count -= 1;
968 assert(object->ref_count > 0);
969 vm_object_res_deallocate(object);
970 vm_object_cache_unlock();
971 vm_object_unlock(object);
972 return KERN_FAILURE;
973 }
974
975 /*
976 * Make sure no one can look us up now.
977 */
978
979 object->terminating = TRUE;
980 object->alive = FALSE;
981 vm_object_remove(object);
982
983 /*
984 * Detach the object from its shadow if we are the shadow's
985 * copy.
986 */
987 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
988 !(object->pageout)) {
989 vm_object_lock(shadow_object);
990 assert((shadow_object->copy == object) ||
991 (shadow_object->copy == VM_OBJECT_NULL));
992 shadow_object->copy = VM_OBJECT_NULL;
993 vm_object_unlock(shadow_object);
994 }
995
996 /*
997 * The pageout daemon might be playing with our pages.
998 * Now that the object is dead, it won't touch any more
999 * pages, but some pages might already be on their way out.
1000 * Hence, we wait until the active paging activities have ceased
1001 * before we break the association with the pager itself.
1002 */
1003 while (object->paging_in_progress != 0) {
1004 vm_object_cache_unlock();
1005 vm_object_wait(object,
1006 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1007 THREAD_UNINT);
1008 vm_object_cache_lock();
1009 vm_object_lock(object);
1010 }
1011
1012 pager = object->pager;
1013 object->pager = MEMORY_OBJECT_NULL;
1014
1015 if (pager != MEMORY_OBJECT_NULL)
1016 memory_object_control_disable(object->pager_request);
1017 vm_object_cache_unlock();
1018
1019 object->ref_count--;
1020 #if TASK_SWAPPER
1021 assert(object->res_count == 0);
1022 #endif /* TASK_SWAPPER */
1023
1024 assert (object->ref_count == 0);
1025
1026 /*
1027 * Clean or free the pages, as appropriate.
1028 * It is possible for us to find busy/absent pages,
1029 * if some faults on this object were aborted.
1030 */
1031 if (object->pageout) {
1032 assert(shadow_object != VM_OBJECT_NULL);
1033 assert(shadow_object == object->shadow);
1034
1035 vm_pageout_object_terminate(object);
1036
1037 } else if ((object->temporary && !object->can_persist) ||
1038 (pager == MEMORY_OBJECT_NULL)) {
1039 while (!queue_empty(&object->memq)) {
1040 p = (vm_page_t) queue_first(&object->memq);
1041
1042 VM_PAGE_CHECK(p);
1043 VM_PAGE_FREE(p);
1044 }
1045 } else if (!queue_empty(&object->memq)) {
1046 panic("vm_object_terminate: queue just emptied isn't");
1047 }
1048
1049 assert(object->paging_in_progress == 0);
1050 assert(object->ref_count == 0);
1051
1052 /*
1053 * If the pager has not already been released by
1054 * vm_object_destroy, we need to terminate it and
1055 * release our reference to it here.
1056 */
1057 if (pager != MEMORY_OBJECT_NULL) {
1058 vm_object_unlock(object);
1059 vm_object_release_pager(pager);
1060 vm_object_lock(object);
1061 }
1062
1063 /* kick off anyone waiting on terminating */
1064 object->terminating = FALSE;
1065 vm_object_paging_begin(object);
1066 vm_object_paging_end(object);
1067 vm_object_unlock(object);
1068
1069 #if MACH_PAGEMAP
1070 vm_external_destroy(object->existence_map, object->size);
1071 #endif /* MACH_PAGEMAP */
1072
1073 /*
1074 * Free the space for the object.
1075 */
1076 zfree(vm_object_zone, (vm_offset_t) object);
1077 return KERN_SUCCESS;
1078 }
1079
1080 /*
1081 * Routine: vm_object_pager_wakeup
1082 * Purpose: Wake up anyone waiting for termination of a pager.
1083 */
1084
1085 static void
1086 vm_object_pager_wakeup(
1087 memory_object_t pager)
1088 {
1089 vm_object_hash_entry_t entry;
1090 boolean_t waiting = FALSE;
1091
1092 /*
1093 * If anyone was waiting for the memory_object_terminate
1094 * to be queued, wake them up now.
1095 */
1096 vm_object_cache_lock();
1097 entry = vm_object_hash_lookup(pager, TRUE);
1098 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1099 waiting = entry->waiting;
1100 vm_object_cache_unlock();
1101 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1102 if (waiting)
1103 thread_wakeup((event_t) pager);
1104 vm_object_hash_entry_free(entry);
1105 }
1106 }
1107
1108 /*
1109 * Routine: vm_object_release_pager
1110 * Purpose: Terminate the pager and, upon completion,
1111 * release our last reference to it.
1112 * just like memory_object_terminate, except
1113 * that we wake up anyone blocked in vm_object_enter
1114 * waiting for termination message to be queued
1115 * before calling memory_object_init.
1116 */
1117 static void
1118 vm_object_release_pager(
1119 memory_object_t pager)
1120 {
1121
1122 /*
1123 * Terminate the pager.
1124 */
1125
1126 (void) memory_object_terminate(pager);
1127
1128 /*
1129 * Wakeup anyone waiting for this terminate
1130 */
1131 vm_object_pager_wakeup(pager);
1132
1133 /*
1134 * Release reference to pager.
1135 */
1136 memory_object_deallocate(pager);
1137 }
1138
1139 /*
1140 * Routine: vm_object_abort_activity [internal use only]
1141 * Purpose:
1142 * Abort paging requests pending on this object.
1143 * In/out conditions:
1144 * The object is locked on entry and exit.
1145 */
1146 static void
1147 vm_object_abort_activity(
1148 vm_object_t object)
1149 {
1150 register
1151 vm_page_t p;
1152 vm_page_t next;
1153
1154 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
1155 (integer_t)object, 0, 0, 0, 0);
1156
1157 /*
1158 * Abort all activity that would be waiting
1159 * for a result on this memory object.
1160 *
1161 * We could also choose to destroy all pages
1162 * that we have in memory for this object, but
1163 * we don't.
1164 */
1165
1166 p = (vm_page_t) queue_first(&object->memq);
1167 while (!queue_end(&object->memq, (queue_entry_t) p)) {
1168 next = (vm_page_t) queue_next(&p->listq);
1169
1170 /*
1171 * If it's being paged in, destroy it.
1172 * If an unlock has been requested, start it again.
1173 */
1174
1175 if (p->busy && p->absent) {
1176 VM_PAGE_FREE(p);
1177 }
1178 else {
1179 if (p->unlock_request != VM_PROT_NONE)
1180 p->unlock_request = VM_PROT_NONE;
1181 PAGE_WAKEUP(p);
1182 }
1183
1184 p = next;
1185 }
1186
1187 /*
1188 * Wake up threads waiting for the memory object to
1189 * become ready.
1190 */
1191
1192 object->pager_ready = TRUE;
1193 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1194 }
1195
1196 /*
1197 * Routine: vm_object_destroy
1198 * Purpose:
1199 * Shut down a VM object, despite the
1200 * presence of address map (or other) references
1201 * to the vm_object.
1202 */
1203 kern_return_t
1204 vm_object_destroy(
1205 vm_object_t object,
1206 kern_return_t reason)
1207 {
1208 memory_object_t old_pager;
1209
1210 if (object == VM_OBJECT_NULL)
1211 return(KERN_SUCCESS);
1212
1213 /*
1214 * Remove the pager association immediately.
1215 *
1216 * This will prevent the memory manager from further
1217 * meddling. [If it wanted to flush data or make
1218 * other changes, it should have done so before performing
1219 * the destroy call.]
1220 */
1221
1222 vm_object_cache_lock();
1223 vm_object_lock(object);
1224 object->can_persist = FALSE;
1225 object->named = FALSE;
1226 object->alive = FALSE;
1227
1228 /*
1229 * Rip out the pager from the vm_object now...
1230 */
1231
1232 vm_object_remove(object);
1233 old_pager = object->pager;
1234 object->pager = MEMORY_OBJECT_NULL;
1235 if (old_pager != MEMORY_OBJECT_NULL)
1236 memory_object_control_disable(object->pager_request);
1237 vm_object_cache_unlock();
1238
1239 /*
1240 * Wait for the existing paging activity (that got
1241 * through before we nulled out the pager) to subside.
1242 */
1243
1244 vm_object_paging_wait(object, THREAD_UNINT);
1245 vm_object_unlock(object);
1246
1247 /*
1248 * Terminate the object now.
1249 */
1250 if (old_pager != MEMORY_OBJECT_NULL) {
1251 vm_object_release_pager(old_pager);
1252
1253 /*
1254 * JMM - Release the caller's reference. This assumes the
1255 * caller had a reference to release, which is a big (but
1256 * currently valid) assumption if this is driven from the
1257 * vnode pager (it is holding a named reference when making
1258 * this call)..
1259 */
1260 vm_object_deallocate(object);
1261
1262 }
1263 return(KERN_SUCCESS);
1264 }
1265
1266 /*
1267 * vm_object_deactivate_pages
1268 *
1269 * Deactivate all pages in the specified object. (Keep its pages
1270 * in memory even though it is no longer referenced.)
1271 *
1272 * The object must be locked.
1273 */
1274 static void
1275 vm_object_deactivate_all_pages(
1276 register vm_object_t object)
1277 {
1278 register vm_page_t p;
1279
1280 queue_iterate(&object->memq, p, vm_page_t, listq) {
1281 vm_page_lock_queues();
1282 if (!p->busy)
1283 vm_page_deactivate(p);
1284 vm_page_unlock_queues();
1285 }
1286 }
1287
1288 __private_extern__ void
1289 vm_object_deactivate_pages(
1290 vm_object_t object,
1291 vm_object_offset_t offset,
1292 vm_object_size_t size,
1293 boolean_t kill_page)
1294 {
1295 vm_object_t orig_object;
1296 int pages_moved = 0;
1297 int pages_found = 0;
1298
1299 /*
1300 * entered with object lock held, acquire a paging reference to
1301 * prevent the memory_object and control ports from
1302 * being destroyed.
1303 */
1304 orig_object = object;
1305
1306 for (;;) {
1307 register vm_page_t m;
1308 vm_object_offset_t toffset;
1309 vm_object_size_t tsize;
1310
1311 vm_object_paging_begin(object);
1312 vm_page_lock_queues();
1313
1314 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1315
1316 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1317
1318 pages_found++;
1319
1320 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1321
1322 m->reference = FALSE;
1323 pmap_clear_reference(m->phys_addr);
1324
1325 if ((kill_page) && (object->internal)) {
1326 m->precious = FALSE;
1327 m->dirty = FALSE;
1328 pmap_clear_modify(m->phys_addr);
1329 vm_external_state_clr(object->existence_map, offset);
1330 }
1331 VM_PAGE_QUEUES_REMOVE(m);
1332
1333 if(m->zero_fill) {
1334 queue_enter_first(
1335 &vm_page_queue_zf,
1336 m, vm_page_t, pageq);
1337 } else {
1338 queue_enter_first(
1339 &vm_page_queue_inactive,
1340 m, vm_page_t, pageq);
1341 }
1342
1343 m->inactive = TRUE;
1344 if (!m->fictitious)
1345 vm_page_inactive_count++;
1346
1347 pages_moved++;
1348 }
1349 }
1350 }
1351 vm_page_unlock_queues();
1352 vm_object_paging_end(object);
1353
1354 if (object->shadow) {
1355 vm_object_t tmp_object;
1356
1357 kill_page = 0;
1358
1359 offset += object->shadow_offset;
1360
1361 tmp_object = object->shadow;
1362 vm_object_lock(tmp_object);
1363
1364 if (object != orig_object)
1365 vm_object_unlock(object);
1366 object = tmp_object;
1367 } else
1368 break;
1369 }
1370 if (object != orig_object)
1371 vm_object_unlock(object);
1372 }
1373
1374 /*
1375 * Routine: vm_object_pmap_protect
1376 *
1377 * Purpose:
1378 * Reduces the permission for all physical
1379 * pages in the specified object range.
1380 *
1381 * If removing write permission only, it is
1382 * sufficient to protect only the pages in
1383 * the top-level object; only those pages may
1384 * have write permission.
1385 *
1386 * If removing all access, we must follow the
1387 * shadow chain from the top-level object to
1388 * remove access to all pages in shadowed objects.
1389 *
1390 * The object must *not* be locked. The object must
1391 * be temporary/internal.
1392 *
1393 * If pmap is not NULL, this routine assumes that
1394 * the only mappings for the pages are in that
1395 * pmap.
1396 */
1397
1398 __private_extern__ void
1399 vm_object_pmap_protect(
1400 register vm_object_t object,
1401 register vm_object_offset_t offset,
1402 vm_size_t size,
1403 pmap_t pmap,
1404 vm_offset_t pmap_start,
1405 vm_prot_t prot)
1406 {
1407 if (object == VM_OBJECT_NULL)
1408 return;
1409 size = round_page_64(size);
1410 offset = trunc_page_64(offset);
1411
1412 vm_object_lock(object);
1413
1414 assert(object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
1415
1416 while (TRUE) {
1417 if (object->resident_page_count > atop(size) / 2 &&
1418 pmap != PMAP_NULL) {
1419 vm_object_unlock(object);
1420 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1421 return;
1422 }
1423
1424 /* if we are doing large ranges with respect to resident */
1425 /* page count then we should interate over pages otherwise */
1426 /* inverse page look-up will be faster */
1427 if ((object->resident_page_count / 4) < atop(size)) {
1428 vm_page_t p;
1429 vm_object_offset_t end;
1430
1431 end = offset + size;
1432
1433 if (pmap != PMAP_NULL) {
1434 queue_iterate(&object->memq, p, vm_page_t, listq) {
1435 if (!p->fictitious &&
1436 (offset <= p->offset) && (p->offset < end)) {
1437
1438 vm_offset_t start = pmap_start +
1439 (vm_offset_t)(p->offset - offset);
1440
1441 pmap_protect(pmap, start, start + PAGE_SIZE, prot);
1442 }
1443 }
1444 } else {
1445 queue_iterate(&object->memq, p, vm_page_t, listq) {
1446 if (!p->fictitious &&
1447 (offset <= p->offset) && (p->offset < end)) {
1448
1449 pmap_page_protect(p->phys_addr,
1450 prot & ~p->page_lock);
1451 }
1452 }
1453 }
1454 } else {
1455 vm_page_t p;
1456 vm_object_offset_t end;
1457 vm_object_offset_t target_off;
1458
1459 end = offset + size;
1460
1461 if (pmap != PMAP_NULL) {
1462 for(target_off = offset;
1463 target_off < end; target_off += PAGE_SIZE) {
1464 if(p = vm_page_lookup(object, target_off)) {
1465 vm_offset_t start = pmap_start +
1466 (vm_offset_t)(p->offset - offset);
1467 pmap_protect(pmap, start,
1468 start + PAGE_SIZE, prot);
1469 }
1470 }
1471 } else {
1472 for(target_off = offset;
1473 target_off < end; target_off += PAGE_SIZE) {
1474 if(p = vm_page_lookup(object, target_off)) {
1475 pmap_page_protect(p->phys_addr,
1476 prot & ~p->page_lock);
1477 }
1478 }
1479 }
1480 }
1481
1482 if (prot == VM_PROT_NONE) {
1483 /*
1484 * Must follow shadow chain to remove access
1485 * to pages in shadowed objects.
1486 */
1487 register vm_object_t next_object;
1488
1489 next_object = object->shadow;
1490 if (next_object != VM_OBJECT_NULL) {
1491 offset += object->shadow_offset;
1492 vm_object_lock(next_object);
1493 vm_object_unlock(object);
1494 object = next_object;
1495 }
1496 else {
1497 /*
1498 * End of chain - we are done.
1499 */
1500 break;
1501 }
1502 }
1503 else {
1504 /*
1505 * Pages in shadowed objects may never have
1506 * write permission - we may stop here.
1507 */
1508 break;
1509 }
1510 }
1511
1512 vm_object_unlock(object);
1513 }
1514
1515 /*
1516 * Routine: vm_object_copy_slowly
1517 *
1518 * Description:
1519 * Copy the specified range of the source
1520 * virtual memory object without using
1521 * protection-based optimizations (such
1522 * as copy-on-write). The pages in the
1523 * region are actually copied.
1524 *
1525 * In/out conditions:
1526 * The caller must hold a reference and a lock
1527 * for the source virtual memory object. The source
1528 * object will be returned *unlocked*.
1529 *
1530 * Results:
1531 * If the copy is completed successfully, KERN_SUCCESS is
1532 * returned. If the caller asserted the interruptible
1533 * argument, and an interruption occurred while waiting
1534 * for a user-generated event, MACH_SEND_INTERRUPTED is
1535 * returned. Other values may be returned to indicate
1536 * hard errors during the copy operation.
1537 *
1538 * A new virtual memory object is returned in a
1539 * parameter (_result_object). The contents of this
1540 * new object, starting at a zero offset, are a copy
1541 * of the source memory region. In the event of
1542 * an error, this parameter will contain the value
1543 * VM_OBJECT_NULL.
1544 */
1545 __private_extern__ kern_return_t
1546 vm_object_copy_slowly(
1547 register vm_object_t src_object,
1548 vm_object_offset_t src_offset,
1549 vm_object_size_t size,
1550 boolean_t interruptible,
1551 vm_object_t *_result_object) /* OUT */
1552 {
1553 vm_object_t new_object;
1554 vm_object_offset_t new_offset;
1555
1556 vm_object_offset_t src_lo_offset = src_offset;
1557 vm_object_offset_t src_hi_offset = src_offset + size;
1558
1559 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1560 src_object, src_offset, size, 0, 0);
1561
1562 if (size == 0) {
1563 vm_object_unlock(src_object);
1564 *_result_object = VM_OBJECT_NULL;
1565 return(KERN_INVALID_ARGUMENT);
1566 }
1567
1568 /*
1569 * Prevent destruction of the source object while we copy.
1570 */
1571
1572 assert(src_object->ref_count > 0);
1573 src_object->ref_count++;
1574 VM_OBJ_RES_INCR(src_object);
1575 vm_object_unlock(src_object);
1576
1577 /*
1578 * Create a new object to hold the copied pages.
1579 * A few notes:
1580 * We fill the new object starting at offset 0,
1581 * regardless of the input offset.
1582 * We don't bother to lock the new object within
1583 * this routine, since we have the only reference.
1584 */
1585
1586 new_object = vm_object_allocate(size);
1587 new_offset = 0;
1588
1589 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1590
1591 for ( ;
1592 size != 0 ;
1593 src_offset += PAGE_SIZE_64,
1594 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1595 ) {
1596 vm_page_t new_page;
1597 vm_fault_return_t result;
1598
1599 while ((new_page = vm_page_alloc(new_object, new_offset))
1600 == VM_PAGE_NULL) {
1601 if (!vm_page_wait(interruptible)) {
1602 vm_object_deallocate(new_object);
1603 *_result_object = VM_OBJECT_NULL;
1604 return(MACH_SEND_INTERRUPTED);
1605 }
1606 }
1607
1608 do {
1609 vm_prot_t prot = VM_PROT_READ;
1610 vm_page_t _result_page;
1611 vm_page_t top_page;
1612 register
1613 vm_page_t result_page;
1614 kern_return_t error_code;
1615
1616 vm_object_lock(src_object);
1617 vm_object_paging_begin(src_object);
1618
1619 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1620 result = vm_fault_page(src_object, src_offset,
1621 VM_PROT_READ, FALSE, interruptible,
1622 src_lo_offset, src_hi_offset,
1623 VM_BEHAVIOR_SEQUENTIAL,
1624 &prot, &_result_page, &top_page,
1625 (int *)0,
1626 &error_code, FALSE, FALSE, NULL, 0);
1627
1628 switch(result) {
1629 case VM_FAULT_SUCCESS:
1630 result_page = _result_page;
1631
1632 /*
1633 * We don't need to hold the object
1634 * lock -- the busy page will be enough.
1635 * [We don't care about picking up any
1636 * new modifications.]
1637 *
1638 * Copy the page to the new object.
1639 *
1640 * POLICY DECISION:
1641 * If result_page is clean,
1642 * we could steal it instead
1643 * of copying.
1644 */
1645
1646 vm_object_unlock(result_page->object);
1647 vm_page_copy(result_page, new_page);
1648
1649 /*
1650 * Let go of both pages (make them
1651 * not busy, perform wakeup, activate).
1652 */
1653
1654 new_page->busy = FALSE;
1655 new_page->dirty = TRUE;
1656 vm_object_lock(result_page->object);
1657 PAGE_WAKEUP_DONE(result_page);
1658
1659 vm_page_lock_queues();
1660 if (!result_page->active &&
1661 !result_page->inactive)
1662 vm_page_activate(result_page);
1663 vm_page_activate(new_page);
1664 vm_page_unlock_queues();
1665
1666 /*
1667 * Release paging references and
1668 * top-level placeholder page, if any.
1669 */
1670
1671 vm_fault_cleanup(result_page->object,
1672 top_page);
1673
1674 break;
1675
1676 case VM_FAULT_RETRY:
1677 break;
1678
1679 case VM_FAULT_FICTITIOUS_SHORTAGE:
1680 vm_page_more_fictitious();
1681 break;
1682
1683 case VM_FAULT_MEMORY_SHORTAGE:
1684 if (vm_page_wait(interruptible))
1685 break;
1686 /* fall thru */
1687
1688 case VM_FAULT_INTERRUPTED:
1689 vm_page_free(new_page);
1690 vm_object_deallocate(new_object);
1691 vm_object_deallocate(src_object);
1692 *_result_object = VM_OBJECT_NULL;
1693 return(MACH_SEND_INTERRUPTED);
1694
1695 case VM_FAULT_MEMORY_ERROR:
1696 /*
1697 * A policy choice:
1698 * (a) ignore pages that we can't
1699 * copy
1700 * (b) return the null object if
1701 * any page fails [chosen]
1702 */
1703
1704 vm_page_lock_queues();
1705 vm_page_free(new_page);
1706 vm_page_unlock_queues();
1707 vm_object_deallocate(new_object);
1708 vm_object_deallocate(src_object);
1709 *_result_object = VM_OBJECT_NULL;
1710 return(error_code ? error_code:
1711 KERN_MEMORY_ERROR);
1712 }
1713 } while (result != VM_FAULT_SUCCESS);
1714 }
1715
1716 /*
1717 * Lose the extra reference, and return our object.
1718 */
1719
1720 vm_object_deallocate(src_object);
1721 *_result_object = new_object;
1722 return(KERN_SUCCESS);
1723 }
1724
1725 /*
1726 * Routine: vm_object_copy_quickly
1727 *
1728 * Purpose:
1729 * Copy the specified range of the source virtual
1730 * memory object, if it can be done without waiting
1731 * for user-generated events.
1732 *
1733 * Results:
1734 * If the copy is successful, the copy is returned in
1735 * the arguments; otherwise, the arguments are not
1736 * affected.
1737 *
1738 * In/out conditions:
1739 * The object should be unlocked on entry and exit.
1740 */
1741
1742 /*ARGSUSED*/
1743 __private_extern__ boolean_t
1744 vm_object_copy_quickly(
1745 vm_object_t *_object, /* INOUT */
1746 vm_object_offset_t offset, /* IN */
1747 vm_object_size_t size, /* IN */
1748 boolean_t *_src_needs_copy, /* OUT */
1749 boolean_t *_dst_needs_copy) /* OUT */
1750 {
1751 vm_object_t object = *_object;
1752 memory_object_copy_strategy_t copy_strategy;
1753
1754 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1755 *_object, offset, size, 0, 0);
1756 if (object == VM_OBJECT_NULL) {
1757 *_src_needs_copy = FALSE;
1758 *_dst_needs_copy = FALSE;
1759 return(TRUE);
1760 }
1761
1762 vm_object_lock(object);
1763
1764 copy_strategy = object->copy_strategy;
1765
1766 switch (copy_strategy) {
1767 case MEMORY_OBJECT_COPY_SYMMETRIC:
1768
1769 /*
1770 * Symmetric copy strategy.
1771 * Make another reference to the object.
1772 * Leave object/offset unchanged.
1773 */
1774
1775 assert(object->ref_count > 0);
1776 object->ref_count++;
1777 vm_object_res_reference(object);
1778 object->shadowed = TRUE;
1779 vm_object_unlock(object);
1780
1781 /*
1782 * Both source and destination must make
1783 * shadows, and the source must be made
1784 * read-only if not already.
1785 */
1786
1787 *_src_needs_copy = TRUE;
1788 *_dst_needs_copy = TRUE;
1789
1790 break;
1791
1792 case MEMORY_OBJECT_COPY_DELAY:
1793 vm_object_unlock(object);
1794 return(FALSE);
1795
1796 default:
1797 vm_object_unlock(object);
1798 return(FALSE);
1799 }
1800 return(TRUE);
1801 }
1802
1803 static int copy_call_count = 0;
1804 static int copy_call_sleep_count = 0;
1805 static int copy_call_restart_count = 0;
1806
1807 /*
1808 * Routine: vm_object_copy_call [internal]
1809 *
1810 * Description:
1811 * Copy the source object (src_object), using the
1812 * user-managed copy algorithm.
1813 *
1814 * In/out conditions:
1815 * The source object must be locked on entry. It
1816 * will be *unlocked* on exit.
1817 *
1818 * Results:
1819 * If the copy is successful, KERN_SUCCESS is returned.
1820 * A new object that represents the copied virtual
1821 * memory is returned in a parameter (*_result_object).
1822 * If the return value indicates an error, this parameter
1823 * is not valid.
1824 */
1825 static kern_return_t
1826 vm_object_copy_call(
1827 vm_object_t src_object,
1828 vm_object_offset_t src_offset,
1829 vm_object_size_t size,
1830 vm_object_t *_result_object) /* OUT */
1831 {
1832 kern_return_t kr;
1833 vm_object_t copy;
1834 boolean_t check_ready = FALSE;
1835
1836 /*
1837 * If a copy is already in progress, wait and retry.
1838 *
1839 * XXX
1840 * Consider making this call interruptable, as Mike
1841 * intended it to be.
1842 *
1843 * XXXO
1844 * Need a counter or version or something to allow
1845 * us to use the copy that the currently requesting
1846 * thread is obtaining -- is it worth adding to the
1847 * vm object structure? Depends how common this case it.
1848 */
1849 copy_call_count++;
1850 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1851 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1852 THREAD_UNINT);
1853 copy_call_restart_count++;
1854 }
1855
1856 /*
1857 * Indicate (for the benefit of memory_object_create_copy)
1858 * that we want a copy for src_object. (Note that we cannot
1859 * do a real assert_wait before calling memory_object_copy,
1860 * so we simply set the flag.)
1861 */
1862
1863 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1864 vm_object_unlock(src_object);
1865
1866 /*
1867 * Ask the memory manager to give us a memory object
1868 * which represents a copy of the src object.
1869 * The memory manager may give us a memory object
1870 * which we already have, or it may give us a
1871 * new memory object. This memory object will arrive
1872 * via memory_object_create_copy.
1873 */
1874
1875 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1876 if (kr != KERN_SUCCESS) {
1877 return kr;
1878 }
1879
1880 /*
1881 * Wait for the copy to arrive.
1882 */
1883 vm_object_lock(src_object);
1884 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1885 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1886 THREAD_UNINT);
1887 copy_call_sleep_count++;
1888 }
1889 Retry:
1890 assert(src_object->copy != VM_OBJECT_NULL);
1891 copy = src_object->copy;
1892 if (!vm_object_lock_try(copy)) {
1893 vm_object_unlock(src_object);
1894 mutex_pause(); /* wait a bit */
1895 vm_object_lock(src_object);
1896 goto Retry;
1897 }
1898 if (copy->size < src_offset+size)
1899 copy->size = src_offset+size;
1900
1901 if (!copy->pager_ready)
1902 check_ready = TRUE;
1903
1904 /*
1905 * Return the copy.
1906 */
1907 *_result_object = copy;
1908 vm_object_unlock(copy);
1909 vm_object_unlock(src_object);
1910
1911 /* Wait for the copy to be ready. */
1912 if (check_ready == TRUE) {
1913 vm_object_lock(copy);
1914 while (!copy->pager_ready) {
1915 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1916 }
1917 vm_object_unlock(copy);
1918 }
1919
1920 return KERN_SUCCESS;
1921 }
1922
1923 static int copy_delayed_lock_collisions = 0;
1924 static int copy_delayed_max_collisions = 0;
1925 static int copy_delayed_lock_contention = 0;
1926 static int copy_delayed_protect_iterate = 0;
1927 static int copy_delayed_protect_lookup = 0;
1928 static int copy_delayed_protect_lookup_wait = 0;
1929
1930 /*
1931 * Routine: vm_object_copy_delayed [internal]
1932 *
1933 * Description:
1934 * Copy the specified virtual memory object, using
1935 * the asymmetric copy-on-write algorithm.
1936 *
1937 * In/out conditions:
1938 * The object must be unlocked on entry.
1939 *
1940 * This routine will not block waiting for user-generated
1941 * events. It is not interruptible.
1942 */
1943 __private_extern__ vm_object_t
1944 vm_object_copy_delayed(
1945 vm_object_t src_object,
1946 vm_object_offset_t src_offset,
1947 vm_object_size_t size)
1948 {
1949 vm_object_t new_copy = VM_OBJECT_NULL;
1950 vm_object_t old_copy;
1951 vm_page_t p;
1952 vm_object_size_t copy_size;
1953
1954 int collisions = 0;
1955 /*
1956 * The user-level memory manager wants to see all of the changes
1957 * to this object, but it has promised not to make any changes on
1958 * its own.
1959 *
1960 * Perform an asymmetric copy-on-write, as follows:
1961 * Create a new object, called a "copy object" to hold
1962 * pages modified by the new mapping (i.e., the copy,
1963 * not the original mapping).
1964 * Record the original object as the backing object for
1965 * the copy object. If the original mapping does not
1966 * change a page, it may be used read-only by the copy.
1967 * Record the copy object in the original object.
1968 * When the original mapping causes a page to be modified,
1969 * it must be copied to a new page that is "pushed" to
1970 * the copy object.
1971 * Mark the new mapping (the copy object) copy-on-write.
1972 * This makes the copy object itself read-only, allowing
1973 * it to be reused if the original mapping makes no
1974 * changes, and simplifying the synchronization required
1975 * in the "push" operation described above.
1976 *
1977 * The copy-on-write is said to be assymetric because the original
1978 * object is *not* marked copy-on-write. A copied page is pushed
1979 * to the copy object, regardless which party attempted to modify
1980 * the page.
1981 *
1982 * Repeated asymmetric copy operations may be done. If the
1983 * original object has not been changed since the last copy, its
1984 * copy object can be reused. Otherwise, a new copy object can be
1985 * inserted between the original object and its previous copy
1986 * object. Since any copy object is read-only, this cannot affect
1987 * affect the contents of the previous copy object.
1988 *
1989 * Note that a copy object is higher in the object tree than the
1990 * original object; therefore, use of the copy object recorded in
1991 * the original object must be done carefully, to avoid deadlock.
1992 */
1993
1994 Retry:
1995 vm_object_lock(src_object);
1996
1997 /*
1998 * See whether we can reuse the result of a previous
1999 * copy operation.
2000 */
2001
2002 old_copy = src_object->copy;
2003 if (old_copy != VM_OBJECT_NULL) {
2004 /*
2005 * Try to get the locks (out of order)
2006 */
2007 if (!vm_object_lock_try(old_copy)) {
2008 vm_object_unlock(src_object);
2009 mutex_pause();
2010
2011 /* Heisenberg Rules */
2012 copy_delayed_lock_collisions++;
2013 if (collisions++ == 0)
2014 copy_delayed_lock_contention++;
2015
2016 if (collisions > copy_delayed_max_collisions)
2017 copy_delayed_max_collisions = collisions;
2018
2019 goto Retry;
2020 }
2021
2022 /*
2023 * Determine whether the old copy object has
2024 * been modified.
2025 */
2026
2027 if (old_copy->resident_page_count == 0 &&
2028 !old_copy->pager_created) {
2029 /*
2030 * It has not been modified.
2031 *
2032 * Return another reference to
2033 * the existing copy-object.
2034 */
2035 assert(old_copy->ref_count > 0);
2036 old_copy->ref_count++;
2037
2038 if (old_copy->size < src_offset+size)
2039 old_copy->size = src_offset+size;
2040
2041 #if TASK_SWAPPER
2042 /*
2043 * We have to reproduce some of the code from
2044 * vm_object_res_reference because we've taken
2045 * the locks out of order here, and deadlock
2046 * would result if we simply called that function.
2047 */
2048 if (++old_copy->res_count == 1) {
2049 assert(old_copy->shadow == src_object);
2050 vm_object_res_reference(src_object);
2051 }
2052 #endif /* TASK_SWAPPER */
2053
2054 vm_object_unlock(old_copy);
2055 vm_object_unlock(src_object);
2056
2057 if (new_copy != VM_OBJECT_NULL) {
2058 vm_object_unlock(new_copy);
2059 vm_object_deallocate(new_copy);
2060 }
2061
2062 return(old_copy);
2063 }
2064 if (new_copy == VM_OBJECT_NULL) {
2065 vm_object_unlock(old_copy);
2066 vm_object_unlock(src_object);
2067 new_copy = vm_object_allocate(src_offset + size);
2068 vm_object_lock(new_copy);
2069 goto Retry;
2070 }
2071
2072 /*
2073 * Adjust the size argument so that the newly-created
2074 * copy object will be large enough to back either the
2075 * new old copy object or the new mapping.
2076 */
2077 if (old_copy->size > src_offset+size)
2078 size = old_copy->size - src_offset;
2079
2080 /*
2081 * The copy-object is always made large enough to
2082 * completely shadow the original object, since
2083 * it may have several users who want to shadow
2084 * the original object at different points.
2085 */
2086
2087 assert((old_copy->shadow == src_object) &&
2088 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2089
2090 /*
2091 * Make the old copy-object shadow the new one.
2092 * It will receive no more pages from the original
2093 * object.
2094 */
2095
2096 src_object->ref_count--; /* remove ref. from old_copy */
2097 assert(src_object->ref_count > 0);
2098 old_copy->shadow = new_copy;
2099 assert(new_copy->ref_count > 0);
2100 new_copy->ref_count++; /* for old_copy->shadow ref. */
2101
2102 #if TASK_SWAPPER
2103 if (old_copy->res_count) {
2104 VM_OBJ_RES_INCR(new_copy);
2105 VM_OBJ_RES_DECR(src_object);
2106 }
2107 #endif
2108
2109 vm_object_unlock(old_copy); /* done with old_copy */
2110 } else if (new_copy == VM_OBJECT_NULL) {
2111 vm_object_unlock(src_object);
2112 new_copy = vm_object_allocate(src_offset + size);
2113 vm_object_lock(new_copy);
2114 goto Retry;
2115 }
2116
2117 /*
2118 * Readjust the copy-object size if necessary.
2119 */
2120 copy_size = new_copy->size;
2121 if (copy_size < src_offset+size) {
2122 copy_size = src_offset+size;
2123 new_copy->size = copy_size;
2124 }
2125
2126 /*
2127 * Point the new copy at the existing object.
2128 */
2129
2130 new_copy->shadow = src_object;
2131 new_copy->shadow_offset = 0;
2132 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2133 assert(src_object->ref_count > 0);
2134 src_object->ref_count++;
2135 VM_OBJ_RES_INCR(src_object);
2136 src_object->copy = new_copy;
2137 vm_object_unlock(new_copy);
2138
2139 /*
2140 * Mark all (current) pages of the existing object copy-on-write.
2141 * This object may have a shadow chain below it, but
2142 * those pages will already be marked copy-on-write.
2143 */
2144
2145 vm_object_paging_wait(src_object, THREAD_UNINT);
2146 copy_delayed_protect_iterate++;
2147 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2148 if (!p->fictitious)
2149 pmap_page_protect(p->phys_addr,
2150 (VM_PROT_ALL & ~VM_PROT_WRITE &
2151 ~p->page_lock));
2152 }
2153 vm_object_unlock(src_object);
2154 XPR(XPR_VM_OBJECT,
2155 "vm_object_copy_delayed: used copy object %X for source %X\n",
2156 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2157
2158 return(new_copy);
2159 }
2160
2161 /*
2162 * Routine: vm_object_copy_strategically
2163 *
2164 * Purpose:
2165 * Perform a copy according to the source object's
2166 * declared strategy. This operation may block,
2167 * and may be interrupted.
2168 */
2169 __private_extern__ kern_return_t
2170 vm_object_copy_strategically(
2171 register vm_object_t src_object,
2172 vm_object_offset_t src_offset,
2173 vm_object_size_t size,
2174 vm_object_t *dst_object, /* OUT */
2175 vm_object_offset_t *dst_offset, /* OUT */
2176 boolean_t *dst_needs_copy) /* OUT */
2177 {
2178 boolean_t result;
2179 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2180 memory_object_copy_strategy_t copy_strategy;
2181
2182 assert(src_object != VM_OBJECT_NULL);
2183
2184 vm_object_lock(src_object);
2185
2186 /*
2187 * The copy strategy is only valid if the memory manager
2188 * is "ready". Internal objects are always ready.
2189 */
2190
2191 while (!src_object->internal && !src_object->pager_ready) {
2192 wait_result_t wait_result;
2193
2194 wait_result = vm_object_sleep( src_object,
2195 VM_OBJECT_EVENT_PAGER_READY,
2196 interruptible);
2197 if (wait_result != THREAD_AWAKENED) {
2198 vm_object_unlock(src_object);
2199 *dst_object = VM_OBJECT_NULL;
2200 *dst_offset = 0;
2201 *dst_needs_copy = FALSE;
2202 return(MACH_SEND_INTERRUPTED);
2203 }
2204 }
2205
2206 copy_strategy = src_object->copy_strategy;
2207
2208 /*
2209 * Use the appropriate copy strategy.
2210 */
2211
2212 switch (copy_strategy) {
2213 case MEMORY_OBJECT_COPY_NONE:
2214 result = vm_object_copy_slowly(src_object, src_offset, size,
2215 interruptible, dst_object);
2216 if (result == KERN_SUCCESS) {
2217 *dst_offset = 0;
2218 *dst_needs_copy = FALSE;
2219 }
2220 break;
2221
2222 case MEMORY_OBJECT_COPY_CALL:
2223 result = vm_object_copy_call(src_object, src_offset, size,
2224 dst_object);
2225 if (result == KERN_SUCCESS) {
2226 *dst_offset = src_offset;
2227 *dst_needs_copy = TRUE;
2228 }
2229 break;
2230
2231 case MEMORY_OBJECT_COPY_DELAY:
2232 vm_object_unlock(src_object);
2233 *dst_object = vm_object_copy_delayed(src_object,
2234 src_offset, size);
2235 *dst_offset = src_offset;
2236 *dst_needs_copy = TRUE;
2237 result = KERN_SUCCESS;
2238 break;
2239
2240 case MEMORY_OBJECT_COPY_SYMMETRIC:
2241 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2242 vm_object_unlock(src_object);
2243 result = KERN_MEMORY_RESTART_COPY;
2244 break;
2245
2246 default:
2247 panic("copy_strategically: bad strategy");
2248 result = KERN_INVALID_ARGUMENT;
2249 }
2250 return(result);
2251 }
2252
2253 /*
2254 * vm_object_shadow:
2255 *
2256 * Create a new object which is backed by the
2257 * specified existing object range. The source
2258 * object reference is deallocated.
2259 *
2260 * The new object and offset into that object
2261 * are returned in the source parameters.
2262 */
2263 boolean_t vm_object_shadow_check = FALSE;
2264
2265 __private_extern__ boolean_t
2266 vm_object_shadow(
2267 vm_object_t *object, /* IN/OUT */
2268 vm_object_offset_t *offset, /* IN/OUT */
2269 vm_object_size_t length)
2270 {
2271 register vm_object_t source;
2272 register vm_object_t result;
2273
2274 source = *object;
2275 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2276
2277 /*
2278 * Determine if we really need a shadow.
2279 */
2280
2281 if (vm_object_shadow_check && source->ref_count == 1 &&
2282 (source->shadow == VM_OBJECT_NULL ||
2283 source->shadow->copy == VM_OBJECT_NULL))
2284 {
2285 source->shadowed = FALSE;
2286 return FALSE;
2287 }
2288
2289 /*
2290 * Allocate a new object with the given length
2291 */
2292
2293 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2294 panic("vm_object_shadow: no object for shadowing");
2295
2296 /*
2297 * The new object shadows the source object, adding
2298 * a reference to it. Our caller changes his reference
2299 * to point to the new object, removing a reference to
2300 * the source object. Net result: no change of reference
2301 * count.
2302 */
2303 result->shadow = source;
2304
2305 /*
2306 * Store the offset into the source object,
2307 * and fix up the offset into the new object.
2308 */
2309
2310 result->shadow_offset = *offset;
2311
2312 /*
2313 * Return the new things
2314 */
2315
2316 *offset = 0;
2317 *object = result;
2318 return TRUE;
2319 }
2320
2321 /*
2322 * The relationship between vm_object structures and
2323 * the memory_object requires careful synchronization.
2324 *
2325 * All associations are created by memory_object_create_named
2326 * for external pagers and vm_object_pager_create for internal
2327 * objects as follows:
2328 *
2329 * pager: the memory_object itself, supplied by
2330 * the user requesting a mapping (or the kernel,
2331 * when initializing internal objects); the
2332 * kernel simulates holding send rights by keeping
2333 * a port reference;
2334 *
2335 * pager_request:
2336 * the memory object control port,
2337 * created by the kernel; the kernel holds
2338 * receive (and ownership) rights to this
2339 * port, but no other references.
2340 *
2341 * When initialization is complete, the "initialized" field
2342 * is asserted. Other mappings using a particular memory object,
2343 * and any references to the vm_object gained through the
2344 * port association must wait for this initialization to occur.
2345 *
2346 * In order to allow the memory manager to set attributes before
2347 * requests (notably virtual copy operations, but also data or
2348 * unlock requests) are made, a "ready" attribute is made available.
2349 * Only the memory manager may affect the value of this attribute.
2350 * Its value does not affect critical kernel functions, such as
2351 * internal object initialization or destruction. [Furthermore,
2352 * memory objects created by the kernel are assumed to be ready
2353 * immediately; the default memory manager need not explicitly
2354 * set the "ready" attribute.]
2355 *
2356 * [Both the "initialized" and "ready" attribute wait conditions
2357 * use the "pager" field as the wait event.]
2358 *
2359 * The port associations can be broken down by any of the
2360 * following routines:
2361 * vm_object_terminate:
2362 * No references to the vm_object remain, and
2363 * the object cannot (or will not) be cached.
2364 * This is the normal case, and is done even
2365 * though one of the other cases has already been
2366 * done.
2367 * memory_object_destroy:
2368 * The memory manager has requested that the
2369 * kernel relinquish references to the memory
2370 * object. [The memory manager may not want to
2371 * destroy the memory object, but may wish to
2372 * refuse or tear down existing memory mappings.]
2373 *
2374 * Each routine that breaks an association must break all of
2375 * them at once. At some later time, that routine must clear
2376 * the pager field and release the memory object references.
2377 * [Furthermore, each routine must cope with the simultaneous
2378 * or previous operations of the others.]
2379 *
2380 * In addition to the lock on the object, the vm_object_cache_lock
2381 * governs the associations. References gained through the
2382 * association require use of the cache lock.
2383 *
2384 * Because the pager field may be cleared spontaneously, it
2385 * cannot be used to determine whether a memory object has
2386 * ever been associated with a particular vm_object. [This
2387 * knowledge is important to the shadow object mechanism.]
2388 * For this reason, an additional "created" attribute is
2389 * provided.
2390 *
2391 * During various paging operations, the pager reference found in the
2392 * vm_object must be valid. To prevent this from being released,
2393 * (other than being removed, i.e., made null), routines may use
2394 * the vm_object_paging_begin/end routines [actually, macros].
2395 * The implementation uses the "paging_in_progress" and "wanted" fields.
2396 * [Operations that alter the validity of the pager values include the
2397 * termination routines and vm_object_collapse.]
2398 */
2399
#if 0
/*
 *	Routine:	vm_object_pager_dead
 *
 *	Purpose:
 *		A port is being destroyed, and the IPC kobject code
 *		can't tell if it represents a pager port or not.
 *		So this function is called each time it sees a port
 *		die.
 *		THIS IS HORRIBLY INEFFICIENT.  We should only call
 *		this routine if we had requested a notification on
 *		the port.
 *
 *	NOTE: this routine is compiled out (#if 0).
 */

__private_extern__ void
vm_object_pager_dead(
	ipc_port_t	pager)
{
	vm_object_t		dead_obj;
	vm_object_hash_entry_t	hash_ent;

	/*
	 *	Same idea as vm_object_lookup, but keyed on the
	 *	memory_object port rather than the control port.
	 */
	vm_object_cache_lock();
	hash_ent = vm_object_hash_lookup(pager, FALSE);
	if (hash_ent == VM_OBJECT_HASH_ENTRY_NULL ||
	    hash_ent->object == VM_OBJECT_NULL) {
		/* Not a pager we know about: nothing to do. */
		vm_object_cache_unlock();
		return;
	}

	/* Detach the object from its hash entry. */
	dead_obj = hash_ent->object;
	hash_ent->object = VM_OBJECT_NULL;

	vm_object_lock(dead_obj);
	if (dead_obj->ref_count == 0) {
		/* Unreferenced objects sit on the cached list; pull it off. */
		XPR(XPR_VM_OBJECT_CACHE,
		   "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
			(integer_t)dead_obj,
			(integer_t)vm_object_cached_list.next,
			(integer_t)vm_object_cached_list.prev, 0,0);

		queue_remove(&vm_object_cached_list, dead_obj,
				vm_object_t, cached_list);
		vm_object_cached_count--;
	}

	/* Take our own reference for the remainder of the routine. */
	dead_obj->ref_count++;
	vm_object_res_reference(dead_obj);

	dead_obj->can_persist = FALSE;

	assert(dead_obj->pager == pager);

	/*
	 *	Break the pager association.  The memory_object
	 *	itself is dead, so nothing is sent to it.
	 */
	dead_obj->pager = MEMORY_OBJECT_NULL;

	vm_object_unlock(dead_obj);
	vm_object_cache_unlock();

	vm_object_pager_wakeup(pager);

	/*
	 *	Drop the pager reference.  memory_object_terminate is
	 *	pointless on a dead memory_object.  The control
	 *	reference is dropped here too, since the pager
	 *	never did so.
	 */
	memory_object_deallocate(pager);
	memory_object_control_deallocate(dead_obj->pager_request);


	/*
	 *	Restart pending page requests
	 */
	vm_object_lock(dead_obj);
	vm_object_abort_activity(dead_obj);
	vm_object_unlock(dead_obj);

	/*
	 *	Give up the reference taken above.
	 */
	vm_object_deallocate(dead_obj);
}
#endif
2496
2497 /*
2498 * Routine: vm_object_enter
2499 * Purpose:
2500 * Find a VM object corresponding to the given
2501 * pager; if no such object exists, create one,
2502 * and initialize the pager.
2503 */
2504 vm_object_t
2505 vm_object_enter(
2506 memory_object_t pager,
2507 vm_object_size_t size,
2508 boolean_t internal,
2509 boolean_t init,
2510 boolean_t named)
2511 {
2512 register vm_object_t object;
2513 vm_object_t new_object;
2514 boolean_t must_init;
2515 vm_object_hash_entry_t entry, new_entry;
2516
2517 if (pager == MEMORY_OBJECT_NULL)
2518 return(vm_object_allocate(size));
2519
2520 new_object = VM_OBJECT_NULL;
2521 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2522 must_init = init;
2523
2524 /*
2525 * Look for an object associated with this port.
2526 */
2527
2528 restart:
2529 vm_object_cache_lock();
2530 for (;;) {
2531 entry = vm_object_hash_lookup(pager, FALSE);
2532
2533 /*
2534 * If a previous object is being terminated,
2535 * we must wait for the termination message
2536 * to be queued.
2537 *
2538 * We set kobject to a non-null value to let the
2539 * terminator know that someone is waiting.
2540 * Among the possibilities is that the port
2541 * could die while we're waiting. Must restart
2542 * instead of continuing the loop.
2543 */
2544
2545 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
2546 if (entry->object != VM_OBJECT_NULL)
2547 break;
2548
2549 entry->waiting = TRUE;
2550 assert_wait((event_t) pager, THREAD_UNINT);
2551 vm_object_cache_unlock();
2552 thread_block((void (*)(void))0);
2553 goto restart;
2554 }
2555
2556 /*
2557 * We must unlock to create a new object;
2558 * if we do so, we must try the lookup again.
2559 */
2560
2561 if (new_object == VM_OBJECT_NULL) {
2562 vm_object_cache_unlock();
2563 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2564 new_entry = vm_object_hash_entry_alloc(pager);
2565 new_object = vm_object_allocate(size);
2566 vm_object_cache_lock();
2567 } else {
2568 /*
2569 * Lookup failed twice, and we have something
2570 * to insert; set the object.
2571 */
2572
2573 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2574 vm_object_hash_insert(new_entry);
2575 entry = new_entry;
2576 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2577 }
2578
2579 entry->object = new_object;
2580 new_object = VM_OBJECT_NULL;
2581 must_init = TRUE;
2582 }
2583 }
2584
2585 object = entry->object;
2586 assert(object != VM_OBJECT_NULL);
2587
2588 if (!must_init) {
2589 vm_object_lock(object);
2590 assert(object->pager_created);
2591 assert(!internal || object->internal);
2592 if (named) {
2593 assert(!object->named);
2594 object->named = TRUE;
2595 }
2596 if (object->ref_count == 0) {
2597 XPR(XPR_VM_OBJECT_CACHE,
2598 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2599 (integer_t)object,
2600 (integer_t)vm_object_cached_list.next,
2601 (integer_t)vm_object_cached_list.prev, 0,0);
2602 queue_remove(&vm_object_cached_list, object,
2603 vm_object_t, cached_list);
2604 vm_object_cached_count--;
2605 }
2606 object->ref_count++;
2607 vm_object_res_reference(object);
2608 vm_object_unlock(object);
2609
2610 VM_STAT(hits++);
2611 }
2612 assert(object->ref_count > 0);
2613
2614 VM_STAT(lookups++);
2615
2616 vm_object_cache_unlock();
2617
2618 XPR(XPR_VM_OBJECT,
2619 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2620 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2621
2622 /*
2623 * If we raced to create a vm_object but lost, let's
2624 * throw away ours.
2625 */
2626
2627 if (new_object != VM_OBJECT_NULL)
2628 vm_object_deallocate(new_object);
2629
2630 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2631 vm_object_hash_entry_free(new_entry);
2632
2633 if (must_init) {
2634 pager_request_t pager_request;
2635
2636 /*
2637 * Allocate request port.
2638 */
2639
2640 pager_request = memory_object_control_allocate(object);
2641 assert (pager_request != PAGER_REQUEST_NULL);
2642
2643 vm_object_lock(object);
2644
2645 /*
2646 * Copy the reference we were given.
2647 */
2648
2649 memory_object_reference(pager);
2650 object->pager_created = TRUE;
2651 object->pager = pager;
2652 object->internal = internal;
2653 object->pager_trusted = internal;
2654 if (!internal) {
2655 /* copy strategy invalid until set by memory manager */
2656 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2657 }
2658 object->pager_request = pager_request;
2659 object->pager_ready = FALSE;
2660
2661 vm_object_unlock(object);
2662
2663 /*
2664 * Let the pager know we're using it.
2665 */
2666
2667 (void) memory_object_init(pager,
2668 object->pager_request,
2669 PAGE_SIZE);
2670
2671 vm_object_lock(object);
2672 if (named)
2673 object->named = TRUE;
2674 if (internal) {
2675 object->pager_ready = TRUE;
2676 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2677 }
2678
2679 object->pager_initialized = TRUE;
2680 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2681 } else {
2682 vm_object_lock(object);
2683 }
2684
2685 /*
2686 * [At this point, the object must be locked]
2687 */
2688
2689 /*
2690 * Wait for the work above to be done by the first
2691 * thread to map this object.
2692 */
2693
2694 while (!object->pager_initialized) {
2695 vm_object_sleep(object,
2696 VM_OBJECT_EVENT_INITIALIZED,
2697 THREAD_UNINT);
2698 }
2699 vm_object_unlock(object);
2700
2701 XPR(XPR_VM_OBJECT,
2702 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2703 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2704 return(object);
2705 }
2706
2707 /*
2708 * Routine: vm_object_pager_create
2709 * Purpose:
2710 * Create a memory object for an internal object.
2711 * In/out conditions:
2712 * The object is locked on entry and exit;
2713 * it may be unlocked within this call.
2714 * Limitations:
2715 * Only one thread may be performing a
2716 * vm_object_pager_create on an object at
2717 * a time. Presumably, only the pageout
2718 * daemon will be using this routine.
2719 */
2720
2721 void
2722 vm_object_pager_create(
2723 register vm_object_t object)
2724 {
2725 memory_object_t pager;
2726 vm_object_hash_entry_t entry;
2727 #if MACH_PAGEMAP
2728 vm_object_size_t size;
2729 vm_external_map_t map;
2730 #endif /* MACH_PAGEMAP */
2731
2732 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2733 (integer_t)object, 0,0,0,0);
2734
2735 if (memory_manager_default_check() != KERN_SUCCESS)
2736 return;
2737
2738 /*
2739 * Prevent collapse or termination by holding a paging reference
2740 */
2741
2742 vm_object_paging_begin(object);
2743 if (object->pager_created) {
2744 /*
2745 * Someone else got to it first...
2746 * wait for them to finish initializing the ports
2747 */
2748 while (!object->pager_initialized) {
2749 vm_object_sleep(object,
2750 VM_OBJECT_EVENT_INITIALIZED,
2751 THREAD_UNINT);
2752 }
2753 vm_object_paging_end(object);
2754 return;
2755 }
2756
2757 /*
2758 * Indicate that a memory object has been assigned
2759 * before dropping the lock, to prevent a race.
2760 */
2761
2762 object->pager_created = TRUE;
2763 object->paging_offset = 0;
2764
2765 #if MACH_PAGEMAP
2766 size = object->size;
2767 #endif /* MACH_PAGEMAP */
2768 vm_object_unlock(object);
2769
2770 #if MACH_PAGEMAP
2771 map = vm_external_create(size);
2772 vm_object_lock(object);
2773 assert(object->size == size);
2774 object->existence_map = map;
2775 vm_object_unlock(object);
2776 #endif /* MACH_PAGEMAP */
2777
2778 /*
2779 * Create the [internal] pager, and associate it with this object.
2780 *
2781 * We make the association here so that vm_object_enter()
2782 * can look up the object to complete initializing it. No
2783 * user will ever map this object.
2784 */
2785 {
2786 memory_object_default_t dmm;
2787 vm_size_t cluster_size;
2788
2789 /* acquire a reference for the default memory manager */
2790 dmm = memory_manager_default_reference(&cluster_size);
2791 assert(cluster_size >= PAGE_SIZE);
2792
2793 object->cluster_size = cluster_size; /* XXX ??? */
2794 assert(object->temporary);
2795
2796 /* create our new memory object */
2797 (void) memory_object_create(dmm, object->size, &pager);
2798
2799 memory_object_default_deallocate(dmm);
2800 }
2801
2802 entry = vm_object_hash_entry_alloc(pager);
2803
2804 vm_object_cache_lock();
2805 vm_object_hash_insert(entry);
2806
2807 entry->object = object;
2808 vm_object_cache_unlock();
2809
2810 /*
2811 * A reference was returned by
2812 * memory_object_create(), and it is
2813 * copied by vm_object_enter().
2814 */
2815
2816 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
2817 panic("vm_object_pager_create: mismatch");
2818
2819 /*
2820 * Drop the reference we were passed.
2821 */
2822 memory_object_deallocate(pager);
2823
2824 vm_object_lock(object);
2825
2826 /*
2827 * Release the paging reference
2828 */
2829 vm_object_paging_end(object);
2830 }
2831
2832 /*
2833 * Routine: vm_object_remove
2834 * Purpose:
2835 * Eliminate the pager/object association
2836 * for this pager.
2837 * Conditions:
2838 * The object cache must be locked.
2839 */
2840 __private_extern__ void
2841 vm_object_remove(
2842 vm_object_t object)
2843 {
2844 memory_object_t pager;
2845 pager_request_t pager_request;
2846
2847 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
2848 vm_object_hash_entry_t entry;
2849
2850 entry = vm_object_hash_lookup(pager, FALSE);
2851 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
2852 entry->object = VM_OBJECT_NULL;
2853 }
2854
2855 }
2856
2857 /*
2858 * Global variables for vm_object_collapse():
2859 *
2860 * Counts for normal collapses and bypasses.
2861 * Debugging variables, to watch or disable collapse.
2862 */
2863 static long object_collapses = 0;
2864 static long object_bypasses = 0;
2865
2866 static boolean_t vm_object_collapse_allowed = TRUE;
2867 static boolean_t vm_object_bypass_allowed = TRUE;
2868
2869 static int vm_external_discarded;
2870 static int vm_external_collapsed;
2871
2872 /*
2873 * Routine: vm_object_do_collapse
2874 * Purpose:
2875 * Collapse an object with the object backing it.
2876 * Pages in the backing object are moved into the
2877 * parent, and the backing object is deallocated.
2878 * Conditions:
2879 * Both objects and the cache are locked; the page
2880 * queues are unlocked.
2881 *
2882 */
2883 static void
2884 vm_object_do_collapse(
2885 vm_object_t object,
2886 vm_object_t backing_object)
2887 {
2888 vm_page_t p, pp;
2889 vm_object_offset_t new_offset, backing_offset;
2890 vm_object_size_t size;
2891
2892 backing_offset = object->shadow_offset;
2893 size = object->size;
2894
2895 /*
2896 * Move all in-memory pages from backing_object
2897 * to the parent. Pages that have been paged out
2898 * will be overwritten by any of the parent's
2899 * pages that shadow them.
2900 */
2901
2902 while (!queue_empty(&backing_object->memq)) {
2903
2904 p = (vm_page_t) queue_first(&backing_object->memq);
2905
2906 new_offset = (p->offset - backing_offset);
2907
2908 assert(!p->busy || p->absent);
2909
2910 /*
2911 * If the parent has a page here, or if
2912 * this page falls outside the parent,
2913 * dispose of it.
2914 *
2915 * Otherwise, move it as planned.
2916 */
2917
2918 if (p->offset < backing_offset || new_offset >= size) {
2919 VM_PAGE_FREE(p);
2920 } else {
2921 pp = vm_page_lookup(object, new_offset);
2922 if (pp == VM_PAGE_NULL) {
2923
2924 /*
2925 * Parent now has no page.
2926 * Move the backing object's page up.
2927 */
2928
2929 vm_page_rename(p, object, new_offset);
2930 #if MACH_PAGEMAP
2931 } else if (pp->absent) {
2932
2933 /*
2934 * Parent has an absent page...
2935 * it's not being paged in, so
2936 * it must really be missing from
2937 * the parent.
2938 *
2939 * Throw out the absent page...
2940 * any faults looking for that
2941 * page will restart with the new
2942 * one.
2943 */
2944
2945 VM_PAGE_FREE(pp);
2946 vm_page_rename(p, object, new_offset);
2947 #endif /* MACH_PAGEMAP */
2948 } else {
2949 assert(! pp->absent);
2950
2951 /*
2952 * Parent object has a real page.
2953 * Throw away the backing object's
2954 * page.
2955 */
2956 VM_PAGE_FREE(p);
2957 }
2958 }
2959 }
2960
2961 assert(object->pager == MEMORY_OBJECT_NULL ||
2962 backing_object->pager == MEMORY_OBJECT_NULL);
2963
2964 if (backing_object->pager != MEMORY_OBJECT_NULL) {
2965 vm_object_hash_entry_t entry;
2966
2967 /*
2968 * Move the pager from backing_object to object.
2969 *
2970 * XXX We're only using part of the paging space
2971 * for keeps now... we ought to discard the
2972 * unused portion.
2973 */
2974
2975 object->pager = backing_object->pager;
2976 entry = vm_object_hash_lookup(object->pager, FALSE);
2977 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
2978 entry->object = object;
2979 object->pager_created = backing_object->pager_created;
2980 object->pager_request = backing_object->pager_request;
2981 object->pager_ready = backing_object->pager_ready;
2982 object->pager_initialized = backing_object->pager_initialized;
2983 object->cluster_size = backing_object->cluster_size;
2984 object->paging_offset =
2985 backing_object->paging_offset + backing_offset;
2986 if (object->pager_request != PAGER_REQUEST_NULL) {
2987 memory_object_control_collapse(object->pager_request,
2988 object);
2989 }
2990 }
2991
2992 vm_object_cache_unlock();
2993
2994 object->paging_offset = backing_object->paging_offset + backing_offset;
2995
2996 #if MACH_PAGEMAP
2997 /*
2998 * If the shadow offset is 0, the use the existence map from
2999 * the backing object if there is one. If the shadow offset is
3000 * not zero, toss it.
3001 *
3002 * XXX - If the shadow offset is not 0 then a bit copy is needed
3003 * if the map is to be salvaged. For now, we just just toss the
3004 * old map, giving the collapsed object no map. This means that
3005 * the pager is invoked for zero fill pages. If analysis shows
3006 * that this happens frequently and is a performance hit, then
3007 * this code should be fixed to salvage the map.
3008 */
3009 assert(object->existence_map == VM_EXTERNAL_NULL);
3010 if (backing_offset || (size != backing_object->size)) {
3011 vm_external_discarded++;
3012 vm_external_destroy(backing_object->existence_map,
3013 backing_object->size);
3014 }
3015 else {
3016 vm_external_collapsed++;
3017 object->existence_map = backing_object->existence_map;
3018 }
3019 backing_object->existence_map = VM_EXTERNAL_NULL;
3020 #endif /* MACH_PAGEMAP */
3021
3022 /*
3023 * Object now shadows whatever backing_object did.
3024 * Note that the reference to backing_object->shadow
3025 * moves from within backing_object to within object.
3026 */
3027
3028 object->shadow = backing_object->shadow;
3029 object->shadow_offset += backing_object->shadow_offset;
3030 assert((object->shadow == VM_OBJECT_NULL) ||
3031 (object->shadow->copy == VM_OBJECT_NULL));
3032
3033 /*
3034 * Discard backing_object.
3035 *
3036 * Since the backing object has no pages, no
3037 * pager left, and no object references within it,
3038 * all that is necessary is to dispose of it.
3039 */
3040
3041 assert((backing_object->ref_count == 1) &&
3042 (backing_object->resident_page_count == 0) &&
3043 (backing_object->paging_in_progress == 0));
3044
3045 backing_object->alive = FALSE;
3046 vm_object_unlock(backing_object);
3047
3048 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3049 (integer_t)backing_object, 0,0,0,0);
3050
3051 zfree(vm_object_zone, (vm_offset_t) backing_object);
3052
3053 object_collapses++;
3054 }
3055
3056 static void
3057 vm_object_do_bypass(
3058 vm_object_t object,
3059 vm_object_t backing_object)
3060 {
3061 /*
3062 * Make the parent shadow the next object
3063 * in the chain.
3064 */
3065
3066 #if TASK_SWAPPER
3067 /*
3068 * Do object reference in-line to
3069 * conditionally increment shadow's
3070 * residence count. If object is not
3071 * resident, leave residence count
3072 * on shadow alone.
3073 */
3074 if (backing_object->shadow != VM_OBJECT_NULL) {
3075 vm_object_lock(backing_object->shadow);
3076 backing_object->shadow->ref_count++;
3077 if (object->res_count != 0)
3078 vm_object_res_reference(backing_object->shadow);
3079 vm_object_unlock(backing_object->shadow);
3080 }
3081 #else /* TASK_SWAPPER */
3082 vm_object_reference(backing_object->shadow);
3083 #endif /* TASK_SWAPPER */
3084
3085 object->shadow = backing_object->shadow;
3086 object->shadow_offset += backing_object->shadow_offset;
3087
3088 /*
3089 * Backing object might have had a copy pointer
3090 * to us. If it did, clear it.
3091 */
3092 if (backing_object->copy == object) {
3093 backing_object->copy = VM_OBJECT_NULL;
3094 }
3095
3096 /*
3097 * Drop the reference count on backing_object.
3098 #if TASK_SWAPPER
3099 * Since its ref_count was at least 2, it
3100 * will not vanish; so we don't need to call
3101 * vm_object_deallocate.
3102 * [FBDP: that doesn't seem to be true any more]
3103 *
3104 * The res_count on the backing object is
3105 * conditionally decremented. It's possible
3106 * (via vm_pageout_scan) to get here with
3107 * a "swapped" object, which has a 0 res_count,
3108 * in which case, the backing object res_count
3109 * is already down by one.
3110 #else
3111 * Don't call vm_object_deallocate unless
3112 * ref_count drops to zero.
3113 *
3114 * The ref_count can drop to zero here if the
3115 * backing object could be bypassed but not
3116 * collapsed, such as when the backing object
3117 * is temporary and cachable.
3118 #endif
3119 */
3120 if (backing_object->ref_count > 1) {
3121 backing_object->ref_count--;
3122 #if TASK_SWAPPER
3123 if (object->res_count != 0)
3124 vm_object_res_deallocate(backing_object);
3125 assert(backing_object->ref_count > 0);
3126 #endif /* TASK_SWAPPER */
3127 vm_object_unlock(backing_object);
3128 } else {
3129
3130 /*
3131 * Drop locks so that we can deallocate
3132 * the backing object.
3133 */
3134
3135 #if TASK_SWAPPER
3136 if (object->res_count == 0) {
3137 /* XXX get a reference for the deallocate below */
3138 vm_object_res_reference(backing_object);
3139 }
3140 #endif /* TASK_SWAPPER */
3141 vm_object_unlock(object);
3142 vm_object_unlock(backing_object);
3143 vm_object_deallocate(backing_object);
3144
3145 /*
3146 * Relock object. We don't have to reverify
3147 * its state since vm_object_collapse will
3148 * do that for us as it starts at the
3149 * top of its loop.
3150 */
3151
3152 vm_object_lock(object);
3153 }
3154
3155 object_bypasses++;
3156 }
3157
3158
3159 /*
3160 * vm_object_collapse:
3161 *
3162 * Perform an object collapse or an object bypass if appropriate.
3163 * The real work of collapsing and bypassing is performed in
3164 * the routines vm_object_do_collapse and vm_object_do_bypass.
3165 *
3166 * Requires that the object be locked and the page queues be unlocked.
3167 *
3168 */
3169 __private_extern__ void
3170 vm_object_collapse(
3171 register vm_object_t object)
3172 {
3173 register vm_object_t backing_object;
3174 register vm_object_offset_t backing_offset;
3175 register vm_object_size_t size;
3176 register vm_object_offset_t new_offset;
3177 register vm_page_t p;
3178
3179 vm_offset_t current_offset;
3180
3181 if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {
3182 return;
3183 }
3184
3185 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3186 (integer_t)object, 0,0,0,0);
3187
3188 while (TRUE) {
3189 /*
3190 * Verify that the conditions are right for either
3191 * collapse or bypass:
3192 *
3193 * The object exists and no pages in it are currently
3194 * being paged out, and
3195 */
3196 if (object == VM_OBJECT_NULL ||
3197 object->paging_in_progress != 0 ||
3198 object->absent_count != 0)
3199 return;
3200
3201 /*
3202 * There is a backing object, and
3203 */
3204
3205 if ((backing_object = object->shadow) == VM_OBJECT_NULL)
3206 return;
3207
3208 vm_object_lock(backing_object);
3209
3210 /*
3211 * ...
3212 * The backing object is not read_only,
3213 * and no pages in the backing object are
3214 * currently being paged out.
3215 * The backing object is internal.
3216 *
3217 */
3218
3219 if (!backing_object->internal ||
3220 backing_object->paging_in_progress != 0) {
3221 vm_object_unlock(backing_object);
3222 return;
3223 }
3224
3225 /*
3226 * The backing object can't be a copy-object:
3227 * the shadow_offset for the copy-object must stay
3228 * as 0. Furthermore (for the 'we have all the
3229 * pages' case), if we bypass backing_object and
3230 * just shadow the next object in the chain, old
3231 * pages from that object would then have to be copied
3232 * BOTH into the (former) backing_object and into the
3233 * parent object.
3234 */
3235 if (backing_object->shadow != VM_OBJECT_NULL &&
3236 backing_object->shadow->copy != VM_OBJECT_NULL) {
3237 vm_object_unlock(backing_object);
3238 return;
3239 }
3240
3241 /*
3242 * We can now try to either collapse the backing
3243 * object (if the parent is the only reference to
3244 * it) or (perhaps) remove the parent's reference
3245 * to it.
3246 *
3247 * If there is exactly one reference to the backing
3248 * object, we may be able to collapse it into the
3249 * parent.
3250 *
3251 * The backing object must not have a pager
3252 * created for it, since collapsing an object
3253 * into a backing_object dumps new pages into
3254 * the backing_object that its pager doesn't
3255 * know about.
3256 */
3257
3258 if (backing_object->ref_count == 1 &&
3259 ! object->pager_created &&
3260 vm_object_collapse_allowed) {
3261
3262 XPR(XPR_VM_OBJECT,
3263 "vm_object_collapse: %x to %x, pager %x, pager_request %x\n",
3264 (integer_t)backing_object, (integer_t)object,
3265 (integer_t)backing_object->pager,
3266 (integer_t)backing_object->pager_request, 0);
3267
3268 /*
3269 * We need the cache lock for collapsing,
3270 * but we must not deadlock.
3271 */
3272
3273 if (! vm_object_cache_lock_try()) {
3274 vm_object_unlock(backing_object);
3275 return;
3276 }
3277
3278 /*
3279 * Collapse the object with its backing
3280 * object, and try again with the object's
3281 * new backing object.
3282 */
3283
3284 vm_object_do_collapse(object, backing_object);
3285 continue;
3286 }
3287
3288
3289 /*
3290 * Collapsing the backing object was not possible
3291 * or permitted, so let's try bypassing it.
3292 */
3293
3294 if (! vm_object_bypass_allowed) {
3295 vm_object_unlock(backing_object);
3296 return;
3297 }
3298
3299
3300 /*
3301 * If the backing object has a pager but no pagemap,
3302 * then we cannot bypass it, because we don't know
3303 * what pages it has.
3304 */
3305 if (backing_object->pager_created
3306 #if MACH_PAGEMAP
3307 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3308 #endif /* MACH_PAGEMAP */
3309 ) {
3310 vm_object_unlock(backing_object);
3311 return;
3312 }
3313
3314 /*
3315 * If the object has a pager but no pagemap,
3316 * then we cannot bypass it, because we don't know
3317 * what pages it has.
3318 */
3319 if (object->pager_created
3320 #if MACH_PAGEMAP
3321 && (object->existence_map == VM_EXTERNAL_NULL)
3322 #endif /* MACH_PAGEMAP */
3323 ) {
3324 vm_object_unlock(backing_object);
3325 return;
3326 }
3327
3328 backing_offset = object->shadow_offset;
3329 size = object->size;
3330
3331 /*
3332 * If all of the pages in the backing object are
3333 * shadowed by the parent object, the parent
3334 * object no longer has to shadow the backing
3335 * object; it can shadow the next one in the
3336 * chain.
3337 *
3338 * If the backing object has existence info,
3339 * we must examine its existence info
3340 * as well.
3341 *
3342 */
3343
3344 if(object->cow_hint >= size)
3345 object->cow_hint = 0;
3346 current_offset = object->cow_hint;
3347 while(TRUE) {
3348 if (vm_page_lookup(object,
3349 (vm_object_offset_t)current_offset)
3350 != VM_PAGE_NULL) {
3351 current_offset+=PAGE_SIZE;
3352 } else if ((object->pager_created) &&
3353 (object->existence_map != NULL) &&
3354 (vm_external_state_get(object->existence_map,
3355 current_offset)
3356 != VM_EXTERNAL_STATE_ABSENT)) {
3357 current_offset+=PAGE_SIZE;
3358 } else if (vm_page_lookup(backing_object,
3359 (vm_object_offset_t)current_offset
3360 + backing_offset)!= VM_PAGE_NULL) {
3361 /* found a dependency */
3362 object->cow_hint = current_offset;
3363 vm_object_unlock(backing_object);
3364 return;
3365 } else if ((backing_object->pager_created) &&
3366 (backing_object->existence_map != NULL) &&
3367 (vm_external_state_get(
3368 backing_object->existence_map,
3369 current_offset + backing_offset)
3370 != VM_EXTERNAL_STATE_ABSENT)) {
3371 /* found a dependency */
3372 object->cow_hint = current_offset;
3373 vm_object_unlock(backing_object);
3374 return;
3375 } else {
3376 current_offset+=PAGE_SIZE;
3377 }
3378 if(current_offset >= size) {
3379 /* wrap at end of object */
3380 current_offset = 0;
3381 }
3382 if(current_offset == object->cow_hint) {
3383 /* we are free of shadow influence */
3384 break;
3385 }
3386 }
3387 /* reset the cow_hint for any objects deeper in the chain */
3388 object->cow_hint = 0;
3389
3390
3391
3392 /*
3393 * All interesting pages in the backing object
3394 * already live in the parent or its pager.
3395 * Thus we can bypass the backing object.
3396 */
3397
3398 vm_object_do_bypass(object, backing_object);
3399
3400 /*
3401 * Try again with this object's new backing object.
3402 */
3403
3404 continue;
3405 }
3406 }
3407
3408 /*
3409 * Routine: vm_object_page_remove: [internal]
3410 * Purpose:
3411 * Removes all physical pages in the specified
3412 * object range from the object's list of pages.
3413 *
3414 * In/out conditions:
3415 * The object must be locked.
3416 * The object must not have paging_in_progress, usually
3417 * guaranteed by not having a pager.
3418 */
3419 unsigned int vm_object_page_remove_lookup = 0;
3420 unsigned int vm_object_page_remove_iterate = 0;
3421
3422 __private_extern__ void
3423 vm_object_page_remove(
3424 register vm_object_t object,
3425 register vm_object_offset_t start,
3426 register vm_object_offset_t end)
3427 {
3428 register vm_page_t p, next;
3429
3430 /*
3431 * One and two page removals are most popular.
3432 * The factor of 16 here is somewhat arbitrary.
3433 * It balances vm_object_lookup vs iteration.
3434 */
3435
3436 if (atop(end - start) < (unsigned)object->resident_page_count/16) {
3437 vm_object_page_remove_lookup++;
3438
3439 for (; start < end; start += PAGE_SIZE_64) {
3440 p = vm_page_lookup(object, start);
3441 if (p != VM_PAGE_NULL) {
3442 assert(!p->cleaning && !p->pageout);
3443 if (!p->fictitious)
3444 pmap_page_protect(p->phys_addr,
3445 VM_PROT_NONE);
3446 VM_PAGE_FREE(p);
3447 }
3448 }
3449 } else {
3450 vm_object_page_remove_iterate++;
3451
3452 p = (vm_page_t) queue_first(&object->memq);
3453 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3454 next = (vm_page_t) queue_next(&p->listq);
3455 if ((start <= p->offset) && (p->offset < end)) {
3456 assert(!p->cleaning && !p->pageout);
3457 if (!p->fictitious)
3458 pmap_page_protect(p->phys_addr,
3459 VM_PROT_NONE);
3460 VM_PAGE_FREE(p);
3461 }
3462 p = next;
3463 }
3464 }
3465 }
3466
3467
3468 /*
3469 * Routine: vm_object_coalesce
3470 * Function: Coalesces two objects backing up adjoining
3471 * regions of memory into a single object.
3472 *
3473 * returns TRUE if objects were combined.
3474 *
3475 * NOTE: Only works at the moment if the second object is NULL -
3476 * if it's not, which object do we lock first?
3477 *
3478 * Parameters:
3479 * prev_object First object to coalesce
3480 * prev_offset Offset into prev_object
3481 * next_object Second object into coalesce
3482 * next_offset Offset into next_object
3483 *
3484 * prev_size Size of reference to prev_object
3485 * next_size Size of reference to next_object
3486 *
3487 * Conditions:
3488 * The object(s) must *not* be locked. The map must be locked
3489 * to preserve the reference to the object(s).
3490 */
3491 static int vm_object_coalesce_count = 0;
3492
3493 __private_extern__ boolean_t
3494 vm_object_coalesce(
3495 register vm_object_t prev_object,
3496 vm_object_t next_object,
3497 vm_object_offset_t prev_offset,
3498 vm_object_offset_t next_offset,
3499 vm_object_size_t prev_size,
3500 vm_object_size_t next_size)
3501 {
3502 vm_object_size_t newsize;
3503
3504 #ifdef lint
3505 next_offset++;
3506 #endif /* lint */
3507
3508 if (next_object != VM_OBJECT_NULL) {
3509 return(FALSE);
3510 }
3511
3512 if (prev_object == VM_OBJECT_NULL) {
3513 return(TRUE);
3514 }
3515
3516 XPR(XPR_VM_OBJECT,
3517 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3518 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3519
3520 vm_object_lock(prev_object);
3521
3522 /*
3523 * Try to collapse the object first
3524 */
3525 vm_object_collapse(prev_object);
3526
3527 /*
3528 * Can't coalesce if pages not mapped to
3529 * prev_entry may be in use any way:
3530 * . more than one reference
3531 * . paged out
3532 * . shadows another object
3533 * . has a copy elsewhere
3534 * . paging references (pages might be in page-list)
3535 */
3536
3537 if ((prev_object->ref_count > 1) ||
3538 prev_object->pager_created ||
3539 (prev_object->shadow != VM_OBJECT_NULL) ||
3540 (prev_object->copy != VM_OBJECT_NULL) ||
3541 (prev_object->true_share != FALSE) ||
3542 (prev_object->paging_in_progress != 0)) {
3543 vm_object_unlock(prev_object);
3544 return(FALSE);
3545 }
3546
3547 vm_object_coalesce_count++;
3548
3549 /*
3550 * Remove any pages that may still be in the object from
3551 * a previous deallocation.
3552 */
3553 vm_object_page_remove(prev_object,
3554 prev_offset + prev_size,
3555 prev_offset + prev_size + next_size);
3556
3557 /*
3558 * Extend the object if necessary.
3559 */
3560 newsize = prev_offset + prev_size + next_size;
3561 if (newsize > prev_object->size) {
3562 #if MACH_PAGEMAP
3563 /*
3564 * We cannot extend an object that has existence info,
3565 * since the existence info might then fail to cover
3566 * the entire object.
3567 *
3568 * This assertion must be true because the object
3569 * has no pager, and we only create existence info
3570 * for objects with pagers.
3571 */
3572 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3573 #endif /* MACH_PAGEMAP */
3574 prev_object->size = newsize;
3575 }
3576
3577 vm_object_unlock(prev_object);
3578 return(TRUE);
3579 }
3580
3581 /*
3582 * Attach a set of physical pages to an object, so that they can
3583 * be mapped by mapping the object. Typically used to map IO memory.
3584 *
3585 * The mapping function and its private data are used to obtain the
3586 * physical addresses for each page to be mapped.
3587 */
3588 void
3589 vm_object_page_map(
3590 vm_object_t object,
3591 vm_object_offset_t offset,
3592 vm_object_size_t size,
3593 vm_object_offset_t (*map_fn)(void *map_fn_data,
3594 vm_object_offset_t offset),
3595 void *map_fn_data) /* private to map_fn */
3596 {
3597 int num_pages;
3598 int i;
3599 vm_page_t m;
3600 vm_page_t old_page;
3601 vm_object_offset_t addr;
3602
3603 num_pages = atop(size);
3604
3605 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3606
3607 addr = (*map_fn)(map_fn_data, offset);
3608
3609 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3610 vm_page_more_fictitious();
3611
3612 vm_object_lock(object);
3613 if ((old_page = vm_page_lookup(object, offset))
3614 != VM_PAGE_NULL)
3615 {
3616 vm_page_lock_queues();
3617 vm_page_free(old_page);
3618 vm_page_unlock_queues();
3619 }
3620
3621 vm_page_init(m, addr);
3622 /* private normally requires lock_queues but since we */
3623 /* are initializing the page, its not necessary here */
3624 m->private = TRUE; /* don`t free page */
3625 m->wire_count = 1;
3626 vm_page_insert(m, object, offset);
3627
3628 PAGE_WAKEUP_DONE(m);
3629 vm_object_unlock(object);
3630 }
3631 }
3632
3633 #include <mach_kdb.h>
3634
3635 #if MACH_KDB
3636 #include <ddb/db_output.h>
3637 #include <vm/vm_print.h>
3638
3639 #define printf kdbprintf
3640
/*
 * Forward declarations for the debugger-only helpers defined later in
 * this MACH_KDB section.
 */
extern boolean_t	vm_object_cached(
	vm_object_t object);

extern void		print_bitstring(
	char byte);

/* When TRUE, vm_object_print also lists the pages on the object's memq. */
boolean_t	vm_object_print_pages = FALSE;
3648
/*
 *	print_bitstring:	[ debug ]
 *
 *	Prints the eight bits of "byte" as '0'/'1' characters,
 *	starting with bit 0 and ending with bit 7.
 */
void
print_bitstring(
	char byte)
{
	char	digit[8];
	int	bit;

	for (bit = 0; bit < 8; bit++)
		digit[bit] = (byte & (1 << bit)) ? '1' : '0';

	printf("%c%c%c%c%c%c%c%c",
	       digit[0], digit[1], digit[2], digit[3],
	       digit[4], digit[5], digit[6], digit[7]);
}
3663
3664 boolean_t
3665 vm_object_cached(
3666 register vm_object_t object)
3667 {
3668 register vm_object_t o;
3669
3670 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3671 if (object == o) {
3672 return TRUE;
3673 }
3674 }
3675 return FALSE;
3676 }
3677
#if	MACH_PAGEMAP
/*
 *	vm_external_print:	[ debug ]
 *
 *	Prints a summary of an existence map: its size in bytes
 *	(stob(size)) followed by the first two bytes and the last byte
 *	of the map as bit strings.  A null map prints as "0".
 */
void
vm_external_print(
	vm_external_map_t map,
	vm_size_t	size)
{
	vm_size_t	nbytes;

	if (map == VM_EXTERNAL_NULL) {
		printf("0 ");
		return;
	}

	nbytes = stob(size);
	printf("{ size=%d, map=[", nbytes);

	if (nbytes > 0)
		print_bitstring(map[0]);
	if (nbytes > 1)
		print_bitstring(map[1]);
	if (nbytes > 2) {
		/* elide the middle, show only the final byte */
		printf("...");
		print_bitstring(map[nbytes-1]);
	}

	printf("] }\n");
}
#endif	/* MACH_PAGEMAP */
3707
3708 int
3709 vm_follow_object(
3710 vm_object_t object)
3711 {
3712 extern db_indent;
3713
3714 int count = 0;
3715 int orig_db_indent = db_indent;
3716
3717 while (TRUE) {
3718 if (object == VM_OBJECT_NULL) {
3719 db_indent = orig_db_indent;
3720 return count;
3721 }
3722
3723 count += 1;
3724
3725 iprintf("object 0x%x", object);
3726 printf(", shadow=0x%x", object->shadow);
3727 printf(", copy=0x%x", object->copy);
3728 printf(", pager=0x%x", object->pager);
3729 printf(", ref=%d\n", object->ref_count);
3730
3731 db_indent += 2;
3732 object = object->shadow;
3733 }
3734
3735 }
3736
3737 /*
3738 * vm_object_print: [ debug ]
3739 */
3740 void
3741 vm_object_print(
3742 vm_object_t object,
3743 boolean_t have_addr,
3744 int arg_count,
3745 char *modif)
3746 {
3747 register vm_page_t p;
3748 extern db_indent;
3749 char *s;
3750
3751 register int count;
3752
3753 if (object == VM_OBJECT_NULL)
3754 return;
3755
3756 iprintf("object 0x%x\n", object);
3757
3758 db_indent += 2;
3759
3760 iprintf("size=0x%x", object->size);
3761 printf(", cluster=0x%x", object->cluster_size);
3762 printf(", frozen=0x%x", object->frozen_size);
3763 printf(", ref_count=%d\n", object->ref_count);
3764 iprintf("");
3765 #if TASK_SWAPPER
3766 printf("res_count=%d, ", object->res_count);
3767 #endif /* TASK_SWAPPER */
3768 printf("resident_page_count=%d\n", object->resident_page_count);
3769
3770 iprintf("shadow=0x%x", object->shadow);
3771 if (object->shadow) {
3772 register int i = 0;
3773 vm_object_t shadow = object;
3774 while(shadow = shadow->shadow)
3775 i++;
3776 printf(" (depth %d)", i);
3777 }
3778 printf(", copy=0x%x", object->copy);
3779 printf(", shadow_offset=0x%x", object->shadow_offset);
3780 printf(", last_alloc=0x%x\n", object->last_alloc);
3781
3782 iprintf("pager=0x%x", object->pager);
3783 printf(", paging_offset=0x%x", object->paging_offset);
3784 printf(", pager_request=0x%x\n", object->pager_request);
3785
3786 iprintf("copy_strategy=%d[", object->copy_strategy);
3787 switch (object->copy_strategy) {
3788 case MEMORY_OBJECT_COPY_NONE:
3789 printf("copy_none");
3790 break;
3791
3792 case MEMORY_OBJECT_COPY_CALL:
3793 printf("copy_call");
3794 break;
3795
3796 case MEMORY_OBJECT_COPY_DELAY:
3797 printf("copy_delay");
3798 break;
3799
3800 case MEMORY_OBJECT_COPY_SYMMETRIC:
3801 printf("copy_symmetric");
3802 break;
3803
3804 case MEMORY_OBJECT_COPY_INVALID:
3805 printf("copy_invalid");
3806 break;
3807
3808 default:
3809 printf("?");
3810 }
3811 printf("]");
3812 printf(", absent_count=%d\n", object->absent_count);
3813
3814 iprintf("all_wanted=0x%x<", object->all_wanted);
3815 s = "";
3816 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
3817 printf("%sinit", s);
3818 s = ",";
3819 }
3820 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
3821 printf("%sready", s);
3822 s = ",";
3823 }
3824 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
3825 printf("%spaging", s);
3826 s = ",";
3827 }
3828 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
3829 printf("%sabsent", s);
3830 s = ",";
3831 }
3832 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
3833 printf("%slock", s);
3834 s = ",";
3835 }
3836 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
3837 printf("%suncaching", s);
3838 s = ",";
3839 }
3840 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
3841 printf("%scopy_call", s);
3842 s = ",";
3843 }
3844 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
3845 printf("%scaching", s);
3846 s = ",";
3847 }
3848 printf(">");
3849 printf(", paging_in_progress=%d\n", object->paging_in_progress);
3850
3851 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
3852 (object->pager_created ? "" : "!"),
3853 (object->pager_initialized ? "" : "!"),
3854 (object->pager_ready ? "" : "!"),
3855 (object->can_persist ? "" : "!"),
3856 (object->pager_trusted ? "" : "!"),
3857 (object->pageout ? "" : "!"),
3858 (object->internal ? "internal" : "external"),
3859 (object->temporary ? "temporary" : "permanent"));
3860 iprintf("%salive, %slock_in_progress, %slock_restart, %sshadowed, %scached, %sprivate\n",
3861 (object->alive ? "" : "!"),
3862 (object->lock_in_progress ? "" : "!"),
3863 (object->lock_restart ? "" : "!"),
3864 (object->shadowed ? "" : "!"),
3865 (vm_object_cached(object) ? "" : "!"),
3866 (object->private ? "" : "!"));
3867 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
3868 (object->advisory_pageout ? "" : "!"),
3869 (object->silent_overwrite ? "" : "!"));
3870
3871 #if MACH_PAGEMAP
3872 iprintf("existence_map=");
3873 vm_external_print(object->existence_map, object->size);
3874 #endif /* MACH_PAGEMAP */
3875 #if MACH_ASSERT
3876 iprintf("paging_object=0x%x\n", object->paging_object);
3877 #endif /* MACH_ASSERT */
3878
3879 if (vm_object_print_pages) {
3880 count = 0;
3881 p = (vm_page_t) queue_first(&object->memq);
3882 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3883 if (count == 0) {
3884 iprintf("memory:=");
3885 } else if (count == 2) {
3886 printf("\n");
3887 iprintf(" ...");
3888 count = 0;
3889 } else {
3890 printf(",");
3891 }
3892 count++;
3893
3894 printf("(off=0x%X,page=0x%X)", p->offset, (integer_t) p);
3895 p = (vm_page_t) queue_next(&p->listq);
3896 }
3897 if (count != 0) {
3898 printf("\n");
3899 }
3900 }
3901 db_indent -= 2;
3902 }
3903
3904
3905 /*
3906 * vm_object_find [ debug ]
3907 *
3908 * Find all tasks which reference the given vm_object.
3909 */
3910
3911 boolean_t vm_object_find(vm_object_t object);
3912 boolean_t vm_object_print_verbose = FALSE;
3913
3914 boolean_t
3915 vm_object_find(
3916 vm_object_t object)
3917 {
3918 task_t task;
3919 vm_map_t map;
3920 vm_map_entry_t entry;
3921 processor_set_t pset = &default_pset;
3922 boolean_t found = FALSE;
3923
3924 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
3925 map = task->map;
3926 for (entry = vm_map_first_entry(map);
3927 entry && entry != vm_map_to_entry(map);
3928 entry = entry->vme_next) {
3929
3930 vm_object_t obj;
3931
3932 /*
3933 * For the time being skip submaps,
3934 * only the kernel can have submaps,
3935 * and unless we are interested in
3936 * kernel objects, we can simply skip
3937 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
3938 * for a full solution.
3939 */
3940 if (entry->is_sub_map)
3941 continue;
3942 if (entry)
3943 obj = entry->object.vm_object;
3944 else
3945 continue;
3946
3947 while (obj != VM_OBJECT_NULL) {
3948 if (obj == object) {
3949 if (!found) {
3950 printf("TASK\t\tMAP\t\tENTRY\n");
3951 found = TRUE;
3952 }
3953 printf("0x%x\t0x%x\t0x%x\n",
3954 task, map, entry);
3955 }
3956 obj = obj->shadow;
3957 }
3958 }
3959 }
3960
3961 return(found);
3962 }
3963
3964 #endif /* MACH_KDB */
3965
3966 kern_return_t
3967 vm_object_populate_with_private(
3968 vm_object_t object,
3969 vm_object_offset_t offset,
3970 vm_offset_t phys_addr,
3971 vm_size_t size)
3972 {
3973 vm_offset_t base_addr;
3974 vm_object_offset_t base_offset;
3975
3976
3977 if(!object->private)
3978 return KERN_FAILURE;
3979
3980 if((base_addr = trunc_page(phys_addr)) != phys_addr) {
3981 return KERN_FAILURE;
3982 }
3983
3984
3985 vm_object_lock(object);
3986 if(!object->phys_contiguous) {
3987 vm_page_t m;
3988 if((base_offset = trunc_page(offset)) != offset) {
3989 vm_object_unlock(object);
3990 return KERN_FAILURE;
3991 }
3992 base_offset += object->paging_offset;
3993 while(size) {
3994 m = vm_page_lookup(object, base_offset);
3995 if(m != VM_PAGE_NULL) {
3996 if(m->fictitious) {
3997 vm_page_lock_queues();
3998 m->fictitious = FALSE;
3999 m->private = TRUE;
4000 m->phys_addr = base_addr;
4001 if(!m->busy) {
4002 m->busy = TRUE;
4003 }
4004 if(!m->absent) {
4005 m->absent = TRUE;
4006 object->absent_count++;
4007 }
4008 m->list_req_pending = TRUE;
4009 vm_page_unlock_queues();
4010 } else if (m->phys_addr != base_addr) {
4011 /* pmap call to clear old mapping */
4012 pmap_page_protect(m->phys_addr,
4013 VM_PROT_NONE);
4014 m->phys_addr = base_addr;
4015 }
4016 } else {
4017 while ((m = vm_page_grab_fictitious())
4018 == VM_PAGE_NULL)
4019 vm_page_more_fictitious();
4020 vm_page_lock_queues();
4021 m->fictitious = FALSE;
4022 m->private = TRUE;
4023 m->phys_addr = base_addr;
4024 m->list_req_pending = TRUE;
4025 m->absent = TRUE;
4026 m->unusual = TRUE;
4027 object->absent_count++;
4028 vm_page_unlock_queues();
4029 vm_page_insert(m, object, base_offset);
4030 }
4031 base_addr += PAGE_SIZE;
4032 base_offset += PAGE_SIZE;
4033 size -= PAGE_SIZE;
4034 }
4035 } else {
4036 /* NOTE: we should check the original settings here */
4037 /* if we have a size > zero a pmap call should be made */
4038 /* to disable the range */
4039
4040 /* pmap_? */
4041
4042 /* shadows on contiguous memory are not allowed */
4043 /* we therefore can use the offset field */
4044 object->shadow_offset = (vm_object_offset_t)phys_addr;
4045 object->size = size;
4046 }
4047 vm_object_unlock(object);
4048 return KERN_SUCCESS;
4049 }
4050
4051 /*
4052 * memory_object_free_from_cache:
4053 *
4054 * Walk the vm_object cache list, removing and freeing vm_objects
4055 * which are backed by the pager identified by the caller, (pager_id).
4056 * Remove up to "count" objects, if there are that may available
4057 * in the cache.
4058 *
4059 * Walk the list at most once, return the number of vm_objects
4060 * actually freed.
4061 */
4062
4063 __private_extern__ kern_return_t
4064 memory_object_free_from_cache(
4065 host_t host,
4066 int *pager_id,
4067 int *count)
4068 {
4069
4070 int object_released = 0;
4071 int i;
4072
4073 register vm_object_t object = VM_OBJECT_NULL;
4074 vm_object_t shadow;
4075
4076 /*
4077 if(host == HOST_NULL)
4078 return(KERN_INVALID_ARGUMENT);
4079 */
4080
4081 try_again:
4082 vm_object_cache_lock();
4083
4084 queue_iterate(&vm_object_cached_list, object,
4085 vm_object_t, cached_list) {
4086 if (object->pager && (pager_id == object->pager->pager)) {
4087 vm_object_lock(object);
4088 queue_remove(&vm_object_cached_list, object,
4089 vm_object_t, cached_list);
4090 vm_object_cached_count--;
4091
4092 /*
4093 * Since this object is in the cache, we know
4094 * that it is initialized and has only a pager's
4095 * (implicit) reference. Take a reference to avoid
4096 * recursive deallocations.
4097 */
4098
4099 assert(object->pager_initialized);
4100 assert(object->ref_count == 0);
4101 object->ref_count++;
4102
4103 /*
4104 * Terminate the object.
4105 * If the object had a shadow, we let
4106 * vm_object_deallocate deallocate it.
4107 * "pageout" objects have a shadow, but
4108 * maintain a "paging reference" rather
4109 * than a normal reference.
4110 * (We are careful here to limit recursion.)
4111 */
4112 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4113 if ((vm_object_terminate(object) == KERN_SUCCESS)
4114 && (shadow != VM_OBJECT_NULL)) {
4115 vm_object_deallocate(shadow);
4116 }
4117
4118 if(object_released++ == *count)
4119 return KERN_SUCCESS;
4120 goto try_again;
4121 }
4122 }
4123 vm_object_cache_unlock();
4124 *count = object_released;
4125 return KERN_SUCCESS;
4126 }
4127
4128
4129
4130 kern_return_t
4131 memory_object_create_named(
4132 memory_object_t pager,
4133 memory_object_offset_t size,
4134 memory_object_control_t *control)
4135 {
4136 vm_object_t object;
4137 vm_object_hash_entry_t entry;
4138
4139 *control = MEMORY_OBJECT_CONTROL_NULL;
4140 if (pager == MEMORY_OBJECT_NULL)
4141 return KERN_INVALID_ARGUMENT;
4142
4143 vm_object_cache_lock();
4144 entry = vm_object_hash_lookup(pager, FALSE);
4145 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4146 (entry->object != VM_OBJECT_NULL)) {
4147 if (entry->object->named == TRUE)
4148 panic("memory_object_create_named: caller already holds the right"); }
4149
4150 vm_object_cache_unlock();
4151 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4152 == VM_OBJECT_NULL) {
4153 return(KERN_INVALID_OBJECT);
4154 }
4155
4156 /* wait for object (if any) to be ready */
4157 if (object != VM_OBJECT_NULL) {
4158 vm_object_lock(object);
4159 object->named = TRUE;
4160 while (!object->pager_ready) {
4161 vm_object_sleep(object,
4162 VM_OBJECT_EVENT_PAGER_READY,
4163 THREAD_UNINT);
4164 }
4165 *control = object->pager_request;
4166 vm_object_unlock(object);
4167 }
4168 return (KERN_SUCCESS);
4169 }
4170
4171
4172 /*
4173 * Routine: memory_object_recover_named [user interface]
4174 * Purpose:
4175 * Attempt to recover a named reference for a VM object.
4176 * VM will verify that the object has not already started
4177 * down the termination path, and if it has, will optionally
4178 * wait for that to finish.
4179 * Returns:
4180 * KERN_SUCCESS - we recovered a named reference on the object
4181 * KERN_FAILURE - we could not recover a reference (object dead)
4182 * KERN_INVALID_ARGUMENT - bad memory object control
4183 */
4184 kern_return_t
4185 memory_object_recover_named(
4186 memory_object_control_t control,
4187 boolean_t wait_on_terminating)
4188 {
4189 vm_object_t object;
4190
4191 vm_object_cache_lock();
4192 object = memory_object_control_to_vm_object(control);
4193 if (object == VM_OBJECT_NULL) {
4194 vm_object_cache_unlock();
4195 return (KERN_INVALID_ARGUMENT);
4196 }
4197
4198 restart:
4199 vm_object_lock(object);
4200
4201 if (object->terminating && wait_on_terminating) {
4202 vm_object_cache_unlock();
4203 vm_object_wait(object,
4204 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4205 THREAD_UNINT);
4206 vm_object_cache_lock();
4207 goto restart;
4208 }
4209
4210 if (!object->alive) {
4211 vm_object_cache_unlock();
4212 vm_object_unlock(object);
4213 return KERN_FAILURE;
4214 }
4215
4216 if (object->named == TRUE) {
4217 vm_object_cache_unlock();
4218 vm_object_unlock(object);
4219 return KERN_SUCCESS;
4220 }
4221
4222 if((object->ref_count == 0) && (!object->terminating)){
4223 queue_remove(&vm_object_cached_list, object,
4224 vm_object_t, cached_list);
4225 vm_object_cached_count--;
4226 XPR(XPR_VM_OBJECT_CACHE,
4227 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4228 (integer_t)object,
4229 (integer_t)vm_object_cached_list.next,
4230 (integer_t)vm_object_cached_list.prev, 0,0);
4231 }
4232
4233 vm_object_cache_unlock();
4234
4235 object->named = TRUE;
4236 object->ref_count++;
4237 vm_object_res_reference(object);
4238 while (!object->pager_ready) {
4239 vm_object_sleep(object,
4240 VM_OBJECT_EVENT_PAGER_READY,
4241 THREAD_UNINT);
4242 }
4243 vm_object_unlock(object);
4244 return (KERN_SUCCESS);
4245 }
4246
4247
4248 /*
4249 * vm_object_release_name:
4250 *
4251 * Enforces name semantic on memory_object reference count decrement
4252 * This routine should not be called unless the caller holds a name
4253 * reference gained through the memory_object_create_named.
4254 *
4255 * If the TERMINATE_IDLE flag is set, the call will return if the
4256 * reference count is not 1. i.e. idle with the only remaining reference
4257 * being the name.
4258 * If the decision is made to proceed the name field flag is set to
4259 * false and the reference count is decremented. If the RESPECT_CACHE
4260 * flag is set and the reference count has gone to zero, the
4261 * memory_object is checked to see if it is cacheable otherwise when
4262 * the reference count is zero, it is simply terminated.
4263 */
4264
4265 __private_extern__ kern_return_t
4266 vm_object_release_name(
4267 vm_object_t object,
4268 int flags)
4269 {
4270 vm_object_t shadow;
4271 boolean_t original_object = TRUE;
4272
4273 while (object != VM_OBJECT_NULL) {
4274
4275 /*
4276 * The cache holds a reference (uncounted) to
4277 * the object. We must locke it before removing
4278 * the object.
4279 *
4280 */
4281
4282 vm_object_cache_lock();
4283 vm_object_lock(object);
4284 assert(object->alive);
4285 if(original_object)
4286 assert(object->named);
4287 assert(object->ref_count > 0);
4288
4289 /*
4290 * We have to wait for initialization before
4291 * destroying or caching the object.
4292 */
4293
4294 if (object->pager_created && !object->pager_initialized) {
4295 assert(!object->can_persist);
4296 vm_object_assert_wait(object,
4297 VM_OBJECT_EVENT_INITIALIZED,
4298 THREAD_UNINT);
4299 vm_object_unlock(object);
4300 vm_object_cache_unlock();
4301 thread_block(THREAD_CONTINUE_NULL);
4302 continue;
4303 }
4304
4305 if (((object->ref_count > 1)
4306 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4307 || (object->terminating)) {
4308 vm_object_unlock(object);
4309 vm_object_cache_unlock();
4310 return KERN_FAILURE;
4311 } else {
4312 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4313 vm_object_unlock(object);
4314 vm_object_cache_unlock();
4315 return KERN_SUCCESS;
4316 }
4317 }
4318
4319 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4320 (object->ref_count == 1)) {
4321 if(original_object)
4322 object->named = FALSE;
4323 vm_object_unlock(object);
4324 vm_object_cache_unlock();
4325 /* let vm_object_deallocate push this thing into */
4326 /* the cache, if that it is where it is bound */
4327 vm_object_deallocate(object);
4328 return KERN_SUCCESS;
4329 }
4330 VM_OBJ_RES_DECR(object);
4331 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4332 if(object->ref_count == 1) {
4333 if(vm_object_terminate(object) != KERN_SUCCESS) {
4334 if(original_object) {
4335 return KERN_FAILURE;
4336 } else {
4337 return KERN_SUCCESS;
4338 }
4339 }
4340 if (shadow != VM_OBJECT_NULL) {
4341 original_object = FALSE;
4342 object = shadow;
4343 continue;
4344 }
4345 return KERN_SUCCESS;
4346 } else {
4347 object->ref_count--;
4348 assert(object->ref_count > 0);
4349 if(original_object)
4350 object->named = FALSE;
4351 vm_object_unlock(object);
4352 vm_object_cache_unlock();
4353 return KERN_SUCCESS;
4354 }
4355 }
4356 }
4357
4358
4359 __private_extern__ kern_return_t
4360 vm_object_lock_request(
4361 vm_object_t object,
4362 vm_object_offset_t offset,
4363 vm_object_size_t size,
4364 memory_object_return_t should_return,
4365 int flags,
4366 vm_prot_t prot)
4367 {
4368 vm_object_offset_t original_offset = offset;
4369 boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH;
4370
4371 XPR(XPR_MEMORY_OBJECT,
4372 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4373 (integer_t)object, offset, size,
4374 (((should_return&1)<<1)|should_flush), prot);
4375
4376 /*
4377 * Check for bogus arguments.
4378 */
4379 if (object == VM_OBJECT_NULL)
4380 return (KERN_INVALID_ARGUMENT);
4381
4382 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4383 return (KERN_INVALID_ARGUMENT);
4384
4385 size = round_page(size);
4386
4387 /*
4388 * Lock the object, and acquire a paging reference to
4389 * prevent the memory_object reference from being released.
4390 */
4391 vm_object_lock(object);
4392 vm_object_paging_begin(object);
4393 offset -= object->paging_offset;
4394
4395 (void)vm_object_update(object,
4396 offset, size, should_return, flags, prot);
4397
4398 vm_object_paging_end(object);
4399 vm_object_unlock(object);
4400
4401 return (KERN_SUCCESS);
4402 }
4403
4404
4405
4406 #if TASK_SWAPPER
4407 /*
4408 * vm_object_res_deallocate
4409 *
4410 * (recursively) decrement residence counts on vm objects and their shadows.
4411 * Called from vm_object_deallocate and when swapping out an object.
4412 *
4413 * The object is locked, and remains locked throughout the function,
4414 * even as we iterate down the shadow chain. Locks on intermediate objects
4415 * will be dropped, but not the original object.
4416 *
4417 * NOTE: this function used to use recursion, rather than iteration.
4418 */
4419
4420 __private_extern__ void
4421 vm_object_res_deallocate(
4422 vm_object_t object)
4423 {
4424 vm_object_t orig_object = object;
4425 /*
4426 * Object is locked so it can be called directly
4427 * from vm_object_deallocate. Original object is never
4428 * unlocked.
4429 */
4430 assert(object->res_count > 0);
4431 while (--object->res_count == 0) {
4432 assert(object->ref_count >= object->res_count);
4433 vm_object_deactivate_all_pages(object);
4434 /* iterate on shadow, if present */
4435 if (object->shadow != VM_OBJECT_NULL) {
4436 vm_object_t tmp_object = object->shadow;
4437 vm_object_lock(tmp_object);
4438 if (object != orig_object)
4439 vm_object_unlock(object);
4440 object = tmp_object;
4441 assert(object->res_count > 0);
4442 } else
4443 break;
4444 }
4445 if (object != orig_object)
4446 vm_object_unlock(object);
4447 }
4448
4449 /*
4450 * vm_object_res_reference
4451 *
4452 * Internal function to increment residence count on a vm object
4453 * and its shadows. It is called only from vm_object_reference, and
4454 * when swapping in a vm object, via vm_map_swap.
4455 *
4456 * The object is locked, and remains locked throughout the function,
4457 * even as we iterate down the shadow chain. Locks on intermediate objects
4458 * will be dropped, but not the original object.
4459 *
4460 * NOTE: this function used to use recursion, rather than iteration.
4461 */
4462
4463 __private_extern__ void
4464 vm_object_res_reference(
4465 vm_object_t object)
4466 {
4467 vm_object_t orig_object = object;
4468 /*
4469 * Object is locked, so this can be called directly
4470 * from vm_object_reference. This lock is never released.
4471 */
4472 while ((++object->res_count == 1) &&
4473 (object->shadow != VM_OBJECT_NULL)) {
4474 vm_object_t tmp_object = object->shadow;
4475
4476 assert(object->ref_count >= object->res_count);
4477 vm_object_lock(tmp_object);
4478 if (object != orig_object)
4479 vm_object_unlock(object);
4480 object = tmp_object;
4481 }
4482 if (object != orig_object)
4483 vm_object_unlock(object);
4484 assert(orig_object->ref_count >= orig_object->res_count);
4485 }
4486 #endif /* TASK_SWAPPER */
4487
4488 /*
4489 * vm_object_reference:
4490 *
4491 * Gets another reference to the given object.
4492 */
4493 #ifdef vm_object_reference
4494 #undef vm_object_reference
4495 #endif
4496 __private_extern__ void
4497 vm_object_reference(
4498 register vm_object_t object)
4499 {
4500 if (object == VM_OBJECT_NULL)
4501 return;
4502
4503 vm_object_lock(object);
4504 assert(object->ref_count > 0);
4505 vm_object_reference_locked(object);
4506 vm_object_unlock(object);
4507 }
4508
4509 #ifdef MACH_BSD
4510 /*
4511 * Scale the vm_object_cache
4512 * This is required to make sure that the vm_object_cache is big
4513 * enough to effectively cache the mapped file.
4514 * This is really important with UBC as all the regular file vnodes
4515 * have memory object associated with them. Havving this cache too
4516 * small results in rapid reclaim of vnodes and hurts performance a LOT!
4517 *
4518 * This is also needed as number of vnodes can be dynamically scaled.
4519 */
4520 kern_return_t
4521 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval)
4522 {
4523 vm_object_cached_max = nval;
4524 vm_object_cache_trim(FALSE);
4525 return (KERN_SUCCESS);
4526 }
4527 #endif /* MACH_BSD */
4528