]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_object.c
9cec5d48632d667cfece1097bafaf1a0a5322b6c
[apple/xnu.git] / osfmk / vm / vm_object.c
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm/vm_object.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Virtual memory object module.
57 */
58
59 #ifdef MACH_BSD
60 /* remove as part of component support merge */
61 extern int vnode_pager_workaround;
62 #endif
63
64 #include <mach_pagemap.h>
65 #include <task_swapper.h>
66
67 #include <mach/mach_types.h>
68 #include <mach/memory_object.h>
69 #include <mach/memory_object_default.h>
70 #include <mach/memory_object_control_server.h>
71 #include <mach/vm_param.h>
72 #include <ipc/ipc_port.h>
73 #include <kern/assert.h>
74 #include <kern/lock.h>
75 #include <kern/queue.h>
76 #include <kern/xpr.h>
77 #include <kern/zalloc.h>
78 #include <kern/host.h>
79 #include <kern/host_statistics.h>
80 #include <kern/processor.h>
81 #include <vm/memory_object.h>
82 #include <vm/vm_fault.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_object.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <kern/misc_protos.h>
88
89
90
91 /*
92 * Virtual memory objects maintain the actual data
93 * associated with allocated virtual memory. A given
94 * page of memory exists within exactly one object.
95 *
96 * An object is only deallocated when all "references"
97 * are given up.
98 *
99 * Associated with each object is a list of all resident
100 * memory pages belonging to that object; this list is
101 * maintained by the "vm_page" module, but locked by the object's
102 * lock.
103 *
104 * Each object also records the memory object reference
105 * that is used by the kernel to request and write
106 * back data (the memory object, field "pager"), etc...
107 *
108 * Virtual memory objects are allocated to provide
109 * zero-filled memory (vm_allocate) or map a user-defined
110 * memory object into a virtual address space (vm_map).
111 *
112 * Virtual memory objects that refer to a user-defined
113 * memory object are called "permanent", because all changes
114 * made in virtual memory are reflected back to the
115 * memory manager, which may then store it permanently.
116 * Other virtual memory objects are called "temporary",
117 * meaning that changes need be written back only when
118 * necessary to reclaim pages, and that storage associated
119 * with the object can be discarded once it is no longer
120 * mapped.
121 *
122 * A permanent memory object may be mapped into more
123 * than one virtual address space. Moreover, two threads
124 * may attempt to make the first mapping of a memory
125 * object concurrently. Only one thread is allowed to
126 * complete this mapping; all others wait until the
127 * "pager_initialized" field is asserted, indicating
128 * that the first thread has initialized all of the
129 * necessary fields in the virtual memory object structure.
130 *
131 * The kernel relies on a *default memory manager* to
132 * provide backing storage for the zero-filled virtual
133 * memory objects. The pager memory objects associated
134 * with these temporary virtual memory objects are only
135 * requested from the default memory manager when it
136 * becomes necessary. Virtual memory objects
137 * that depend on the default memory manager are called
138 * "internal". The "pager_created" field is provided to
139 * indicate whether these ports have ever been allocated.
140 *
141 * The kernel may also create virtual memory objects to
142 * hold changed pages after a copy-on-write operation.
143 * In this case, the virtual memory object (and its
144 * backing storage -- its memory object) only contain
145 * those pages that have been changed. The "shadow"
146 * field refers to the virtual memory object that contains
147 * the remainder of the contents. The "shadow_offset"
148 * field indicates where in the "shadow" these contents begin.
149 * The "copy" field refers to a virtual memory object
150 * to which changed pages must be copied before changing
151 * this object, in order to implement another form
152 * of copy-on-write optimization.
153 *
154 * The virtual memory object structure also records
155 * the attributes associated with its memory object.
156 * The "pager_ready", "can_persist" and "copy_strategy"
157 * fields represent those attributes. The "cached_list"
158 * field is used in the implementation of the persistence
159 * attribute.
160 *
161 * ZZZ Continue this comment.
162 */
163
164 /* Forward declarations for internal functions. */
/* Initializes caller-supplied object storage from vm_object_template. */
165 static void _vm_object_allocate(
166 vm_object_size_t size,
167 vm_object_t object);
168
/*
 * Frees the resources of an object; returns KERN_FAILURE when the
 * object turns out to be terminating already or to have gained a reference.
 */
169 static kern_return_t vm_object_terminate(
170 vm_object_t object);
171
/*
 * Not static: defined outside the part of the file shown with these
 * prototypes.  Called by vm_object_terminate once the object is marked dead.
 */
172 extern void vm_object_remove(
173 vm_object_t object);
174
/*
 * Terminates cached objects while the cache is over vm_object_cached_max.
 * NOTE(review): declared static here, but the definition omits the keyword;
 * the linkage is still internal, the spelling is merely inconsistent.
 */
175 static vm_object_t vm_object_cache_trim(
176 boolean_t called_from_vm_object_deallocate);
177
/* Called by vm_object_deallocate right after an object enters the cache. */
178 static void vm_object_deactivate_all_pages(
179 vm_object_t object);
180
/* NOTE(review): definition not visible in this part of the file. */
181 static void vm_object_abort_activity(
182 vm_object_t object);
183
/* NOTE(review): definition not visible in this part of the file. */
184 static kern_return_t vm_object_copy_call(
185 vm_object_t src_object,
186 vm_object_offset_t src_offset,
187 vm_object_size_t size,
188 vm_object_t *_result_object);
189
/* NOTE(review): definition not visible in this part of the file. */
190 static void vm_object_do_collapse(
191 vm_object_t object,
192 vm_object_t backing_object);
193
/* NOTE(review): definition not visible in this part of the file. */
194 static void vm_object_do_bypass(
195 vm_object_t object,
196 vm_object_t backing_object);
197
/* Terminates a pager; called from vm_object_terminate with the object unlocked. */
198 static void vm_object_release_pager(
199 memory_object_t pager);
200
201 static zone_t vm_object_zone; /* zone that struct vm_object allocations come from (set up in vm_object_bootstrap) */
202
203 /*
204 * All wired-down kernel memory belongs to a single virtual
205 * memory object (kernel_object) to avoid wasting data structures.
 *
 * The storage is static rather than zone-allocated; it is
 * initialized in vm_object_bootstrap().
206 */
207 static struct vm_object kernel_object_store;
208 __private_extern__ vm_object_t kernel_object = &kernel_object_store;
209
210 /*
211 * The submap object is used as a placeholder for vm_map_submap
212 * operations. The object is declared in vm_map.c because it
213 * is exported by the vm_map module. The storage is declared
214 * here because it must be initialized here.
215 */
216 static struct vm_object vm_submap_object_store;
217
218 /*
219 * Virtual memory objects are initialized from
220 * a template (see vm_object_allocate).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
224 * (see _vm_object_allocate()).
 *
 * The template's field values are assigned in
 * vm_object_bootstrap(); _vm_object_allocate() copies the
 * template and then sets up the per-object queues, lock and size.
225 */
226 static struct vm_object vm_object_template;
227
228 /*
229 * Virtual memory objects that are not referenced by
230 * any address maps, but that are allowed to persist
231 * (an attribute specified by the associated memory manager),
232 * are kept in a queue (vm_object_cached_list).
233 *
234 * When an object from this queue is referenced again,
235 * for example to make another address space mapping,
236 * it must be removed from the queue. That is, the
237 * queue contains *only* objects with zero references.
238 *
239 * The kernel may choose to terminate objects from this
240 * queue in order to reclaim storage. The current policy
241 * is to permit a fixed maximum number of unreferenced
242 * objects (vm_object_cached_max).
243 *
244 * A mutex (accessed by routines
245 * vm_object_cache_{lock,lock_try,unlock}) governs the
246 * object cache. It must be held when objects are
247 * added to or removed from the cache (in vm_object_terminate).
248 * The routines that acquire a reference to a virtual
249 * memory object based on one of the memory object ports
250 * must also lock the cache.
251 *
252 * Ideally, the object cache should be more isolated
253 * from the reference mechanism, so that the lock need
254 * not be held to make simple references.
255 */
256 static queue_head_t vm_object_cached_list; /* queue of cached (unreferenced) objects */
257 static int vm_object_cached_count; /* current number of objects on the queue */
258 static int vm_object_cached_high; /* highest # cached objects */
259 static int vm_object_cached_max = 512; /* may be patched*/
260
/* Lock protecting the cached list, its counters, and the pager hash table lookups done in this file. */
261 static decl_mutex_data(,vm_object_cached_lock_data)
262
/*
 * Wrappers around the cache mutex.  Code in this file that needs both
 * the cache lock and an object lock takes the cache lock first.
 */
263 #define vm_object_cache_lock() \
264 mutex_lock(&vm_object_cached_lock_data)
265 #define vm_object_cache_lock_try() \
266 mutex_try(&vm_object_cached_lock_data)
267 #define vm_object_cache_unlock() \
268 mutex_unlock(&vm_object_cached_lock_data)
269
/* Number of buckets in the pager -> object hash table. */
270 #define VM_OBJECT_HASH_COUNT 1024
271 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT]; /* one queue head per bucket */
272 static struct zone *vm_object_hash_zone; /* zone for struct vm_object_hash_entry */
273
/*
 * One association between a pager and the VM object that represents it.
 * Entries are chained on a bucket of vm_object_hashtable via hash_link.
 */
274 struct vm_object_hash_entry {
275 queue_chain_t hash_link; /* hash chain link */
276 memory_object_t pager; /* pager we represent */
277 vm_object_t object; /* corresponding object */
278 boolean_t waiting; /* someone waiting for
279 * termination */
280 };
281
282 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
/* Value returned by vm_object_hash_lookup when no entry matches. */
283 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
284
/*
 *	Hash a pager handle to a bucket index in vm_object_hashtable.
 *
 *	The handle is converted to an unsigned integer, its low
 *	VM_OBJECT_HASH_SHIFT bits are discarded, and the remainder is
 *	reduced modulo VM_OBJECT_HASH_COUNT.
 *
 *	The argument is parenthesized so that the cast applies to the
 *	whole argument expression rather than only to its first operand.
 *
 *	NOTE(review): the cast to "unsigned" drops the upper bits of a
 *	pointer wider than unsigned; only bits 8..17 reach the result, so
 *	the bucket choice is unaffected, but compilers may warn.
 */
#define VM_OBJECT_HASH_SHIFT	8
#define vm_object_hash(pager) \
	((((unsigned)(pager)) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
288
289 /*
290 * vm_object_hash_lookup looks up a pager in the hashtable
291 * and returns the corresponding entry, with optional removal.
292 */
293
294 static vm_object_hash_entry_t
295 vm_object_hash_lookup(
296 memory_object_t pager,
297 boolean_t remove_entry)
298 {
299 register queue_t bucket;
300 register vm_object_hash_entry_t entry;
301
302 bucket = &vm_object_hashtable[vm_object_hash(pager)];
303
304 entry = (vm_object_hash_entry_t)queue_first(bucket);
305 while (!queue_end(bucket, (queue_entry_t)entry)) {
306 if (entry->pager == pager && !remove_entry)
307 return(entry);
308 else if (entry->pager == pager) {
309 queue_remove(bucket, entry,
310 vm_object_hash_entry_t, hash_link);
311 return(entry);
312 }
313
314 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
315 }
316
317 return(VM_OBJECT_HASH_ENTRY_NULL);
318 }
319
320 /*
321 * vm_object_hash_enter enters the specified
322 * pager / cache object association in the hashtable.
323 */
324
325 static void
326 vm_object_hash_insert(
327 vm_object_hash_entry_t entry)
328 {
329 register queue_t bucket;
330
331 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
332
333 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
334 }
335
336 static vm_object_hash_entry_t
337 vm_object_hash_entry_alloc(
338 memory_object_t pager)
339 {
340 vm_object_hash_entry_t entry;
341
342 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
343 entry->pager = pager;
344 entry->object = VM_OBJECT_NULL;
345 entry->waiting = FALSE;
346
347 return(entry);
348 }
349
350 void
351 vm_object_hash_entry_free(
352 vm_object_hash_entry_t entry)
353 {
354 zfree(vm_object_hash_zone, (vm_offset_t)entry);
355 }
356
357 /*
358 * vm_object_allocate:
359 *
360 * Returns a new object with the given size.
361 */
362
363 static void
364 _vm_object_allocate(
365 vm_object_size_t size,
366 vm_object_t object)
367 {
368 XPR(XPR_VM_OBJECT,
369 "vm_object_allocate, object 0x%X size 0x%X\n",
370 (integer_t)object, size, 0,0,0);
371
372 *object = vm_object_template;
373 queue_init(&object->memq);
374 queue_init(&object->msr_q);
375 #ifdef UBC_DEBUG
376 queue_init(&object->uplq);
377 #endif /* UBC_DEBUG */
378 vm_object_lock_init(object);
379 object->size = size;
380 }
381
382 __private_extern__ vm_object_t
383 vm_object_allocate(
384 vm_object_size_t size)
385 {
386 register vm_object_t object;
387
388 object = (vm_object_t) zalloc(vm_object_zone);
389
390 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
391
392 if (object != VM_OBJECT_NULL)
393 _vm_object_allocate(size, object);
394
395 return object;
396 }
397
398 /*
399 * vm_object_bootstrap:
400 *
401 * Initialize the VM objects module.
402 */
403 __private_extern__ void
404 vm_object_bootstrap(void)
405 {
406 register i;
407
408 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
409 round_page(512*1024),
410 round_page(12*1024),
411 "vm objects");
412
413 queue_init(&vm_object_cached_list);
414 mutex_init(&vm_object_cached_lock_data, ETAP_VM_OBJ_CACHE);
415
416 vm_object_hash_zone =
417 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
418 round_page(512*1024),
419 round_page(12*1024),
420 "vm object hash entries");
421
422 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
423 queue_init(&vm_object_hashtable[i]);
424
425 /*
426 * Fill in a template object, for quick initialization
427 */
428
429 /* memq; Lock; init after allocation */
430 vm_object_template.size = 0;
431 vm_object_template.frozen_size = 0;
432 vm_object_template.ref_count = 1;
433 #if TASK_SWAPPER
434 vm_object_template.res_count = 1;
435 #endif /* TASK_SWAPPER */
436 vm_object_template.resident_page_count = 0;
437 vm_object_template.copy = VM_OBJECT_NULL;
438 vm_object_template.shadow = VM_OBJECT_NULL;
439 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
440 vm_object_template.cow_hint = 0;
441 vm_object_template.true_share = FALSE;
442
443 vm_object_template.pager = MEMORY_OBJECT_NULL;
444 vm_object_template.paging_offset = 0;
445 vm_object_template.pager_request = PAGER_REQUEST_NULL;
446 /* msr_q; init after allocation */
447
448 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
449 vm_object_template.absent_count = 0;
450 vm_object_template.paging_in_progress = 0;
451
452 /* Begin bitfields */
453 vm_object_template.all_wanted = 0; /* all bits FALSE */
454 vm_object_template.pager_created = FALSE;
455 vm_object_template.pager_initialized = FALSE;
456 vm_object_template.pager_ready = FALSE;
457 vm_object_template.pager_trusted = FALSE;
458 vm_object_template.can_persist = FALSE;
459 vm_object_template.internal = TRUE;
460 vm_object_template.temporary = TRUE;
461 vm_object_template.private = FALSE;
462 vm_object_template.pageout = FALSE;
463 vm_object_template.alive = TRUE;
464 vm_object_template.lock_in_progress = FALSE;
465 vm_object_template.lock_restart = FALSE;
466 vm_object_template.silent_overwrite = FALSE;
467 vm_object_template.advisory_pageout = FALSE;
468 vm_object_template.shadowed = FALSE;
469 vm_object_template.terminating = FALSE;
470 vm_object_template.shadow_severed = FALSE;
471 vm_object_template.phys_contiguous = FALSE;
472 vm_object_template.nophyscache = FALSE;
473 /* End bitfields */
474
475 /* cached_list; init after allocation */
476 vm_object_template.last_alloc = (vm_object_offset_t) 0;
477 vm_object_template.cluster_size = 0;
478 #if MACH_PAGEMAP
479 vm_object_template.existence_map = VM_EXTERNAL_NULL;
480 #endif /* MACH_PAGEMAP */
481 #if MACH_ASSERT
482 vm_object_template.paging_object = VM_OBJECT_NULL;
483 #endif /* MACH_ASSERT */
484
485 /*
486 * Initialize the "kernel object"
487 */
488
489 kernel_object = &kernel_object_store;
490
491 /*
492 * Note that in the following size specifications, we need to add 1 because
493 * VM_MAX_KERNEL_ADDRESS is a maximum address, not a size.
494 */
495 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
496 kernel_object);
497
498 /*
499 * Initialize the "submap object". Make it as large as the
500 * kernel object so that no limit is imposed on submap sizes.
501 */
502
503 vm_submap_object = &vm_submap_object_store;
504 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
505 vm_submap_object);
506 /*
507 * Create an "extra" reference to this object so that we never
508 * try to deallocate it; zfree doesn't like to be called with
509 * non-zone memory.
510 */
511 vm_object_reference(vm_submap_object);
512
513 #if MACH_PAGEMAP
514 vm_external_module_initialize();
515 #endif /* MACH_PAGEMAP */
516 }
517
518 __private_extern__ void
519 vm_object_init(void)
520 {
521 /*
522 * Finish initializing the kernel object.
523 */
524 }
525
526 /* remove the typedef below when emergency work-around is taken out */
/* NOTE(review): vnode_pager_t is not used in the part of the file visible here. */
527 typedef struct vnode_pager {
528 memory_object_t pager;
529 memory_object_t pager_handle; /* pager */
530 memory_object_control_t control_handle; /* memory object's control handle */
531 void *vnode_handle; /* vnode handle */
532 } *vnode_pager_t;
533
/*
 * When MIGHT_NOT_CACHE_SHADOWS is enabled, cache_shadows is consulted by
 * vm_object_deallocate: if it is FALSE, an object's shadow is detached
 * before the object enters the cache and is deallocated afterwards.
 */
534 #define MIGHT_NOT_CACHE_SHADOWS 1
535 #if MIGHT_NOT_CACHE_SHADOWS
536 static int cache_shadows = TRUE;
537 #endif /* MIGHT_NOT_CACHE_SHADOWS */
538
539 /*
540 * vm_object_deallocate:
541 *
542 * Release a reference to the specified object,
543 * gained either through a vm_object_allocate
544 * or a vm_object_reference call. When all references
545 * are gone, storage associated with this object
546 * may be relinquished.
547 *
548 * No object may be locked.
 *
 * A VM_OBJECT_NULL argument is accepted and does nothing.
 *
 * The body is a loop rather than a recursion: after disposing
 * of one object it may continue with that object's shadow, or
 * with a shadow handed back by vm_object_cache_trim().
 *
 * When the last reference goes away, an object that can persist
 * and is still alive is entered in the object cache; any other
 * object is terminated.
549 */
550 __private_extern__ void
551 vm_object_deallocate(
552 register vm_object_t object)
553 {
/* Set once a cache trim had to be interrupted to deallocate a shadow; the trim is retried later. */
554 boolean_t retry_cache_trim = FALSE;
/* Shadow of the object just disposed of, to be deallocated on the next iteration. */
555 vm_object_t shadow;
556
557 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
558 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
559
560
561 while (object != VM_OBJECT_NULL) {
562
563 /*
564 * The cache holds a reference (uncounted) to
565 * the object; we must lock it before removing
566 * the object.
567 */
568
/* Lock order: cache lock first, then the object lock. */
569 vm_object_cache_lock();
570 vm_object_lock(object);
571
572 assert(object->ref_count > 0);
573
574 /*
575 * If the object has a named reference, and only
576 * that reference would remain, inform the pager
577 * about the last "mapping" reference going away.
578 */
579 if ((object->ref_count == 2) && (object->named)) {
580 memory_object_t pager = object->pager;
581
582 /* Notify the Pager that there are no */
583 /* more mappers for this object */
584
585 if (pager != MEMORY_OBJECT_NULL) {
/* Both locks are dropped around the call into the pager and re-taken afterwards. */
586 vm_object_unlock(object);
587 vm_object_cache_unlock();
588
589 memory_object_unmap(pager);
590
591 vm_object_cache_lock();
592 vm_object_lock(object);
593 assert(object->ref_count > 0);
594 }
595 }
596
597 /*
598 * Lose the reference. If other references
599 * remain, then we are done, unless we need
600 * to retry a cache trim.
601 * If it is the last reference, then keep it
602 * until any pending initialization is completed.
603 */
604
605 /* if the object is terminating, it cannot go into */
606 /* the cache and we obviously should not call */
607 /* terminate again. */
608
609 if ((object->ref_count > 1) || object->terminating) {
610 object->ref_count--;
611 vm_object_res_deallocate(object);
612 vm_object_unlock(object);
613 vm_object_cache_unlock();
614 if (retry_cache_trim &&
615 ((object = vm_object_cache_trim(TRUE)) !=
616 VM_OBJECT_NULL)) {
617 continue;
618 }
619 return;
620 }
621
622 /*
623 * We have to wait for initialization
624 * before destroying or caching the object.
625 */
626
627 if (object->pager_created && ! object->pager_initialized) {
628 assert(! object->can_persist);
/* Sleep until VM_OBJECT_EVENT_INITIALIZED is posted, then restart the loop on the same object. */
629 vm_object_assert_wait(object,
630 VM_OBJECT_EVENT_INITIALIZED,
631 THREAD_UNINT);
632 vm_object_unlock(object);
633 vm_object_cache_unlock();
634 thread_block((void (*)(void))0);
635 continue;
636 }
637
638 /*
639 * If this object can persist, then enter it in
640 * the cache. Otherwise, terminate it.
641 *
642 * NOTE: Only permanent objects are cached, and
643 * permanent objects cannot have shadows. This
644 * affects the residence counting logic in a minor
645 * way (can do it in-line, mostly).
646 */
647
648 if ((object->can_persist) && (object->alive)) {
649 /*
650 * Now it is safe to decrement reference count,
651 * and to return if reference count is > 0.
652 */
653 if (--object->ref_count > 0) {
654 vm_object_res_deallocate(object);
655 vm_object_unlock(object);
656 vm_object_cache_unlock();
657 if (retry_cache_trim &&
658 ((object = vm_object_cache_trim(TRUE)) !=
659 VM_OBJECT_NULL)) {
660 continue;
661 }
662 return;
663 }
664
665 #if MIGHT_NOT_CACHE_SHADOWS
666 /*
667 * Remove shadow now if we don't
668 * want to cache shadows.
669 */
/*
 * "shadow" is assigned only when cache_shadows is FALSE; the test
 * further down checks cache_shadows again before reading it.
 */
670 if (! cache_shadows) {
671 shadow = object->shadow;
672 object->shadow = VM_OBJECT_NULL;
673 }
674 #endif /* MIGHT_NOT_CACHE_SHADOWS */
675
676 /*
677 * Enter the object onto the queue of
678 * cached objects, and deactivate
679 * all of its pages.
680 */
681 assert(object->shadow == VM_OBJECT_NULL);
682 VM_OBJ_RES_DECR(object);
683 XPR(XPR_VM_OBJECT,
684 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
685 (integer_t)object,
686 (integer_t)vm_object_cached_list.next,
687 (integer_t)vm_object_cached_list.prev,0,0);
688
689 vm_object_cached_count++;
690 if (vm_object_cached_count > vm_object_cached_high)
691 vm_object_cached_high = vm_object_cached_count;
692 queue_enter(&vm_object_cached_list, object,
693 vm_object_t, cached_list);
/* The cache lock is released before the pages are deactivated; the object lock is held across that call. */
694 vm_object_cache_unlock();
695 vm_object_deactivate_all_pages(object);
696 vm_object_unlock(object);
697
698 #if MIGHT_NOT_CACHE_SHADOWS
699 /*
700 * If we have a shadow that we need
701 * to deallocate, do so now, remembering
702 * to trim the cache later.
703 */
704 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
705 object = shadow;
706 retry_cache_trim = TRUE;
707 continue;
708 }
709 #endif /* MIGHT_NOT_CACHE_SHADOWS */
710
711 /*
712 * Trim the cache. If the cache trim
713 * returns with a shadow for us to deallocate,
714 * then remember to retry the cache trim
715 * when we are done deallocating the shadow.
716 * Otherwise, we are done.
717 */
718
719 object = vm_object_cache_trim(TRUE);
720 if (object == VM_OBJECT_NULL) {
721 return;
722 }
723 retry_cache_trim = TRUE;
724
725 } else {
726 /*
727 * This object is not cachable; terminate it.
728 */
729 XPR(XPR_VM_OBJECT,
730 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%lX ref %d\n",
731 (integer_t)object, object->resident_page_count,
732 object->paging_in_progress,
733 (natural_t)current_thread(),object->ref_count);
734
735 VM_OBJ_RES_DECR(object); /* XXX ? */
736 /*
737 * Terminate this object. If it had a shadow,
738 * then deallocate it; otherwise, if we need
739 * to retry a cache trim, do so now; otherwise,
740 * we are done. "pageout" objects have a shadow,
741 * but maintain a "paging reference" rather than
742 * a normal reference.
743 */
/* The shadow must be captured before the call: on success vm_object_terminate frees the object. */
744 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
/*
 * On failure vm_object_terminate has already dropped our reference
 * and released both locks, so there is nothing left to undo here.
 */
745 if(vm_object_terminate(object) != KERN_SUCCESS) {
746 return;
747 }
748 if (shadow != VM_OBJECT_NULL) {
749 object = shadow;
750 continue;
751 }
752 if (retry_cache_trim &&
753 ((object = vm_object_cache_trim(TRUE)) !=
754 VM_OBJECT_NULL)) {
755 continue;
756 }
757 return;
758 }
759 }
/*
 * Every "continue" above runs with a non-null object, so the loop
 * condition can only fail on entry, i.e. when the caller passed
 * VM_OBJECT_NULL and no trim was ever started.
 */
760 assert(! retry_cache_trim);
761 }
762
763 /*
764 * Check to see whether we really need to trim
765 * down the cache. If so, remove an object from
766 * the cache, terminate it, and repeat.
767 *
768 * Called with, and returns with, cache lock unlocked.
769 */
770 vm_object_t
771 vm_object_cache_trim(
772 boolean_t called_from_vm_object_deallocate)
773 {
774 register vm_object_t object = VM_OBJECT_NULL;
775 vm_object_t shadow;
776
777 for (;;) {
778
779 /*
780 * If we no longer need to trim the cache,
781 * then we are done.
782 */
783
784 vm_object_cache_lock();
785 if (vm_object_cached_count <= vm_object_cached_max) {
786 vm_object_cache_unlock();
787 return VM_OBJECT_NULL;
788 }
789
790 /*
791 * We must trim down the cache, so remove
792 * the first object in the cache.
793 */
794 XPR(XPR_VM_OBJECT,
795 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
796 (integer_t)vm_object_cached_list.next,
797 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
798
799 object = (vm_object_t) queue_first(&vm_object_cached_list);
800 vm_object_lock(object);
801 queue_remove(&vm_object_cached_list, object, vm_object_t,
802 cached_list);
803 vm_object_cached_count--;
804
805 /*
806 * Since this object is in the cache, we know
807 * that it is initialized and has no references.
808 * Take a reference to avoid recursive deallocations.
809 */
810
811 assert(object->pager_initialized);
812 assert(object->ref_count == 0);
813 object->ref_count++;
814
815 /*
816 * Terminate the object.
817 * If the object had a shadow, we let vm_object_deallocate
818 * deallocate it. "pageout" objects have a shadow, but
819 * maintain a "paging reference" rather than a normal
820 * reference.
821 * (We are careful here to limit recursion.)
822 */
823 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
824 if(vm_object_terminate(object) != KERN_SUCCESS)
825 continue;
826 if (shadow != VM_OBJECT_NULL) {
827 if (called_from_vm_object_deallocate) {
828 return shadow;
829 } else {
830 vm_object_deallocate(shadow);
831 }
832 }
833 }
834 }
835
/* NOTE(review): not referenced in the part of the file visible here; purpose to be confirmed. */
836 boolean_t vm_object_terminate_remove_all = FALSE;
837
838 /*
839 * Routine: vm_object_terminate
840 * Purpose:
841 * Free all resources associated with a vm_object.
842 * In/out conditions:
843 * Upon entry, the object must be locked,
844 * and the object must have exactly one reference.
845 *
846 * The shadow object reference is left alone.
847 *
848 * The object must be unlocked if its found that pages
849 * must be flushed to a backing object. If someone
850 * manages to map the object while it is being flushed
851 * the object is returned unlocked and unchanged. Otherwise,
852 * upon exit, the cache will be unlocked, and the
853 * object will cease to exist.
 *
 * Both callers in this file (vm_object_deallocate and
 * vm_object_cache_trim) also hold the cache lock on entry.
 *
 * Returns:
 * KERN_SUCCESS the object was torn down and its storage
 * returned to vm_object_zone.
 * KERN_FAILURE the object was already terminating, or it
 * gained another reference; the caller's
 * reference has been dropped and both the
 * cache lock and the object lock released.
854 */
855 static kern_return_t
856 vm_object_terminate(
857 register vm_object_t object)
858 {
859 memory_object_t pager;
860 register vm_page_t p;
861 vm_object_t shadow_object;
862
863 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
864 (integer_t)object, object->ref_count, 0, 0, 0);
865
/*
 * First pass, only for non-pageout objects that are permanent or
 * persistable and have a pager (or a severed shadow): empty the page
 * queue, pushing dirty or precious pages out and freeing the rest.
 * The cache lock is not held while this runs.
 */
866 if (!object->pageout && (!object->temporary || object->can_persist)
867 && (object->pager != NULL || object->shadow_severed)) {
868 vm_object_cache_unlock();
869 while (!queue_empty(&object->memq)) {
870 /*
871 * Clear pager_trusted bit so that the pages get yanked
872 * out of the object instead of cleaned in place. This
873 * prevents a deadlock in XMM and makes more sense anyway.
874 */
875 object->pager_trusted = FALSE;
876
877 p = (vm_page_t) queue_first(&object->memq);
878
879 VM_PAGE_CHECK(p);
880
/* A busy page is tolerated only if it is being cleaned or is absent; wait for paging to settle and look again. */
881 if (p->busy || p->cleaning) {
882 if(p->cleaning || p->absent) {
883 vm_object_paging_wait(object, THREAD_UNINT);
884 continue;
885 } else {
886 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
887 }
888 }
889
890 vm_page_lock_queues();
891 VM_PAGE_QUEUES_REMOVE(p);
892 vm_page_unlock_queues();
893
894 if (p->absent || p->private) {
895
896 /*
897 * For private pages, VM_PAGE_FREE just
898 * leaves the page structure around for
899 * its owner to clean up. For absent
900 * pages, the structure is returned to
901 * the appropriate pool.
902 */
903
904 goto free_page;
905 }
906
907 if (p->fictitious)
908 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
909
/* Pick up a modification recorded by the pmap layer that the page structure has not seen yet. */
910 if (!p->dirty)
911 p->dirty = pmap_is_modified(p->phys_addr);
912
913 if ((p->dirty || p->precious) && !p->error && object->alive) {
914 p->busy = TRUE;
915 vm_object_paging_begin(object);
916 /* protect the object from re-use/caching while it */
917 /* is unlocked */
918 vm_object_unlock(object);
919 vm_pageout_cluster(p); /* flush page */
920 vm_object_lock(object);
921 vm_object_paging_wait(object, THREAD_UNINT);
922 XPR(XPR_VM_OBJECT,
923 "vm_object_terminate restart, object 0x%X ref %d\n",
924 (integer_t)object, object->ref_count, 0, 0, 0);
925 } else {
926 free_page:
927 VM_PAGE_FREE(p);
928 }
929 }
/* Drop the object lock so that both locks can be re-taken in cache-then-object order. */
930 vm_object_unlock(object);
931 vm_object_cache_lock();
932 vm_object_lock(object);
933 }
934
935 /*
936 * Make sure the object isn't already being terminated
937 */
938 if(object->terminating) {
939 object->ref_count -= 1;
940 assert(object->ref_count > 0);
941 vm_object_cache_unlock();
942 vm_object_unlock(object);
943 return KERN_FAILURE;
944 }
945
946 /*
947 * Did somebody get a reference to the object while we were
948 * cleaning it?
949 */
950 if(object->ref_count != 1) {
951 object->ref_count -= 1;
952 assert(object->ref_count > 0);
953 vm_object_res_deallocate(object);
954 vm_object_cache_unlock();
955 vm_object_unlock(object);
956 return KERN_FAILURE;
957 }
958
959 /*
960 * Make sure no one can look us up now.
961 */
962
963 object->terminating = TRUE;
964 object->alive = FALSE;
965 vm_object_remove(object);
966
967 /*
968 * Detach the object from its shadow if we are the shadow's
969 * copy.
970 */
971 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
972 !(object->pageout)) {
973 vm_object_lock(shadow_object);
974 assert((shadow_object->copy == object) ||
975 (shadow_object->copy == VM_OBJECT_NULL));
976 shadow_object->copy = VM_OBJECT_NULL;
977 vm_object_unlock(shadow_object);
978 }
979
980 /*
981 * The pageout daemon might be playing with our pages.
982 * Now that the object is dead, it won't touch any more
983 * pages, but some pages might already be on their way out.
984 * Hence, we wait until the active paging activities have ceased
985 * before we break the association with the pager itself.
986 */
987 while (object->paging_in_progress != 0) {
988 vm_object_cache_unlock();
/* NOTE(review): the object lock is re-taken after the wait, so vm_object_wait presumably returns with it released - confirm. */
989 vm_object_wait(object,
990 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
991 THREAD_UNINT);
992 vm_object_cache_lock();
993 vm_object_lock(object);
994 }
995
/* Keep the pager in a local: the object's field is cleared now, the pager itself is released further down. */
996 pager = object->pager;
997 object->pager = MEMORY_OBJECT_NULL;
998
999 if (pager != MEMORY_OBJECT_NULL)
1000 memory_object_control_disable(object->pager_request);
1001 vm_object_cache_unlock();
1002
1003 object->ref_count--;
1004 #if TASK_SWAPPER
1005 assert(object->res_count == 0);
1006 #endif /* TASK_SWAPPER */
1007
1008 assert (object->ref_count == 0);
1009
1010 /*
1011 * Clean or free the pages, as appropriate.
1012 * It is possible for us to find busy/absent pages,
1013 * if some faults on this object were aborted.
1014 */
1015 if (object->pageout) {
1016 assert(shadow_object != VM_OBJECT_NULL);
1017 assert(shadow_object == object->shadow);
1018
1019 vm_pageout_object_terminate(object);
1020
1021 } else if ((object->temporary && !object->can_persist) ||
1022 (pager == MEMORY_OBJECT_NULL)) {
1023 while (!queue_empty(&object->memq)) {
1024 p = (vm_page_t) queue_first(&object->memq);
1025
1026 VM_PAGE_CHECK(p);
1027 VM_PAGE_FREE(p);
1028 }
1029 } else if (!queue_empty(&object->memq)) {
1030 panic("vm_object_terminate: queue just emptied isn't");
1031 }
1032
1033 assert(object->paging_in_progress == 0);
1034 assert(object->ref_count == 0);
1035
1036 /*
1037 * If the pager has not already been released by
1038 * vm_object_destroy, we need to terminate it and
1039 * release our reference to it here.
1040 */
1041 if (pager != MEMORY_OBJECT_NULL) {
1042 vm_object_unlock(object);
1043 vm_object_release_pager(pager);
1044 vm_object_lock(object);
1045 }
1046
1047 /* kick off anyone waiting on terminating */
/* NOTE(review): the begin/end pair below presumably exists only to post the paging wakeup - confirm. */
1048 object->terminating = FALSE;
1049 vm_object_paging_begin(object);
1050 vm_object_paging_end(object);
1051 vm_object_unlock(object);
1052
1053 #if MACH_PAGEMAP
1054 vm_external_destroy(object->existence_map, object->size);
1055 #endif /* MACH_PAGEMAP */
1056
1057 /*
1058 * Free the space for the object.
1059 */
1060 zfree(vm_object_zone, (vm_offset_t) object);
1061 return KERN_SUCCESS;
1062 }
1063
1064 /*
1065 * Routine: vm_object_pager_wakeup
1066 * Purpose: Wake up anyone waiting for termination of a pager.
1067 */
1068
1069 static void
1070 vm_object_pager_wakeup(
1071 memory_object_t pager)
1072 {
1073 vm_object_hash_entry_t entry;
1074 boolean_t waiting = FALSE;
1075
1076 /*
1077 * If anyone was waiting for the memory_object_terminate
1078 * to be queued, wake them up now.
1079 */
1080 vm_object_cache_lock();
1081 entry = vm_object_hash_lookup(pager, TRUE);
1082 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1083 waiting = entry->waiting;
1084 vm_object_cache_unlock();
1085 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1086 if (waiting)
1087 thread_wakeup((event_t) pager);
1088 vm_object_hash_entry_free(entry);
1089 }
1090 }
1091
1092 /*
1093 * Routine: vm_object_release_pager
1094 * Purpose: Terminate the pager and, upon completion,
1095 * release our last reference to it.
1096 * just like memory_object_terminate, except
1097 * that we wake up anyone blocked in vm_object_enter
1098 * waiting for termination message to be queued
1099 * before calling memory_object_init.
1100 */
1101 static void
1102 vm_object_release_pager(
1103 memory_object_t pager)
1104 {
1105
1106 /*
1107 * Terminate the pager.
1108 */
1109
1110 (void) memory_object_terminate(pager);
1111
1112 /*
1113 * Wakeup anyone waiting for this terminate
1114 */
1115 vm_object_pager_wakeup(pager);
1116
1117 /*
1118 * Release reference to pager.
1119 */
1120 memory_object_deallocate(pager);
1121 }
1122
1123 /*
1124 * Routine: vm_object_abort_activity [internal use only]
1125 * Purpose:
1126 * Abort paging requests pending on this object.
1127 * In/out conditions:
1128 * The object is locked on entry and exit.
1129 */
1130 static void
1131 vm_object_abort_activity(
1132 vm_object_t object)
1133 {
1134 register
1135 vm_page_t p;
1136 vm_page_t next;
1137
1138 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
1139 (integer_t)object, 0, 0, 0, 0);
1140
1141 /*
1142 * Abort all activity that would be waiting
1143 * for a result on this memory object.
1144 *
1145 * We could also choose to destroy all pages
1146 * that we have in memory for this object, but
1147 * we don't.
1148 */
1149
1150 p = (vm_page_t) queue_first(&object->memq);
1151 while (!queue_end(&object->memq, (queue_entry_t) p)) {
1152 next = (vm_page_t) queue_next(&p->listq);
1153
1154 /*
1155 * If it's being paged in, destroy it.
1156 * If an unlock has been requested, start it again.
1157 */
1158
1159 if (p->busy && p->absent) {
1160 VM_PAGE_FREE(p);
1161 }
1162 else {
1163 if (p->unlock_request != VM_PROT_NONE)
1164 p->unlock_request = VM_PROT_NONE;
1165 PAGE_WAKEUP(p);
1166 }
1167
1168 p = next;
1169 }
1170
1171 /*
1172 * Wake up threads waiting for the memory object to
1173 * become ready.
1174 */
1175
1176 object->pager_ready = TRUE;
1177 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1178 }
1179
1180 /*
1181 * Routine: vm_object_destroy
1182 * Purpose:
1183 * Shut down a VM object, despite the
1184 * presence of address map (or other) references
1185 * to the vm_object.
1186 */
1187 kern_return_t
1188 vm_object_destroy(
1189 vm_object_t object,
1190 kern_return_t reason)
1191 {
1192 memory_object_t old_pager;
1193
1194 if (object == VM_OBJECT_NULL)
1195 return(KERN_SUCCESS);
1196
1197 /*
1198 * Remove the pager association immediately.
1199 *
1200 * This will prevent the memory manager from further
1201 * meddling. [If it wanted to flush data or make
1202 * other changes, it should have done so before performing
1203 * the destroy call.]
1204 */
1205
1206 vm_object_cache_lock();
1207 vm_object_lock(object);
1208 object->can_persist = FALSE;
1209 object->named = FALSE;
1210 object->alive = FALSE;
1211
1212 /*
1213 * Rip out the pager from the vm_object now...
1214 */
1215
1216 vm_object_remove(object);
1217 old_pager = object->pager;
1218 object->pager = MEMORY_OBJECT_NULL;
1219 if (old_pager != MEMORY_OBJECT_NULL)
1220 memory_object_control_disable(object->pager_request);
1221 vm_object_cache_unlock();
1222
1223 /*
1224 * Wait for the existing paging activity (that got
1225 * through before we nulled out the pager) to subside.
1226 */
1227
1228 vm_object_paging_wait(object, THREAD_UNINT);
1229 vm_object_unlock(object);
1230
1231 /*
1232 * Terminate the object now.
1233 */
1234 if (old_pager != MEMORY_OBJECT_NULL) {
1235 vm_object_release_pager(old_pager);
1236
1237 /*
1238 * JMM - Release the caller's reference. This assumes the
1239 * caller had a reference to release, which is a big (but
1240 * currently valid) assumption if this is driven from the
1241 * vnode pager (it is holding a named reference when making
1242 * this call)..
1243 */
1244 vm_object_deallocate(object);
1245
1246 }
1247 return(KERN_SUCCESS);
1248 }
1249
1250 /*
1251 * vm_object_deactivate_pages
1252 *
1253 * Deactivate all pages in the specified object. (Keep its pages
1254 * in memory even though it is no longer referenced.)
1255 *
1256 * The object must be locked.
1257 */
1258 static void
1259 vm_object_deactivate_all_pages(
1260 register vm_object_t object)
1261 {
1262 register vm_page_t p;
1263
1264 queue_iterate(&object->memq, p, vm_page_t, listq) {
1265 vm_page_lock_queues();
1266 if (!p->busy)
1267 vm_page_deactivate(p);
1268 vm_page_unlock_queues();
1269 }
1270 }
1271
1272 __private_extern__ void
1273 vm_object_deactivate_pages(
1274 vm_object_t object,
1275 vm_object_offset_t offset,
1276 vm_object_size_t size,
1277 boolean_t kill_page)
1278 {
1279 vm_object_t orig_object;
1280 int pages_moved = 0;
1281 int pages_found = 0;
1282
1283 /*
1284 * entered with object lock held, acquire a paging reference to
1285 * prevent the memory_object and control ports from
1286 * being destroyed.
1287 */
1288 orig_object = object;
1289
1290 for (;;) {
1291 register vm_page_t m;
1292 vm_object_offset_t toffset;
1293 vm_object_size_t tsize;
1294
1295 vm_object_paging_begin(object);
1296 vm_page_lock_queues();
1297
1298 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1299
1300 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1301
1302 pages_found++;
1303
1304 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1305
1306 m->reference = FALSE;
1307 pmap_clear_reference(m->phys_addr);
1308
1309 if ((kill_page) && (object->internal)) {
1310 m->precious = FALSE;
1311 m->dirty = FALSE;
1312 pmap_clear_modify(m->phys_addr);
1313 vm_external_state_clr(object->existence_map, offset);
1314 }
1315 VM_PAGE_QUEUES_REMOVE(m);
1316
1317 queue_enter_first(&vm_page_queue_inactive, m, vm_page_t, pageq);
1318
1319 m->inactive = TRUE;
1320 if (!m->fictitious)
1321 vm_page_inactive_count++;
1322
1323 pages_moved++;
1324 }
1325 }
1326 }
1327 vm_page_unlock_queues();
1328 vm_object_paging_end(object);
1329
1330 if (object->shadow) {
1331 vm_object_t tmp_object;
1332
1333 kill_page = 0;
1334
1335 offset += object->shadow_offset;
1336
1337 tmp_object = object->shadow;
1338 vm_object_lock(tmp_object);
1339
1340 if (object != orig_object)
1341 vm_object_unlock(object);
1342 object = tmp_object;
1343 } else
1344 break;
1345 }
1346 if (object != orig_object)
1347 vm_object_unlock(object);
1348 }
1349
1350 /*
1351 * Routine: vm_object_pmap_protect
1352 *
1353 * Purpose:
1354 * Reduces the permission for all physical
1355 * pages in the specified object range.
1356 *
1357 * If removing write permission only, it is
1358 * sufficient to protect only the pages in
1359 * the top-level object; only those pages may
1360 * have write permission.
1361 *
1362 * If removing all access, we must follow the
1363 * shadow chain from the top-level object to
1364 * remove access to all pages in shadowed objects.
1365 *
1366 * The object must *not* be locked. The object must
1367 * be temporary/internal.
1368 *
1369 * If pmap is not NULL, this routine assumes that
1370 * the only mappings for the pages are in that
1371 * pmap.
1372 */
1373
1374 __private_extern__ void
1375 vm_object_pmap_protect(
1376 register vm_object_t object,
1377 register vm_object_offset_t offset,
1378 vm_size_t size,
1379 pmap_t pmap,
1380 vm_offset_t pmap_start,
1381 vm_prot_t prot)
1382 {
1383 if (object == VM_OBJECT_NULL)
1384 return;
1385
1386 vm_object_lock(object);
1387
1388 assert(object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
1389
1390 while (TRUE) {
1391 if (object->resident_page_count > atop(size) / 2 &&
1392 pmap != PMAP_NULL) {
1393 vm_object_unlock(object);
1394 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1395 return;
1396 }
1397
1398 {
1399 register vm_page_t p;
1400 register vm_object_offset_t end;
1401
1402 end = offset + size;
1403
1404 if (pmap != PMAP_NULL) {
1405 queue_iterate(&object->memq, p, vm_page_t, listq) {
1406 if (!p->fictitious &&
1407 (offset <= p->offset) && (p->offset < end)) {
1408
1409 vm_offset_t start = pmap_start +
1410 (vm_offset_t)(p->offset - offset);
1411
1412 pmap_protect(pmap, start, start + PAGE_SIZE, prot);
1413 }
1414 }
1415 } else {
1416 queue_iterate(&object->memq, p, vm_page_t, listq) {
1417 if (!p->fictitious &&
1418 (offset <= p->offset) && (p->offset < end)) {
1419
1420 pmap_page_protect(p->phys_addr,
1421 prot & ~p->page_lock);
1422 }
1423 }
1424 }
1425 }
1426
1427 if (prot == VM_PROT_NONE) {
1428 /*
1429 * Must follow shadow chain to remove access
1430 * to pages in shadowed objects.
1431 */
1432 register vm_object_t next_object;
1433
1434 next_object = object->shadow;
1435 if (next_object != VM_OBJECT_NULL) {
1436 offset += object->shadow_offset;
1437 vm_object_lock(next_object);
1438 vm_object_unlock(object);
1439 object = next_object;
1440 }
1441 else {
1442 /*
1443 * End of chain - we are done.
1444 */
1445 break;
1446 }
1447 }
1448 else {
1449 /*
1450 * Pages in shadowed objects may never have
1451 * write permission - we may stop here.
1452 */
1453 break;
1454 }
1455 }
1456
1457 vm_object_unlock(object);
1458 }
1459
1460 /*
1461 * Routine: vm_object_copy_slowly
1462 *
1463 * Description:
1464 * Copy the specified range of the source
1465 * virtual memory object without using
1466 * protection-based optimizations (such
1467 * as copy-on-write). The pages in the
1468 * region are actually copied.
1469 *
1470 * In/out conditions:
1471 * The caller must hold a reference and a lock
1472 * for the source virtual memory object. The source
1473 * object will be returned *unlocked*.
1474 *
1475 * Results:
1476 * If the copy is completed successfully, KERN_SUCCESS is
1477 * returned. If the caller asserted the interruptible
1478 * argument, and an interruption occurred while waiting
1479 * for a user-generated event, MACH_SEND_INTERRUPTED is
1480 * returned. Other values may be returned to indicate
1481 * hard errors during the copy operation.
1482 *
1483 * A new virtual memory object is returned in a
1484 * parameter (_result_object). The contents of this
1485 * new object, starting at a zero offset, are a copy
1486 * of the source memory region. In the event of
1487 * an error, this parameter will contain the value
1488 * VM_OBJECT_NULL.
1489 */
1490 __private_extern__ kern_return_t
1491 vm_object_copy_slowly(
1492 register vm_object_t src_object,
1493 vm_object_offset_t src_offset,
1494 vm_object_size_t size,
1495 boolean_t interruptible,
1496 vm_object_t *_result_object) /* OUT */
1497 {
1498 vm_object_t new_object;
1499 vm_object_offset_t new_offset;
1500
1501 vm_object_offset_t src_lo_offset = src_offset;
1502 vm_object_offset_t src_hi_offset = src_offset + size;
1503
1504 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1505 src_object, src_offset, size, 0, 0);
1506
1507 if (size == 0) {
1508 vm_object_unlock(src_object);
1509 *_result_object = VM_OBJECT_NULL;
1510 return(KERN_INVALID_ARGUMENT);
1511 }
1512
1513 /*
1514 * Prevent destruction of the source object while we copy.
1515 */
1516
1517 assert(src_object->ref_count > 0);
1518 src_object->ref_count++;
1519 VM_OBJ_RES_INCR(src_object);
1520 vm_object_unlock(src_object);
1521
1522 /*
1523 * Create a new object to hold the copied pages.
1524 * A few notes:
1525 * We fill the new object starting at offset 0,
1526 * regardless of the input offset.
1527 * We don't bother to lock the new object within
1528 * this routine, since we have the only reference.
1529 */
1530
1531 new_object = vm_object_allocate(size);
1532 new_offset = 0;
1533
1534 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1535
1536 for ( ;
1537 size != 0 ;
1538 src_offset += PAGE_SIZE_64,
1539 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1540 ) {
1541 vm_page_t new_page;
1542 vm_fault_return_t result;
1543
1544 while ((new_page = vm_page_alloc(new_object, new_offset))
1545 == VM_PAGE_NULL) {
1546 if (!vm_page_wait(interruptible)) {
1547 vm_object_deallocate(new_object);
1548 *_result_object = VM_OBJECT_NULL;
1549 return(MACH_SEND_INTERRUPTED);
1550 }
1551 }
1552
1553 do {
1554 vm_prot_t prot = VM_PROT_READ;
1555 vm_page_t _result_page;
1556 vm_page_t top_page;
1557 register
1558 vm_page_t result_page;
1559 kern_return_t error_code;
1560
1561 vm_object_lock(src_object);
1562 vm_object_paging_begin(src_object);
1563
1564 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1565 result = vm_fault_page(src_object, src_offset,
1566 VM_PROT_READ, FALSE, interruptible,
1567 src_lo_offset, src_hi_offset,
1568 VM_BEHAVIOR_SEQUENTIAL,
1569 &prot, &_result_page, &top_page,
1570 (int *)0,
1571 &error_code, FALSE, FALSE, NULL, 0);
1572
1573 switch(result) {
1574 case VM_FAULT_SUCCESS:
1575 result_page = _result_page;
1576
1577 /*
1578 * We don't need to hold the object
1579 * lock -- the busy page will be enough.
1580 * [We don't care about picking up any
1581 * new modifications.]
1582 *
1583 * Copy the page to the new object.
1584 *
1585 * POLICY DECISION:
1586 * If result_page is clean,
1587 * we could steal it instead
1588 * of copying.
1589 */
1590
1591 vm_object_unlock(result_page->object);
1592 vm_page_copy(result_page, new_page);
1593
1594 /*
1595 * Let go of both pages (make them
1596 * not busy, perform wakeup, activate).
1597 */
1598
1599 new_page->busy = FALSE;
1600 new_page->dirty = TRUE;
1601 vm_object_lock(result_page->object);
1602 PAGE_WAKEUP_DONE(result_page);
1603
1604 vm_page_lock_queues();
1605 if (!result_page->active &&
1606 !result_page->inactive)
1607 vm_page_activate(result_page);
1608 vm_page_activate(new_page);
1609 vm_page_unlock_queues();
1610
1611 /*
1612 * Release paging references and
1613 * top-level placeholder page, if any.
1614 */
1615
1616 vm_fault_cleanup(result_page->object,
1617 top_page);
1618
1619 break;
1620
1621 case VM_FAULT_RETRY:
1622 break;
1623
1624 case VM_FAULT_FICTITIOUS_SHORTAGE:
1625 vm_page_more_fictitious();
1626 break;
1627
1628 case VM_FAULT_MEMORY_SHORTAGE:
1629 if (vm_page_wait(interruptible))
1630 break;
1631 /* fall thru */
1632
1633 case VM_FAULT_INTERRUPTED:
1634 vm_page_free(new_page);
1635 vm_object_deallocate(new_object);
1636 vm_object_deallocate(src_object);
1637 *_result_object = VM_OBJECT_NULL;
1638 return(MACH_SEND_INTERRUPTED);
1639
1640 case VM_FAULT_MEMORY_ERROR:
1641 /*
1642 * A policy choice:
1643 * (a) ignore pages that we can't
1644 * copy
1645 * (b) return the null object if
1646 * any page fails [chosen]
1647 */
1648
1649 vm_page_lock_queues();
1650 vm_page_free(new_page);
1651 vm_page_unlock_queues();
1652 vm_object_deallocate(new_object);
1653 vm_object_deallocate(src_object);
1654 *_result_object = VM_OBJECT_NULL;
1655 return(error_code ? error_code:
1656 KERN_MEMORY_ERROR);
1657 }
1658 } while (result != VM_FAULT_SUCCESS);
1659 }
1660
1661 /*
1662 * Lose the extra reference, and return our object.
1663 */
1664
1665 vm_object_deallocate(src_object);
1666 *_result_object = new_object;
1667 return(KERN_SUCCESS);
1668 }
1669
1670 /*
1671 * Routine: vm_object_copy_quickly
1672 *
1673 * Purpose:
1674 * Copy the specified range of the source virtual
1675 * memory object, if it can be done without waiting
1676 * for user-generated events.
1677 *
1678 * Results:
1679 * If the copy is successful, the copy is returned in
1680 * the arguments; otherwise, the arguments are not
1681 * affected.
1682 *
1683 * In/out conditions:
1684 * The object should be unlocked on entry and exit.
1685 */
1686
1687 /*ARGSUSED*/
1688 __private_extern__ boolean_t
1689 vm_object_copy_quickly(
1690 vm_object_t *_object, /* INOUT */
1691 vm_object_offset_t offset, /* IN */
1692 vm_object_size_t size, /* IN */
1693 boolean_t *_src_needs_copy, /* OUT */
1694 boolean_t *_dst_needs_copy) /* OUT */
1695 {
1696 vm_object_t object = *_object;
1697 memory_object_copy_strategy_t copy_strategy;
1698
1699 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1700 *_object, offset, size, 0, 0);
1701 if (object == VM_OBJECT_NULL) {
1702 *_src_needs_copy = FALSE;
1703 *_dst_needs_copy = FALSE;
1704 return(TRUE);
1705 }
1706
1707 vm_object_lock(object);
1708
1709 copy_strategy = object->copy_strategy;
1710
1711 switch (copy_strategy) {
1712 case MEMORY_OBJECT_COPY_SYMMETRIC:
1713
1714 /*
1715 * Symmetric copy strategy.
1716 * Make another reference to the object.
1717 * Leave object/offset unchanged.
1718 */
1719
1720 assert(object->ref_count > 0);
1721 object->ref_count++;
1722 vm_object_res_reference(object);
1723 object->shadowed = TRUE;
1724 vm_object_unlock(object);
1725
1726 /*
1727 * Both source and destination must make
1728 * shadows, and the source must be made
1729 * read-only if not already.
1730 */
1731
1732 *_src_needs_copy = TRUE;
1733 *_dst_needs_copy = TRUE;
1734
1735 break;
1736
1737 case MEMORY_OBJECT_COPY_DELAY:
1738 vm_object_unlock(object);
1739 return(FALSE);
1740
1741 default:
1742 vm_object_unlock(object);
1743 return(FALSE);
1744 }
1745 return(TRUE);
1746 }
1747
/*
 * Event counters for vm_object_copy_call: calls made, waits for a copy
 * to arrive, and waits behind a copy already in progress.
 */
1748 static int copy_call_count = 0;
1749 static int copy_call_sleep_count = 0;
1750 static int copy_call_restart_count = 0;
1751
1752 /*
1753 * Routine: vm_object_copy_call [internal]
1754 *
1755 * Description:
1756 * Copy the source object (src_object), using the
1757 * user-managed copy algorithm.
1758 *
1759 * In/out conditions:
1760 * The source object must be locked on entry. It
1761 * will be *unlocked* on exit.
1762 *
1763 * Results:
1764 * If the copy is successful, KERN_SUCCESS is returned.
1765 * A new object that represents the copied virtual
1766 * memory is returned in a parameter (*_result_object).
1767 * If the return value indicates an error, this parameter
1768 * is not valid.
 *
 * NOTE(review): as written, the request to the memory manager is
 * stubbed out -- kr is hard-wired to KERN_FAILURE -- so the routine
 * always returns KERN_FAILURE right after marking the copy-call
 * event as wanted and unlocking the source.  Everything from
 * "Wait for the copy to arrive" onward is currently unreachable.
1769 */
1770 static kern_return_t
1771 vm_object_copy_call(
1772 vm_object_t src_object,
1773 vm_object_offset_t src_offset,
1774 vm_object_size_t size,
1775 vm_object_t *_result_object) /* OUT */
1776 {
1777 kern_return_t kr;
1778 vm_object_t copy;
1779 boolean_t check_ready = FALSE;
1780
1781 /*
1782 * If a copy is already in progress, wait and retry.
1783 *
1784 * XXX
1785 * Consider making this call interruptible, as Mike
1786 * intended it to be.
1787 *
1788 * XXXO
1789 * Need a counter or version or something to allow
1790 * us to use the copy that the currently requesting
1791 * thread is obtaining -- is it worth adding to the
1792 * vm object structure? Depends how common this case is.
1793 */
1794 copy_call_count++;
1795 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1796 vm_object_wait(src_object, VM_OBJECT_EVENT_COPY_CALL,
1797 THREAD_UNINT);
/* the object is re-locked here after each wait before re-testing */
1798 vm_object_lock(src_object);
1799 copy_call_restart_count++;
1800 }
1801
1802 /*
1803 * Indicate (for the benefit of memory_object_create_copy)
1804 * that we want a copy for src_object. (Note that we cannot
1805 * do a real assert_wait before calling memory_object_copy,
1806 * so we simply set the flag.)
1807 */
1808
1809 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1810 vm_object_unlock(src_object);
1811
1812 /*
1813 * Ask the memory manager to give us a memory object
1814 * which represents a copy of the src object.
1815 * The memory manager may give us a memory object
1816 * which we already have, or it may give us a
1817 * new memory object. This memory object will arrive
1818 * via memory_object_create_copy.
1819 */
1820
/*
 * NOTE(review): no request is actually issued here; kr is forced to
 * KERN_FAILURE, so the early return below is always taken.  The wanted
 * flag set above is not cleared on this path -- confirm whether that
 * is intended.
 */
1821 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1822 if (kr != KERN_SUCCESS) {
1823 return kr;
1824 }
1825
1826 /*
1827 * Wait for the copy to arrive.
1828 */
1829 vm_object_lock(src_object);
1830 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1831 vm_object_wait(src_object, VM_OBJECT_EVENT_COPY_CALL,
1832 THREAD_UNINT);
1833 vm_object_lock(src_object);
1834 copy_call_sleep_count++;
1835 }
/*
 * The source is locked first and the copy is only try-locked; on
 * failure the source lock is dropped, we pause, and start over.
 */
1836 Retry:
1837 assert(src_object->copy != VM_OBJECT_NULL);
1838 copy = src_object->copy;
1839 if (!vm_object_lock_try(copy)) {
1840 vm_object_unlock(src_object);
1841 mutex_pause(); /* wait a bit */
1842 vm_object_lock(src_object);
1843 goto Retry;
1844 }
/* grow the copy object if it does not reach the end of the requested range */
1845 if (copy->size < src_offset+size)
1846 copy->size = src_offset+size;
1847
1848 if (!copy->pager_ready)
1849 check_ready = TRUE;
1850
1851 /*
1852 * Return the copy.
1853 */
1854 *_result_object = copy;
1855 vm_object_unlock(copy);
1856 vm_object_unlock(src_object);
1857
1858 /* Wait for the copy to be ready. */
1859 if (check_ready == TRUE) {
1860 vm_object_lock(copy);
1861 while (!copy->pager_ready) {
1862 vm_object_wait(copy, VM_OBJECT_EVENT_PAGER_READY,
1863 FALSE);
1864 vm_object_lock(copy);
1865 }
1866 vm_object_unlock(copy);
1867 }
1868
1869 return KERN_SUCCESS;
1870 }
1871
/*
 * Event counters for vm_object_copy_delayed.  The lock_* and
 * max_collisions counters track failed try-locks of the old copy
 * object; protect_iterate counts passes over the source page list.
 * The two protect_lookup counters are not updated anywhere in this
 * routine.
 */
1872 static int copy_delayed_lock_collisions = 0;
1873 static int copy_delayed_max_collisions = 0;
1874 static int copy_delayed_lock_contention = 0;
1875 static int copy_delayed_protect_iterate = 0;
1876 static int copy_delayed_protect_lookup = 0;
1877 static int copy_delayed_protect_lookup_wait = 0;
1878
1879 /*
1880 * Routine: vm_object_copy_delayed [internal]
1881 *
1882 * Description:
1883 * Copy the specified virtual memory object, using
1884 * the asymmetric copy-on-write algorithm.
1885 *
1886 * In/out conditions:
1887 * The object must be unlocked on entry.
1888 *
1889 * This routine will not block waiting for user-generated
1890 * events. It is not interruptible.
 *
 * Results:
 * Returns the copy object to use for the new mapping: either
 * the source's existing copy object (with one more reference)
 * when that object has no resident pages and no pager created,
 * or a newly allocated copy object that shadows the source
 * at offset 0 and has been installed as src_object->copy.
 * The source object is unlocked on return.
1891 */
1892 __private_extern__ vm_object_t
1893 vm_object_copy_delayed(
1894 vm_object_t src_object,
1895 vm_object_offset_t src_offset,
1896 vm_object_size_t size)
1897 {
1898 vm_object_t new_copy = VM_OBJECT_NULL;
1899 vm_object_t old_copy;
1900 vm_page_t p;
1901 vm_object_size_t copy_size;
1902
1903 int collisions = 0;
1904 /*
1905 * The user-level memory manager wants to see all of the changes
1906 * to this object, but it has promised not to make any changes on
1907 * its own.
1908 *
1909 * Perform an asymmetric copy-on-write, as follows:
1910 * Create a new object, called a "copy object" to hold
1911 * pages modified by the new mapping (i.e., the copy,
1912 * not the original mapping).
1913 * Record the original object as the backing object for
1914 * the copy object. If the original mapping does not
1915 * change a page, it may be used read-only by the copy.
1916 * Record the copy object in the original object.
1917 * When the original mapping causes a page to be modified,
1918 * it must be copied to a new page that is "pushed" to
1919 * the copy object.
1920 * Mark the new mapping (the copy object) copy-on-write.
1921 * This makes the copy object itself read-only, allowing
1922 * it to be reused if the original mapping makes no
1923 * changes, and simplifying the synchronization required
1924 * in the "push" operation described above.
1925 *
1926 * The copy-on-write is said to be asymmetric because the original
1927 * object is *not* marked copy-on-write. A copied page is pushed
1928 * to the copy object, regardless which party attempted to modify
1929 * the page.
1930 *
1931 * Repeated asymmetric copy operations may be done. If the
1932 * original object has not been changed since the last copy, its
1933 * copy object can be reused. Otherwise, a new copy object can be
1934 * inserted between the original object and its previous copy
1935 * object. Since any copy object is read-only, this cannot
1936 * affect the contents of the previous copy object.
1937 *
1938 * Note that a copy object is higher in the object tree than the
1939 * original object; therefore, use of the copy object recorded in
1940 * the original object must be done carefully, to avoid deadlock.
1941 */
1942
/*
 * Every pass starts here with only new_copy (if already allocated)
 * locked; the source is locked afresh and its copy pointer re-read.
 */
1943 Retry:
1944 vm_object_lock(src_object);
1945
1946 /*
1947 * See whether we can reuse the result of a previous
1948 * copy operation.
1949 */
1950
1951 old_copy = src_object->copy;
1952 if (old_copy != VM_OBJECT_NULL) {
1953 /*
1954 * Try to get the locks (out of order)
 * The source is already locked, so the copy object is only
 * try-locked; on failure the source lock is dropped, we
 * pause, record the collision and start over.
1955 */
1956 if (!vm_object_lock_try(old_copy)) {
1957 vm_object_unlock(src_object);
1958 mutex_pause();
1959
1960 /* Heisenberg Rules */
1961 copy_delayed_lock_collisions++;
1962 if (collisions++ == 0)
1963 copy_delayed_lock_contention++;
1964
1965 if (collisions > copy_delayed_max_collisions)
1966 copy_delayed_max_collisions = collisions;
1967
1968 goto Retry;
1969 }
1970
1971 /*
1972 * Determine whether the old copy object has
1973 * been modified.
1974 */
1975
1976 if (old_copy->resident_page_count == 0 &&
1977 !old_copy->pager_created) {
1978 /*
1979 * It has not been modified.
1980 *
1981 * Return another reference to
1982 * the existing copy-object.
1983 */
1984 assert(old_copy->ref_count > 0);
1985 old_copy->ref_count++;
1986
1987 if (old_copy->size < src_offset+size)
1988 old_copy->size = src_offset+size;
1989
1990 #if TASK_SWAPPER
1991 /*
1992 * We have to reproduce some of the code from
1993 * vm_object_res_reference because we've taken
1994 * the locks out of order here, and deadlock
1995 * would result if we simply called that function.
1996 */
1997 if (++old_copy->res_count == 1) {
1998 assert(old_copy->shadow == src_object);
1999 vm_object_res_reference(src_object);
2000 }
2001 #endif /* TASK_SWAPPER */
2002
2003 vm_object_unlock(old_copy);
2004 vm_object_unlock(src_object);
2005
/*
 * A new copy object allocated on an earlier pass is not
 * needed after all; it was locked when allocated, so
 * unlock it before dropping it.
 */
2006 if (new_copy != VM_OBJECT_NULL) {
2007 vm_object_unlock(new_copy);
2008 vm_object_deallocate(new_copy);
2009 }
2010
2011 return(old_copy);
2012 }
/*
 * The old copy has been modified and no new copy object exists
 * yet: release both locks, allocate and lock one, then re-examine
 * everything from Retry.
 */
2013 if (new_copy == VM_OBJECT_NULL) {
2014 vm_object_unlock(old_copy);
2015 vm_object_unlock(src_object);
2016 new_copy = vm_object_allocate(src_offset + size);
2017 vm_object_lock(new_copy);
2018 goto Retry;
2019 }
2020
2021 /*
2022 * Adjust the size argument so that the newly-created
2023 * copy object will be large enough to back either the
2024 * new old copy object or the new mapping.
2025 */
2026 if (old_copy->size > src_offset+size)
2027 size = old_copy->size - src_offset;
2028
2029 /*
2030 * The copy-object is always made large enough to
2031 * completely shadow the original object, since
2032 * it may have several users who want to shadow
2033 * the original object at different points.
2034 */
2035
2036 assert((old_copy->shadow == src_object) &&
2037 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2038
2039 /*
2040 * Make the old copy-object shadow the new one.
2041 * It will receive no more pages from the original
2042 * object.
2043 */
2044
2045 src_object->ref_count--; /* remove ref. from old_copy */
2046 assert(src_object->ref_count > 0);
2047 old_copy->shadow = new_copy;
2048 assert(new_copy->ref_count > 0);
2049 new_copy->ref_count++; /* for old_copy->shadow ref. */
2050
2051 #if TASK_SWAPPER
/* move old_copy's residence count contribution from the source to new_copy */
2052 if (old_copy->res_count) {
2053 VM_OBJ_RES_INCR(new_copy);
2054 VM_OBJ_RES_DECR(src_object);
2055 }
2056 #endif
2057
2058 vm_object_unlock(old_copy); /* done with old_copy */
2059 } else if (new_copy == VM_OBJECT_NULL) {
/*
 * The source has no copy object and none has been allocated
 * yet: drop the source lock, allocate and lock one, retry.
 */
2060 vm_object_unlock(src_object);
2061 new_copy = vm_object_allocate(src_offset + size);
2062 vm_object_lock(new_copy);
2063 goto Retry;
2064 }
2065
2066 /*
2067 * Readjust the copy-object size if necessary.
2068 */
2069 copy_size = new_copy->size;
2070 if (copy_size < src_offset+size) {
2071 copy_size = src_offset+size;
2072 new_copy->size = copy_size;
2073 }
2074
2075 /*
2076 * Point the new copy at the existing object.
 * The source gains a reference for new_copy's shadow link
 * and records new_copy as its current copy object.
2077 */
2078
2079 new_copy->shadow = src_object;
2080 new_copy->shadow_offset = 0;
2081 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2082 assert(src_object->ref_count > 0);
2083 src_object->ref_count++;
2084 VM_OBJ_RES_INCR(src_object);
2085 src_object->copy = new_copy;
2086 vm_object_unlock(new_copy);
2087
2088 /*
2089 * Mark all (current) pages of the existing object copy-on-write.
2090 * This object may have a shadow chain below it, but
2091 * those pages will already be marked copy-on-write.
 *
 * Paging activity on the source is allowed to drain first;
 * write permission is then removed from every non-fictitious
 * resident page.
2092 */
2093
2094 vm_object_paging_wait(src_object, THREAD_UNINT);
2095 copy_delayed_protect_iterate++;
2096 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2097 if (!p->fictitious)
2098 pmap_page_protect(p->phys_addr,
2099 (VM_PROT_ALL & ~VM_PROT_WRITE &
2100 ~p->page_lock));
2101 }
2102 vm_object_unlock(src_object);
2103 XPR(XPR_VM_OBJECT,
2104 "vm_object_copy_delayed: used copy object %X for source %X\n",
2105 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2106
2107 return(new_copy);
2108 }
2109
2110 /*
2111 * Routine: vm_object_copy_strategically
2112 *
2113 * Purpose:
2114 * Perform a copy according to the source object's
2115 * declared strategy. This operation may block,
2116 * and may be interrupted.
2117 */
2118 __private_extern__ kern_return_t
2119 vm_object_copy_strategically(
2120 register vm_object_t src_object,
2121 vm_object_offset_t src_offset,
2122 vm_object_size_t size,
2123 vm_object_t *dst_object, /* OUT */
2124 vm_object_offset_t *dst_offset, /* OUT */
2125 boolean_t *dst_needs_copy) /* OUT */
2126 {
2127 boolean_t result;
2128 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2129 memory_object_copy_strategy_t copy_strategy;
2130
2131 assert(src_object != VM_OBJECT_NULL);
2132
2133 vm_object_lock(src_object);
2134
2135 /*
2136 * The copy strategy is only valid if the memory manager
2137 * is "ready". Internal objects are always ready.
2138 */
2139
2140 while (!src_object->internal && !src_object->pager_ready) {
2141
2142 vm_object_wait( src_object,
2143 VM_OBJECT_EVENT_PAGER_READY,
2144 interruptible);
2145 if (interruptible &&
2146 (current_thread()->wait_result != THREAD_AWAKENED)) {
2147 *dst_object = VM_OBJECT_NULL;
2148 *dst_offset = 0;
2149 *dst_needs_copy = FALSE;
2150 return(MACH_SEND_INTERRUPTED);
2151 }
2152 vm_object_lock(src_object);
2153 }
2154
2155 copy_strategy = src_object->copy_strategy;
2156
2157 /*
2158 * Use the appropriate copy strategy.
2159 */
2160
2161 switch (copy_strategy) {
2162 case MEMORY_OBJECT_COPY_NONE:
2163 result = vm_object_copy_slowly(src_object, src_offset, size,
2164 interruptible, dst_object);
2165 if (result == KERN_SUCCESS) {
2166 *dst_offset = 0;
2167 *dst_needs_copy = FALSE;
2168 }
2169 break;
2170
2171 case MEMORY_OBJECT_COPY_CALL:
2172 result = vm_object_copy_call(src_object, src_offset, size,
2173 dst_object);
2174 if (result == KERN_SUCCESS) {
2175 *dst_offset = src_offset;
2176 *dst_needs_copy = TRUE;
2177 }
2178 break;
2179
2180 case MEMORY_OBJECT_COPY_DELAY:
2181 vm_object_unlock(src_object);
2182 *dst_object = vm_object_copy_delayed(src_object,
2183 src_offset, size);
2184 *dst_offset = src_offset;
2185 *dst_needs_copy = TRUE;
2186 result = KERN_SUCCESS;
2187 break;
2188
2189 case MEMORY_OBJECT_COPY_SYMMETRIC:
2190 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2191 vm_object_unlock(src_object);
2192 result = KERN_MEMORY_RESTART_COPY;
2193 break;
2194
2195 default:
2196 panic("copy_strategically: bad strategy");
2197 result = KERN_INVALID_ARGUMENT;
2198 }
2199 return(result);
2200 }
2201
2202 /*
2203 * vm_object_shadow:
2204 *
2205 * Create a new object which is backed by the
2206 * specified existing object range. The source
2207 * object reference is deallocated.
2208 *
2209 * The new object and offset into that object
2210 * are returned in the source parameters.
2211 */
2212 boolean_t vm_object_shadow_check = FALSE;
2213
2214 __private_extern__ boolean_t
2215 vm_object_shadow(
2216 vm_object_t *object, /* IN/OUT */
2217 vm_object_offset_t *offset, /* IN/OUT */
2218 vm_object_size_t length)
2219 {
2220 register vm_object_t source;
2221 register vm_object_t result;
2222
2223 source = *object;
2224 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2225
2226 /*
2227 * Determine if we really need a shadow.
2228 */
2229
2230 if (vm_object_shadow_check && source->ref_count == 1 &&
2231 (source->shadow == VM_OBJECT_NULL ||
2232 source->shadow->copy == VM_OBJECT_NULL))
2233 {
2234 source->shadowed = FALSE;
2235 return FALSE;
2236 }
2237
2238 /*
2239 * Allocate a new object with the given length
2240 */
2241
2242 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2243 panic("vm_object_shadow: no object for shadowing");
2244
2245 /*
2246 * The new object shadows the source object, adding
2247 * a reference to it. Our caller changes his reference
2248 * to point to the new object, removing a reference to
2249 * the source object. Net result: no change of reference
2250 * count.
2251 */
2252 result->shadow = source;
2253
2254 /*
2255 * Store the offset into the source object,
2256 * and fix up the offset into the new object.
2257 */
2258
2259 result->shadow_offset = *offset;
2260
2261 /*
2262 * Return the new things
2263 */
2264
2265 *offset = 0;
2266 *object = result;
2267 return TRUE;
2268 }
2269
2270 /*
2271 * The relationship between vm_object structures and
2272 * the memory_object requires careful synchronization.
2273 *
2274 * All associations are created by memory_object_create_named
2275 * for external pagers and vm_object_pager_create for internal
2276 * objects as follows:
2277 *
2278 * pager: the memory_object itself, supplied by
2279 * the user requesting a mapping (or the kernel,
2280 * when initializing internal objects); the
2281 * kernel simulates holding send rights by keeping
2282 * a port reference;
2283 *
2284 * pager_request:
2285 * the memory object control port,
2286 * created by the kernel; the kernel holds
2287 * receive (and ownership) rights to this
2288 * port, but no other references.
2289 *
2290 * When initialization is complete, the "initialized" field
2291 * is asserted. Other mappings using a particular memory object,
2292 * and any references to the vm_object gained through the
2293 * port association must wait for this initialization to occur.
2294 *
2295 * In order to allow the memory manager to set attributes before
2296 * requests (notably virtual copy operations, but also data or
2297 * unlock requests) are made, a "ready" attribute is made available.
2298 * Only the memory manager may affect the value of this attribute.
2299 * Its value does not affect critical kernel functions, such as
2300 * internal object initialization or destruction. [Furthermore,
2301 * memory objects created by the kernel are assumed to be ready
2302 * immediately; the default memory manager need not explicitly
2303 * set the "ready" attribute.]
2304 *
2305 * [Both the "initialized" and "ready" attribute wait conditions
2306 * use the "pager" field as the wait event.]
2307 *
2308 * The port associations can be broken down by any of the
2309 * following routines:
2310 * vm_object_terminate:
2311 * No references to the vm_object remain, and
2312 * the object cannot (or will not) be cached.
2313 * This is the normal case, and is done even
2314 * though one of the other cases has already been
2315 * done.
2316 * memory_object_destroy:
2317 * The memory manager has requested that the
2318 * kernel relinquish references to the memory
2319 * object. [The memory manager may not want to
2320 * destroy the memory object, but may wish to
2321 * refuse or tear down existing memory mappings.]
2322 *
2323 * Each routine that breaks an association must break all of
2324 * them at once. At some later time, that routine must clear
2325 * the pager field and release the memory object references.
2326 * [Furthermore, each routine must cope with the simultaneous
2327 * or previous operations of the others.]
2328 *
2329 * In addition to the lock on the object, the vm_object_cache_lock
2330 * governs the associations. References gained through the
2331 * association require use of the cache lock.
2332 *
2333 * Because the pager field may be cleared spontaneously, it
2334 * cannot be used to determine whether a memory object has
2335 * ever been associated with a particular vm_object. [This
2336 * knowledge is important to the shadow object mechanism.]
2337 * For this reason, an additional "created" attribute is
2338 * provided.
2339 *
2340 * During various paging operations, the pager reference found in the
2341 * vm_object must be valid. To prevent this from being released,
2342 * (other than being removed, i.e., made null), routines may use
2343 * the vm_object_paging_begin/end routines [actually, macros].
2344 * The implementation uses the "paging_in_progress" and "wanted" fields.
2345 * [Operations that alter the validity of the pager values include the
2346 * termination routines and vm_object_collapse.]
2347 */
2348
2349 #if 0
/*
 * NOTE: everything from the "#if 0" above to the matching "#endif"
 * is compiled out.
 */
2350 /*
2351 * Routine: vm_object_pager_dead
2352 *
2353 * Purpose:
2354 * A port is being destroyed, and the IPC kobject code
2355 * can't tell if it represents a pager port or not.
2356 * So this function is called each time it sees a port
2357 * die.
2358 * THIS IS HORRIBLY INEFFICIENT. We should only call
2359 * this routine if we had requested a notification on
2360 * the port.
2361 */
/*
 * Summary of what the body does: looks the pager up in the object
 * hash; if an object is attached, detaches it from the hash entry,
 * takes a temporary reference, clears object->pager, wakes pager
 * waiters, releases the pager and control references, aborts
 * pending activity and finally drops the temporary reference.
 */
2362
2363 __private_extern__ void
2364 vm_object_pager_dead(
2365 ipc_port_t pager)
2366 {
2367 vm_object_t object;
2368 vm_object_hash_entry_t entry;
2369
2370 /*
2371 * Perform essentially the same operations as in vm_object_lookup,
2372 * except that this time we look up based on the memory_object
2373 * port, not the control port.
2374 */
2375 vm_object_cache_lock();
2376 entry = vm_object_hash_lookup(pager, FALSE);
/* No hash entry, or no object attached to it: nothing to tear down. */
2377 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2378 entry->object == VM_OBJECT_NULL) {
2379 vm_object_cache_unlock();
2380 return;
2381 }
2382
/* Detach the object from its hash entry (the entry itself stays). */
2383 object = entry->object;
2384 entry->object = VM_OBJECT_NULL;
2385
2386 vm_object_lock(object);
/* An object with no references sits on the cached list; pull it off. */
/* NOTE(review): the XPR text below is labelled "vm_object_destroy", not this routine's name. */
2387 if (object->ref_count == 0) {
2388 XPR(XPR_VM_OBJECT_CACHE,
2389 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2390 (integer_t)object,
2391 (integer_t)vm_object_cached_list.next,
2392 (integer_t)vm_object_cached_list.prev, 0,0);
2393
2394 queue_remove(&vm_object_cached_list, object,
2395 vm_object_t, cached_list);
2396 vm_object_cached_count--;
2397 }
/* Temporary reference; dropped by vm_object_deallocate() at the end. */
2398 object->ref_count++;
2399 vm_object_res_reference(object);
2400
2401 object->can_persist = FALSE;
2402
2403 assert(object->pager == pager);
2404
2405 /*
2406 * Remove the pager association.
2407 *
2408 * Note that the memory_object itself is dead, so
2409 * we don't bother with it.
2410 */
2411
2412 object->pager = MEMORY_OBJECT_NULL;
2413
2414 vm_object_unlock(object);
2415 vm_object_cache_unlock();
2416
2417 vm_object_pager_wakeup(pager);
2418
2419 /*
2420 * Release the pager reference. Note that there's no
2421 * point in trying the memory_object_terminate call
2422 * because the memory_object itself is dead. Also
2423 * release the memory_object_control reference, since
2424 * the pager didn't do that either.
2425 */
2426
/* NOTE(review): object->pager_request is read below after the object lock was released above. */
2427 memory_object_deallocate(pager);
2428 memory_object_control_deallocate(object->pager_request);
2429
2430
2431 /*
2432 * Restart pending page requests
2433 */
2434 vm_object_lock(object);
2435 vm_object_abort_activity(object);
2436 vm_object_unlock(object);
2437
2438 /*
2439 * Lose the object reference.
2440 */
2441
2442 vm_object_deallocate(object);
2443 }
2444 #endif
2445
2446 /*
2447 * Routine: vm_object_enter
2448 * Purpose:
2449 * Find a VM object corresponding to the given
2450 * pager; if no such object exists, create one,
2451 * and initialize the pager.
2452 */
/*
 * Parameters:
 *	pager		memory object to look up in the object hash.  If it is
 *			MEMORY_OBJECT_NULL the routine simply returns
 *			vm_object_allocate(size).
 *	size		size passed to vm_object_allocate() when a new object
 *			has to be created.
 *	internal	recorded in object->internal and object->pager_trusted
 *			when the association is initialized here.
 *	init		when TRUE, the initialization sequence below is run
 *			even if the hash already maps the pager to an object
 *			(vm_object_pager_create inserts the entry itself and
 *			then calls in with init == TRUE).
 *	named		when TRUE, object->named is set.
 *
 * Returns:
 *	The object associated with the pager.  When an existing object is
 *	found and no initialization is required, its ref_count is
 *	incremented (after taking it off the cached list if the count was
 *	zero).  Before returning, the routine waits uninterruptibly until
 *	object->pager_initialized is set.
 *
 * Locking:
 *	Takes and releases the object cache lock and the object lock
 *	internally; both are released on return.
 */
2453 vm_object_t
2454 vm_object_enter(
2455 memory_object_t pager,
2456 vm_object_size_t size,
2457 boolean_t internal,
2458 boolean_t init,
2459 boolean_t named)
2460 {
2461 register vm_object_t object;
2462 vm_object_t new_object;
2463 boolean_t must_init;
2464 vm_object_hash_entry_t entry, new_entry;
2465
/* No pager: there is nothing to look up, hand back a fresh object. */
2466 if (pager == MEMORY_OBJECT_NULL)
2467 return(vm_object_allocate(size));
2468
/*
 * new_object / new_entry hold what we pre-allocate while the cache
 * lock is dropped; whichever of them is still non-null after the
 * loop was not needed and is released further down.
 */
2469 new_object = VM_OBJECT_NULL;
2470 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2471 must_init = init;
2472
2473 /*
2474 * Look for an object associated with this port.
2475 */
2476
2477 restart:
2478 vm_object_cache_lock();
2479 for (;;) {
2480 entry = vm_object_hash_lookup(pager, FALSE);
2481
2482 /*
2483 * If a previous object is being terminated,
2484 * we must wait for the termination message
2485 * to be queued.
2486 *
2487 * We set entry->waiting to let the
2488 * terminator know that someone is waiting.
2489 * Among the possibilities is that the port
2490 * could die while we're waiting. Must restart
2491 * instead of continuing the loop.
2492 */
2493
2494 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
/* Entry with an object attached: this is the one we want. */
2495 if (entry->object != VM_OBJECT_NULL)
2496 break;
2497
/* Entry exists but has no object: sleep on the pager, then start over. */
2498 entry->waiting = TRUE;
2499 assert_wait((event_t) pager, THREAD_UNINT);
2500 vm_object_cache_unlock();
2501 thread_block((void (*)(void))0);
2502 goto restart;
2503 }
2504
2505 /*
2506 * We must unlock to create a new object;
2507 * if we do so, we must try the lookup again.
2508 */
2509
2510 if (new_object == VM_OBJECT_NULL) {
2511 vm_object_cache_unlock();
2512 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2513 new_entry = vm_object_hash_entry_alloc(pager);
2514 new_object = vm_object_allocate(size);
2515 vm_object_cache_lock();
2516 } else {
2517 /*
2518 * Lookup failed twice, and we have something
2519 * to insert; set the object.
2520 */
2521
2522 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2523 vm_object_hash_insert(new_entry);
2524 entry = new_entry;
2525 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2526 }
2527
/*
 * We created the association, so we are responsible for
 * initializing it; the next loop iteration finds the entry
 * and breaks out.
 */
2528 entry->object = new_object;
2529 new_object = VM_OBJECT_NULL;
2530 must_init = TRUE;
2531 }
2532 }
2533
/* Still holding the cache lock here. */
2534 object = entry->object;
2535 assert(object != VM_OBJECT_NULL);
2536
/*
 * Existing association and no initialization requested: take a
 * reference for the caller, pulling the object off the cached
 * list first if nobody else referenced it.
 */
2537 if (!must_init) {
2538 vm_object_lock(object);
2539 assert(object->pager_created);
2540 assert(!internal || object->internal);
2541 if (named) {
2542 assert(!object->named);
2543 object->named = TRUE;
2544 }
2545 if (object->ref_count == 0) {
2546 XPR(XPR_VM_OBJECT_CACHE,
2547 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2548 (integer_t)object,
2549 (integer_t)vm_object_cached_list.next,
2550 (integer_t)vm_object_cached_list.prev, 0,0);
2551 queue_remove(&vm_object_cached_list, object,
2552 vm_object_t, cached_list);
2553 vm_object_cached_count--;
2554 }
2555 object->ref_count++;
2556 vm_object_res_reference(object);
2557 vm_object_unlock(object);
2558
2559 VM_STAT(hits++);
2560 }
2561 assert(object->ref_count > 0);
2562
2563 VM_STAT(lookups++);
2564
2565 vm_object_cache_unlock();
2566
2567 XPR(XPR_VM_OBJECT,
2568 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2569 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2570
2571 /*
2572 * If we raced to create a vm_object but lost, let's
2573 * throw away ours.
2574 */
2575
2576 if (new_object != VM_OBJECT_NULL)
2577 vm_object_deallocate(new_object);
2578
2579 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2580 vm_object_hash_entry_free(new_entry);
2581
/*
 * Initialization path: attach the pager and a newly allocated
 * control/request handle to the object, then announce ourselves
 * to the pager with memory_object_init().
 */
2582 if (must_init) {
2583 pager_request_t pager_request;
2584
2585 /*
2586 * Allocate request port.
2587 */
2588
2589 pager_request = memory_object_control_allocate(object);
2590 assert (pager_request != PAGER_REQUEST_NULL);
2591
2592 vm_object_lock(object);
2593
2594 /*
2595 * Copy the reference we were given.
2596 */
2597
2598 memory_object_reference(pager);
2599 object->pager_created = TRUE;
2600 object->pager = pager;
2601 object->internal = internal;
2602 object->pager_trusted = internal;
2603 if (!internal) {
2604 /* copy strategy invalid until set by memory manager */
2605 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2606 }
2607 object->pager_request = pager_request;
2608 object->pager_ready = FALSE;
2609
/* The object lock is dropped around the call into the pager. */
2610 vm_object_unlock(object);
2611
2612 /*
2613 * Let the pager know we're using it.
2614 */
2615
2616 (void) memory_object_init(pager,
2617 object->pager_request,
2618 PAGE_SIZE);
2619
2620 vm_object_lock(object);
2621 if (named)
2622 object->named = TRUE;
/* Internal objects are marked ready right away; wake anyone waiting for that. */
2623 if (internal) {
2624 object->pager_ready = TRUE;
2625 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2626 }
2627
2628 object->pager_initialized = TRUE;
2629 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2630 } else {
2631 vm_object_lock(object);
2632 }
2633
2634 /*
2635 * [At this point, the object must be locked]
2636 */
2637
2638 /*
2639 * Wait for the work above to be done by the first
2640 * thread to map this object.
2641 */
2642
/* The object lock is re-taken after each vm_object_wait() before re-testing. */
2643 while (!object->pager_initialized) {
2644 vm_object_wait( object,
2645 VM_OBJECT_EVENT_INITIALIZED,
2646 THREAD_UNINT);
2647 vm_object_lock(object);
2648 }
2649 vm_object_unlock(object);
2650
/* NOTE(review): object->pager is read for tracing below after the object lock was released. */
2651 XPR(XPR_VM_OBJECT,
2652 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2653 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2654 return(object);
2655 }
2656
2657 /*
2658 * Routine: vm_object_pager_create
2659 * Purpose:
2660 * Create a memory object for an internal object.
2661 * In/out conditions:
2662 * The object is locked on entry and exit;
2663 * it may be unlocked within this call.
2664 * Limitations:
2665 * Only one thread may be performing a
2666 * vm_object_pager_create on an object at
2667 * a time. Presumably, only the pageout
2668 * daemon will be using this routine.
2669 */
/*
 * Outline:
 *   1. Return immediately if memory_manager_default_check() fails
 *      (object->pager_created is left unchanged in that case).
 *   2. If a pager was already created, wait for its initialization
 *      to complete and return.
 *   3. Otherwise mark pager_created, drop the object lock, obtain a
 *      memory object from the default memory manager, enter it in
 *      the object hash, and let vm_object_enter() finish the
 *      initialization.
 * A paging reference (vm_object_paging_begin/end) is held across
 * steps 2 and 3.
 */
2670
2671 void
2672 vm_object_pager_create(
2673 register vm_object_t object)
2674 {
2675 memory_object_t pager;
2676 vm_object_hash_entry_t entry;
2677 #if MACH_PAGEMAP
2678 vm_object_size_t size;
2679 vm_external_map_t map;
2680 #endif /* MACH_PAGEMAP */
2681
2682 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2683 (integer_t)object, 0,0,0,0);
2684
/* Default memory manager check failed: give up without touching the object. */
2685 if (memory_manager_default_check() != KERN_SUCCESS)
2686 return;
2687
2688 /*
2689 * Prevent collapse or termination by holding a paging reference
2690 */
2691
2692 vm_object_paging_begin(object);
2693 if (object->pager_created) {
2694 /*
2695 * Someone else got to it first...
2696 * wait for them to finish initializing the ports
2697 */
/* The object lock is re-taken after each wait, so we return locked. */
2698 while (!object->pager_initialized) {
2699 vm_object_wait( object,
2700 VM_OBJECT_EVENT_INITIALIZED,
2701 THREAD_UNINT);
2702 vm_object_lock(object);
2703 }
2704 vm_object_paging_end(object);
2705 return;
2706 }
2707
2708 /*
2709 * Indicate that a memory object has been assigned
2710 * before dropping the lock, to prevent a race.
2711 */
2712
2713 object->pager_created = TRUE;
2714 object->paging_offset = 0;
2715
/* Snapshot the size under the lock; it is asserted unchanged after relocking below. */
2716 #if MACH_PAGEMAP
2717 size = object->size;
2718 #endif /* MACH_PAGEMAP */
2719 vm_object_unlock(object);
2720
/* Build the existence map without the lock held, then install it under the lock. */
2721 #if MACH_PAGEMAP
2722 map = vm_external_create(size);
2723 vm_object_lock(object);
2724 assert(object->size == size);
2725 object->existence_map = map;
2726 vm_object_unlock(object);
2727 #endif /* MACH_PAGEMAP */
2728
2729 /*
2730 * Create the [internal] pager, and associate it with this object.
2731 *
2732 * We make the association here so that vm_object_enter()
2733 * can look up the object to complete initializing it. No
2734 * user will ever map this object.
2735 */
/*
 * NOTE(review): the object lock is not held in this block, yet
 * object->cluster_size is written and object->size is read; only
 * the paging reference taken above is held.
 */
2736 {
2737 memory_object_default_t dmm;
2738 vm_size_t cluster_size;
2739
2740 /* acquire a reference for the default memory manager */
2741 dmm = memory_manager_default_reference(&cluster_size);
2742 assert(cluster_size >= PAGE_SIZE);
2743
2744 object->cluster_size = cluster_size; /* XXX ??? */
2745 assert(object->temporary);
2746
/*
 * NOTE(review): the result of memory_object_create() is discarded;
 * "pager" has no initializer, so its value after a failed call is
 * whatever the callee left there -- confirm the callee's contract.
 */
2747 /* create our new memory object */
2748 (void) memory_object_create(dmm, object->size, &pager);
2749
2750 memory_object_default_deallocate(dmm);
2751 }
2752
/* Publish the pager -> object mapping in the hash, under the cache lock. */
2753 entry = vm_object_hash_entry_alloc(pager);
2754
2755 vm_object_cache_lock();
2756 vm_object_hash_insert(entry);
2757
2758 entry->object = object;
2759 vm_object_cache_unlock();
2760
2761 /*
2762 * A reference was returned by
2763 * memory_object_create(), and it is
2764 * copied by vm_object_enter().
2765 */
2766
/*
 * Called with internal == TRUE, init == TRUE, named == FALSE; it
 * must find the entry inserted above and hand back this object.
 */
2767 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
2768 panic("vm_object_pager_create: mismatch");
2769
2770 /*
2771 * Drop the reference we were passed.
2772 */
2773 memory_object_deallocate(pager);
2774
/* Re-take the object lock so the caller gets the object back locked. */
2775 vm_object_lock(object);
2776
2777 /*
2778 * Release the paging reference
2779 */
2780 vm_object_paging_end(object);
2781 }
2782
2783 /*
2784 * Routine: vm_object_remove
2785 * Purpose:
2786 * Eliminate the pager/object association
2787 * for this pager.
2788 * Conditions:
2789 * The object cache must be locked.
2790 */
2791 __private_extern__ void
2792 vm_object_remove(
2793 vm_object_t object)
2794 {
2795 memory_object_t pager;
2796 pager_request_t pager_request;
2797
2798 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
2799 vm_object_hash_entry_t entry;
2800
2801 entry = vm_object_hash_lookup(pager, FALSE);
2802 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
2803 entry->object = VM_OBJECT_NULL;
2804 }
2805
2806 }
2807
2808 /*
2809 * Global variables for vm_object_collapse():
2810 *
2811 * Counts for normal collapses and bypasses.
2812 * Debugging variables, to watch or disable collapse.
2813 */
2814 static long object_collapses = 0; /* incremented at the end of vm_object_do_collapse() */
2815 static long object_bypasses = 0; /* incremented at the end of vm_object_do_bypass() */
2816
/*
 * Switches tested by vm_object_collapse(): when both are FALSE it
 * returns immediately; each one individually gates the collapse
 * and the bypass attempt respectively.
 */
2817 static boolean_t vm_object_collapse_allowed = TRUE;
2818 static boolean_t vm_object_bypass_allowed = TRUE;
2819
/*
 * Existence-map statistics, updated by vm_object_do_collapse() under
 * MACH_PAGEMAP: "discarded" counts backing-object maps that were
 * destroyed, "collapsed" counts maps handed over to the parent.
 */
2820 static int vm_external_discarded;
2821 static int vm_external_collapsed;
2822
2823 /*
2824 * Routine: vm_object_do_collapse
2825 * Purpose:
2826 * Collapse an object with the object backing it.
2827 * Pages in the backing object are moved into the
2828 * parent, and the backing object is deallocated.
2829 * Conditions:
2830 * Both objects and the cache are locked; the page
2831 * queues are unlocked.
2832 *
2833 */
/*
 * On return: the cache lock and the backing object's lock have been
 * released, backing_object has been returned to vm_object_zone, and
 * "object" has inherited the backing object's pager state (if any)
 * and shadow link.  The lock on "object" is not touched here.
 */
2834 static void
2835 vm_object_do_collapse(
2836 vm_object_t object,
2837 vm_object_t backing_object)
2838 {
2839 vm_page_t p, pp;
2840 vm_object_offset_t new_offset, backing_offset;
2841 vm_object_size_t size;
2842
2843 backing_offset = object->shadow_offset;
2844 size = object->size;
2845
2846 /*
2847 * Move all in-memory pages from backing_object
2848 * to the parent. Pages that have been paged out
2849 * will be overwritten by any of the parent's
2850 * pages that shadow them.
2851 */
2852
/*
 * Every iteration either frees or renames the first page of the
 * backing object's queue, so the loop ends when the queue is empty.
 */
2853 while (!queue_empty(&backing_object->memq)) {
2854
2855 p = (vm_page_t) queue_first(&backing_object->memq);
2856
/* Offset the page would have inside the parent. */
2857 new_offset = (p->offset - backing_offset);
2858
2859 assert(!p->busy || p->absent);
2860
2861 /*
2862 * If the parent has a page here, or if
2863 * this page falls outside the parent,
2864 * dispose of it.
2865 *
2866 * Otherwise, move it as planned.
2867 */
2868
/* Page lies before or beyond the window the parent maps: drop it. */
2869 if (p->offset < backing_offset || new_offset >= size) {
2870 VM_PAGE_FREE(p);
2871 } else {
2872 pp = vm_page_lookup(object, new_offset);
2873 if (pp == VM_PAGE_NULL) {
2874
2875 /*
2876 * Parent now has no page.
2877 * Move the backing object's page up.
2878 */
2879
2880 vm_page_rename(p, object, new_offset);
2881 #if MACH_PAGEMAP
2882 } else if (pp->absent) {
2883
2884 /*
2885 * Parent has an absent page...
2886 * it's not being paged in, so
2887 * it must really be missing from
2888 * the parent.
2889 *
2890 * Throw out the absent page...
2891 * any faults looking for that
2892 * page will restart with the new
2893 * one.
2894 */
2895
2896 VM_PAGE_FREE(pp);
2897 vm_page_rename(p, object, new_offset);
2898 #endif /* MACH_PAGEMAP */
2899 } else {
2900 assert(! pp->absent);
2901
2902 /*
2903 * Parent object has a real page.
2904 * Throw away the backing object's
2905 * page.
2906 */
2907 VM_PAGE_FREE(p);
2908 }
2909 }
2910 }
2911
/* At most one of the two objects may have a pager. */
2912 assert(object->pager == MEMORY_OBJECT_NULL ||
2913 backing_object->pager == MEMORY_OBJECT_NULL);
2914
2915 if (backing_object->pager != MEMORY_OBJECT_NULL) {
2916 vm_object_hash_entry_t entry;
2917
2918 /*
2919 * Move the pager from backing_object to object.
2920 *
2921 * XXX We're only using part of the paging space
2922 * for keeps now... we ought to discard the
2923 * unused portion.
2924 */
2925
/* Repoint the hash entry for the pager at the parent as well. */
2926 object->pager = backing_object->pager;
2927 entry = vm_object_hash_lookup(object->pager, FALSE);
2928 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
2929 entry->object = object;
2930 object->pager_created = backing_object->pager_created;
2931 object->pager_request = backing_object->pager_request;
2932 object->pager_ready = backing_object->pager_ready;
2933 object->pager_initialized = backing_object->pager_initialized;
2934 object->cluster_size = backing_object->cluster_size;
2935 object->paging_offset =
2936 backing_object->paging_offset + backing_offset;
2937 if (object->pager_request != PAGER_REQUEST_NULL) {
2938 memory_object_control_collapse(object->pager_request,
2939 object);
2940 }
2941 }
2942
2943 vm_object_cache_unlock();
2944
/*
 * NOTE(review): this repeats the paging_offset assignment made inside
 * the pager branch above, and also runs when the backing object had
 * no pager -- confirm the unconditional store is intended.
 */
2945 object->paging_offset = backing_object->paging_offset + backing_offset;
2946
2947 #if MACH_PAGEMAP
2948 /*
2949 * If the shadow offset is 0, then use the existence map from
2950 * the backing object if there is one. If the shadow offset is
2951 * not zero, toss it.
2952 *
2953 * XXX - If the shadow offset is not 0 then a bit copy is needed
2954 * if the map is to be salvaged. For now, we just toss the
2955 * old map, giving the collapsed object no map. This means that
2956 * the pager is invoked for zero fill pages. If analysis shows
2957 * that this happens frequently and is a performance hit, then
2958 * this code should be fixed to salvage the map.
2959 */
/* The map is also tossed when the two objects differ in size (second test below). */
2960 assert(object->existence_map == VM_EXTERNAL_NULL);
2961 if (backing_offset || (size != backing_object->size)) {
2962 vm_external_discarded++;
2963 vm_external_destroy(backing_object->existence_map,
2964 backing_object->size);
2965 }
2966 else {
2967 vm_external_collapsed++;
2968 object->existence_map = backing_object->existence_map;
2969 }
2970 backing_object->existence_map = VM_EXTERNAL_NULL;
2971 #endif /* MACH_PAGEMAP */
2972
2973 /*
2974 * Object now shadows whatever backing_object did.
2975 * Note that the reference to backing_object->shadow
2976 * moves from within backing_object to within object.
2977 */
2978
2979 object->shadow = backing_object->shadow;
2980 object->shadow_offset += backing_object->shadow_offset;
2981 assert((object->shadow == VM_OBJECT_NULL) ||
2982 (object->shadow->copy == VM_OBJECT_NULL));
2983
2984 /*
2985 * Discard backing_object.
2986 *
2987 * Since the backing object has no pages, no
2988 * pager left, and no object references within it,
2989 * all that is necessary is to dispose of it.
2990 */
2991
2992 assert((backing_object->ref_count == 1) &&
2993 (backing_object->resident_page_count == 0) &&
2994 (backing_object->paging_in_progress == 0));
2995
2996 backing_object->alive = FALSE;
2997 vm_object_unlock(backing_object);
2998
2999 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3000 (integer_t)backing_object, 0,0,0,0);
3001
/* Freed directly to the zone rather than through vm_object_deallocate(). */
3002 zfree(vm_object_zone, (vm_offset_t) backing_object);
3003
3004 object_collapses++;
3005 }
3006
3007 static void
3008 vm_object_do_bypass(
3009 vm_object_t object,
3010 vm_object_t backing_object)
3011 {
3012 /*
3013 * Make the parent shadow the next object
3014 * in the chain.
3015 */
3016
3017 #if TASK_SWAPPER
3018 /*
3019 * Do object reference in-line to
3020 * conditionally increment shadow's
3021 * residence count. If object is not
3022 * resident, leave residence count
3023 * on shadow alone.
3024 */
3025 if (backing_object->shadow != VM_OBJECT_NULL) {
3026 vm_object_lock(backing_object->shadow);
3027 backing_object->shadow->ref_count++;
3028 if (object->res_count != 0)
3029 vm_object_res_reference(backing_object->shadow);
3030 vm_object_unlock(backing_object->shadow);
3031 }
3032 #else /* TASK_SWAPPER */
3033 vm_object_reference(backing_object->shadow);
3034 #endif /* TASK_SWAPPER */
3035
3036 object->shadow = backing_object->shadow;
3037 object->shadow_offset += backing_object->shadow_offset;
3038
3039 /*
3040 * Backing object might have had a copy pointer
3041 * to us. If it did, clear it.
3042 */
3043 if (backing_object->copy == object) {
3044 backing_object->copy = VM_OBJECT_NULL;
3045 }
3046
3047 /*
3048 * Drop the reference count on backing_object.
3049 #if TASK_SWAPPER
3050 * Since its ref_count was at least 2, it
3051 * will not vanish; so we don't need to call
3052 * vm_object_deallocate.
3053 * [FBDP: that doesn't seem to be true any more]
3054 *
3055 * The res_count on the backing object is
3056 * conditionally decremented. It's possible
3057 * (via vm_pageout_scan) to get here with
3058 * a "swapped" object, which has a 0 res_count,
3059 * in which case, the backing object res_count
3060 * is already down by one.
3061 #else
3062 * Don't call vm_object_deallocate unless
3063 * ref_count drops to zero.
3064 *
3065 * The ref_count can drop to zero here if the
3066 * backing object could be bypassed but not
3067 * collapsed, such as when the backing object
3068 * is temporary and cachable.
3069 #endif
3070 */
3071 if (backing_object->ref_count > 1) {
3072 backing_object->ref_count--;
3073 #if TASK_SWAPPER
3074 if (object->res_count != 0)
3075 vm_object_res_deallocate(backing_object);
3076 assert(backing_object->ref_count > 0);
3077 #endif /* TASK_SWAPPER */
3078 vm_object_unlock(backing_object);
3079 } else {
3080
3081 /*
3082 * Drop locks so that we can deallocate
3083 * the backing object.
3084 */
3085
3086 #if TASK_SWAPPER
3087 if (object->res_count == 0) {
3088 /* XXX get a reference for the deallocate below */
3089 vm_object_res_reference(backing_object);
3090 }
3091 #endif /* TASK_SWAPPER */
3092 vm_object_unlock(object);
3093 vm_object_unlock(backing_object);
3094 vm_object_deallocate(backing_object);
3095
3096 /*
3097 * Relock object. We don't have to reverify
3098 * its state since vm_object_collapse will
3099 * do that for us as it starts at the
3100 * top of its loop.
3101 */
3102
3103 vm_object_lock(object);
3104 }
3105
3106 object_bypasses++;
3107 }
3108
3109
3110 /*
3111 * vm_object_collapse:
3112 *
3113 * Perform an object collapse or an object bypass if appropriate.
3114 * The real work of collapsing and bypassing is performed in
3115 * the routines vm_object_do_collapse and vm_object_do_bypass.
3116 *
3117 * Requires that the object be locked and the page queues be unlocked.
3118 *
3119 */
3120 __private_extern__ void
3121 vm_object_collapse(
3122 register vm_object_t object)
3123 {
3124 register vm_object_t backing_object;
3125 register vm_object_offset_t backing_offset;
3126 register vm_object_size_t size;
3127 register vm_object_offset_t new_offset;
3128 register vm_page_t p;
3129
3130 vm_offset_t current_offset;
3131
3132 if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {
3133 return;
3134 }
3135
3136 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3137 (integer_t)object, 0,0,0,0);
3138
3139 while (TRUE) {
3140 /*
3141 * Verify that the conditions are right for either
3142 * collapse or bypass:
3143 *
3144 * The object exists and no pages in it are currently
3145 * being paged out, and
3146 */
3147 if (object == VM_OBJECT_NULL ||
3148 object->paging_in_progress != 0 ||
3149 object->absent_count != 0)
3150 return;
3151
3152 /*
3153 * There is a backing object, and
3154 */
3155
3156 if ((backing_object = object->shadow) == VM_OBJECT_NULL)
3157 return;
3158
3159 vm_object_lock(backing_object);
3160
3161 /*
3162 * ...
3163 * The backing object is not read_only,
3164 * and no pages in the backing object are
3165 * currently being paged out.
3166 * The backing object is internal.
3167 *
3168 */
3169
3170 if (!backing_object->internal ||
3171 backing_object->paging_in_progress != 0) {
3172 vm_object_unlock(backing_object);
3173 return;
3174 }
3175
3176 /*
3177 * The backing object can't be a copy-object:
3178 * the shadow_offset for the copy-object must stay
3179 * as 0. Furthermore (for the 'we have all the
3180 * pages' case), if we bypass backing_object and
3181 * just shadow the next object in the chain, old
3182 * pages from that object would then have to be copied
3183 * BOTH into the (former) backing_object and into the
3184 * parent object.
3185 */
3186 if (backing_object->shadow != VM_OBJECT_NULL &&
3187 backing_object->shadow->copy != VM_OBJECT_NULL) {
3188 vm_object_unlock(backing_object);
3189 return;
3190 }
3191
3192 /*
3193 * We can now try to either collapse the backing
3194 * object (if the parent is the only reference to
3195 * it) or (perhaps) remove the parent's reference
3196 * to it.
3197 *
3198 * If there is exactly one reference to the backing
3199 * object, we may be able to collapse it into the
3200 * parent.
3201 *
3202 * The backing object must not have a pager
3203 * created for it, since collapsing an object
3204 * into a backing_object dumps new pages into
3205 * the backing_object that its pager doesn't
3206 * know about.
3207 */
3208
3209 if (backing_object->ref_count == 1 &&
3210 ! object->pager_created &&
3211 vm_object_collapse_allowed) {
3212
3213 XPR(XPR_VM_OBJECT,
3214 "vm_object_collapse: %x to %x, pager %x, pager_request %x\n",
3215 (integer_t)backing_object, (integer_t)object,
3216 (integer_t)backing_object->pager,
3217 (integer_t)backing_object->pager_request, 0);
3218
3219 /*
3220 * We need the cache lock for collapsing,
3221 * but we must not deadlock.
3222 */
3223
3224 if (! vm_object_cache_lock_try()) {
3225 vm_object_unlock(backing_object);
3226 return;
3227 }
3228
3229 /*
3230 * Collapse the object with its backing
3231 * object, and try again with the object's
3232 * new backing object.
3233 */
3234
3235 vm_object_do_collapse(object, backing_object);
3236 continue;
3237 }
3238
3239
3240 /*
3241 * Collapsing the backing object was not possible
3242 * or permitted, so let's try bypassing it.
3243 */
3244
3245 if (! vm_object_bypass_allowed) {
3246 vm_object_unlock(backing_object);
3247 return;
3248 }
3249
3250
3251 /*
3252 * If the backing object has a pager but no pagemap,
3253 * then we cannot bypass it, because we don't know
3254 * what pages it has.
3255 */
3256 if (backing_object->pager_created
3257 #if MACH_PAGEMAP
3258 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3259 #endif /* MACH_PAGEMAP */
3260 ) {
3261 vm_object_unlock(backing_object);
3262 return;
3263 }
3264
3265 /*
3266 * If the object has a pager but no pagemap,
3267 * then we cannot bypass it, because we don't know
3268 * what pages it has.
3269 */
3270 if (object->pager_created
3271 #if MACH_PAGEMAP
3272 && (object->existence_map == VM_EXTERNAL_NULL)
3273 #endif /* MACH_PAGEMAP */
3274 ) {
3275 vm_object_unlock(backing_object);
3276 return;
3277 }
3278
3279 backing_offset = object->shadow_offset;
3280 size = object->size;
3281
3282 /*
3283 * If all of the pages in the backing object are
3284 * shadowed by the parent object, the parent
3285 * object no longer has to shadow the backing
3286 * object; it can shadow the next one in the
3287 * chain.
3288 *
3289 * If the backing object has existence info,
3290 * we must check examine its existence info
3291 * as well.
3292 *
3293 */
3294
3295 if(object->cow_hint >= size)
3296 object->cow_hint = 0;
3297 current_offset = object->cow_hint;
3298 while(TRUE) {
3299 if (vm_page_lookup(object,
3300 (vm_object_offset_t)current_offset)
3301 != VM_PAGE_NULL) {
3302 current_offset+=PAGE_SIZE;
3303 } else if ((object->pager_created) &&
3304 (object->existence_map != NULL) &&
3305 (vm_external_state_get(object->existence_map,
3306 current_offset)
3307 != VM_EXTERNAL_STATE_ABSENT)) {
3308 current_offset+=PAGE_SIZE;
3309 } else if (vm_page_lookup(backing_object,
3310 (vm_object_offset_t)current_offset
3311 + backing_offset)!= VM_PAGE_NULL) {
3312 /* found a dependency */
3313 object->cow_hint = current_offset;
3314 vm_object_unlock(backing_object);
3315 return;
3316 } else if ((backing_object->pager_created) &&
3317 (backing_object->existence_map != NULL) &&
3318 (vm_external_state_get(
3319 backing_object->existence_map,
3320 current_offset + backing_offset)
3321 != VM_EXTERNAL_STATE_ABSENT)) {
3322 /* found a dependency */
3323 object->cow_hint = current_offset;
3324 vm_object_unlock(backing_object);
3325 return;
3326 } else {
3327 current_offset+=PAGE_SIZE;
3328 }
3329 if(current_offset >= size) {
3330 /* wrap at end of object */
3331 current_offset = 0;
3332 }
3333 if(current_offset == object->cow_hint) {
3334 /* we are free of shadow influence */
3335 break;
3336 }
3337 }
3338 /* reset the cow_hint for any objects deeper in the chain */
3339 object->cow_hint = 0;
3340
3341
3342
3343 /*
3344 * All interesting pages in the backing object
3345 * already live in the parent or its pager.
3346 * Thus we can bypass the backing object.
3347 */
3348
3349 vm_object_do_bypass(object, backing_object);
3350
3351 /*
3352 * Try again with this object's new backing object.
3353 */
3354
3355 continue;
3356 }
3357 }
3358
3359 /*
3360 * Routine: vm_object_page_remove: [internal]
3361 * Purpose:
3362 * Removes all physical pages in the specified
3363 * object range from the object's list of pages.
3364 *
3365 * In/out conditions:
3366 * The object must be locked.
3367 * The object must not have paging_in_progress, usually
3368 * guaranteed by not having a pager.
3369 */
3370 unsigned int vm_object_page_remove_lookup = 0;
3371 unsigned int vm_object_page_remove_iterate = 0;
3372
3373 __private_extern__ void
3374 vm_object_page_remove(
3375 register vm_object_t object,
3376 register vm_object_offset_t start,
3377 register vm_object_offset_t end)
3378 {
3379 register vm_page_t p, next;
3380
3381 /*
3382 * One and two page removals are most popular.
3383 * The factor of 16 here is somewhat arbitrary.
3384 * It balances vm_object_lookup vs iteration.
3385 */
3386
3387 if (atop(end - start) < (unsigned)object->resident_page_count/16) {
3388 vm_object_page_remove_lookup++;
3389
3390 for (; start < end; start += PAGE_SIZE_64) {
3391 p = vm_page_lookup(object, start);
3392 if (p != VM_PAGE_NULL) {
3393 assert(!p->cleaning && !p->pageout);
3394 if (!p->fictitious)
3395 pmap_page_protect(p->phys_addr,
3396 VM_PROT_NONE);
3397 VM_PAGE_FREE(p);
3398 }
3399 }
3400 } else {
3401 vm_object_page_remove_iterate++;
3402
3403 p = (vm_page_t) queue_first(&object->memq);
3404 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3405 next = (vm_page_t) queue_next(&p->listq);
3406 if ((start <= p->offset) && (p->offset < end)) {
3407 assert(!p->cleaning && !p->pageout);
3408 if (!p->fictitious)
3409 pmap_page_protect(p->phys_addr,
3410 VM_PROT_NONE);
3411 VM_PAGE_FREE(p);
3412 }
3413 p = next;
3414 }
3415 }
3416 }
3417
3418
3419 /*
3420 * Routine: vm_object_coalesce
3421 * Function: Coalesces two objects backing up adjoining
3422 * regions of memory into a single object.
3423 *
3424 * returns TRUE if objects were combined.
3425 *
3426 * NOTE: Only works at the moment if the second object is NULL -
3427 * if it's not, which object do we lock first?
3428 *
3429 * Parameters:
3430 * prev_object First object to coalesce
3431 * prev_offset Offset into prev_object
3432 * next_object Second object into coalesce
3433 * next_offset Offset into next_object
3434 *
3435 * prev_size Size of reference to prev_object
3436 * next_size Size of reference to next_object
3437 *
3438 * Conditions:
3439 * The object(s) must *not* be locked. The map must be locked
3440 * to preserve the reference to the object(s).
3441 */
3442 static int vm_object_coalesce_count = 0;
3443
3444 __private_extern__ boolean_t
3445 vm_object_coalesce(
3446 register vm_object_t prev_object,
3447 vm_object_t next_object,
3448 vm_object_offset_t prev_offset,
3449 vm_object_offset_t next_offset,
3450 vm_object_size_t prev_size,
3451 vm_object_size_t next_size)
3452 {
3453 vm_object_size_t newsize;
3454
3455 #ifdef lint
3456 next_offset++;
3457 #endif /* lint */
3458
3459 if (next_object != VM_OBJECT_NULL) {
3460 return(FALSE);
3461 }
3462
3463 if (prev_object == VM_OBJECT_NULL) {
3464 return(TRUE);
3465 }
3466
3467 XPR(XPR_VM_OBJECT,
3468 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3469 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3470
3471 vm_object_lock(prev_object);
3472
3473 /*
3474 * Try to collapse the object first
3475 */
3476 vm_object_collapse(prev_object);
3477
3478 /*
3479 * Can't coalesce if pages not mapped to
3480 * prev_entry may be in use any way:
3481 * . more than one reference
3482 * . paged out
3483 * . shadows another object
3484 * . has a copy elsewhere
3485 * . paging references (pages might be in page-list)
3486 */
3487
3488 if ((prev_object->ref_count > 1) ||
3489 prev_object->pager_created ||
3490 (prev_object->shadow != VM_OBJECT_NULL) ||
3491 (prev_object->copy != VM_OBJECT_NULL) ||
3492 (prev_object->true_share != FALSE) ||
3493 (prev_object->paging_in_progress != 0)) {
3494 vm_object_unlock(prev_object);
3495 return(FALSE);
3496 }
3497
3498 vm_object_coalesce_count++;
3499
3500 /*
3501 * Remove any pages that may still be in the object from
3502 * a previous deallocation.
3503 */
3504 vm_object_page_remove(prev_object,
3505 prev_offset + prev_size,
3506 prev_offset + prev_size + next_size);
3507
3508 /*
3509 * Extend the object if necessary.
3510 */
3511 newsize = prev_offset + prev_size + next_size;
3512 if (newsize > prev_object->size) {
3513 #if MACH_PAGEMAP
3514 /*
3515 * We cannot extend an object that has existence info,
3516 * since the existence info might then fail to cover
3517 * the entire object.
3518 *
3519 * This assertion must be true because the object
3520 * has no pager, and we only create existence info
3521 * for objects with pagers.
3522 */
3523 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3524 #endif /* MACH_PAGEMAP */
3525 prev_object->size = newsize;
3526 }
3527
3528 vm_object_unlock(prev_object);
3529 return(TRUE);
3530 }
3531
3532 /*
3533 * Attach a set of physical pages to an object, so that they can
3534 * be mapped by mapping the object. Typically used to map IO memory.
3535 *
3536 * The mapping function and its private data are used to obtain the
3537 * physical addresses for each page to be mapped.
3538 */
3539 void
3540 vm_object_page_map(
3541 vm_object_t object,
3542 vm_object_offset_t offset,
3543 vm_object_size_t size,
3544 vm_object_offset_t (*map_fn)(void *map_fn_data,
3545 vm_object_offset_t offset),
3546 void *map_fn_data) /* private to map_fn */
3547 {
3548 int num_pages;
3549 int i;
3550 vm_page_t m;
3551 vm_page_t old_page;
3552 vm_object_offset_t addr;
3553
3554 num_pages = atop(size);
3555
3556 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3557
3558 addr = (*map_fn)(map_fn_data, offset);
3559
3560 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3561 vm_page_more_fictitious();
3562
3563 vm_object_lock(object);
3564 if ((old_page = vm_page_lookup(object, offset))
3565 != VM_PAGE_NULL)
3566 {
3567 vm_page_lock_queues();
3568 vm_page_free(old_page);
3569 vm_page_unlock_queues();
3570 }
3571
3572 vm_page_init(m, addr);
3573 /* private normally requires lock_queues but since we */
3574 /* are initializing the page, its not necessary here */
3575 m->private = TRUE; /* don`t free page */
3576 m->wire_count = 1;
3577 vm_page_insert(m, object, offset);
3578
3579 PAGE_WAKEUP_DONE(m);
3580 vm_object_unlock(object);
3581 }
3582 }
3583
3584 #include <mach_kdb.h>
3585
3586 #if MACH_KDB
3587 #include <ddb/db_output.h>
3588 #include <vm/vm_print.h>
3589
3590 #define printf kdbprintf
3591
3592 extern boolean_t vm_object_cached(
3593 vm_object_t object);
3594
3595 extern void print_bitstring(
3596 char byte);
3597
3598 boolean_t vm_object_print_pages = FALSE;
3599
/*
 *	print_bitstring:	[ debug ]
 *
 *	Print the eight bits of "byte" as '0'/'1' characters,
 *	least significant bit first.
 */
void
print_bitstring(
	char byte)
{
	char	bits[8];
	int	pos;

	for (pos = 0; pos < 8; pos++) {
		if (byte & (1 << pos))
			bits[pos] = '1';
		else
			bits[pos] = '0';
	}

	printf("%c%c%c%c%c%c%c%c",
	       bits[0], bits[1], bits[2], bits[3],
	       bits[4], bits[5], bits[6], bits[7]);
}
3614
3615 boolean_t
3616 vm_object_cached(
3617 register vm_object_t object)
3618 {
3619 register vm_object_t o;
3620
3621 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3622 if (object == o) {
3623 return TRUE;
3624 }
3625 }
3626 return FALSE;
3627 }
3628
3629 #if MACH_PAGEMAP
3630 /*
3631 * vm_external_print: [ debug ]
3632 */
3633 void
3634 vm_external_print(
3635 vm_external_map_t map,
3636 vm_size_t size)
3637 {
3638 if (map == VM_EXTERNAL_NULL) {
3639 printf("0 ");
3640 } else {
3641 vm_size_t existence_size = stob(size);
3642 printf("{ size=%d, map=[", existence_size);
3643 if (existence_size > 0) {
3644 print_bitstring(map[0]);
3645 }
3646 if (existence_size > 1) {
3647 print_bitstring(map[1]);
3648 }
3649 if (existence_size > 2) {
3650 printf("...");
3651 print_bitstring(map[existence_size-1]);
3652 }
3653 printf("] }\n");
3654 }
3655 return;
3656 }
3657 #endif /* MACH_PAGEMAP */
3658
3659 int
3660 vm_follow_object(
3661 vm_object_t object)
3662 {
3663 extern db_indent;
3664
3665 int count = 0;
3666 int orig_db_indent = db_indent;
3667
3668 while (TRUE) {
3669 if (object == VM_OBJECT_NULL) {
3670 db_indent = orig_db_indent;
3671 return count;
3672 }
3673
3674 count += 1;
3675
3676 iprintf("object 0x%x", object);
3677 printf(", shadow=0x%x", object->shadow);
3678 printf(", copy=0x%x", object->copy);
3679 printf(", pager=0x%x", object->pager);
3680 printf(", ref=%d\n", object->ref_count);
3681
3682 db_indent += 2;
3683 object = object->shadow;
3684 }
3685
3686 }
3687
3688 /*
3689 * vm_object_print: [ debug ]
3690 */
3691 void
3692 vm_object_print(
3693 vm_object_t object,
3694 boolean_t have_addr,
3695 int arg_count,
3696 char *modif)
3697 {
3698 register vm_page_t p;
3699 extern db_indent;
3700 char *s;
3701
3702 register int count;
3703
3704 if (object == VM_OBJECT_NULL)
3705 return;
3706
3707 iprintf("object 0x%x\n", object);
3708
3709 db_indent += 2;
3710
3711 iprintf("size=0x%x", object->size);
3712 printf(", cluster=0x%x", object->cluster_size);
3713 printf(", frozen=0x%x", object->frozen_size);
3714 printf(", ref_count=%d\n", object->ref_count);
3715 iprintf("");
3716 #if TASK_SWAPPER
3717 printf("res_count=%d, ", object->res_count);
3718 #endif /* TASK_SWAPPER */
3719 printf("resident_page_count=%d\n", object->resident_page_count);
3720
3721 iprintf("shadow=0x%x", object->shadow);
3722 if (object->shadow) {
3723 register int i = 0;
3724 vm_object_t shadow = object;
3725 while(shadow = shadow->shadow)
3726 i++;
3727 printf(" (depth %d)", i);
3728 }
3729 printf(", copy=0x%x", object->copy);
3730 printf(", shadow_offset=0x%x", object->shadow_offset);
3731 printf(", last_alloc=0x%x\n", object->last_alloc);
3732
3733 iprintf("pager=0x%x", object->pager);
3734 printf(", paging_offset=0x%x", object->paging_offset);
3735 printf(", pager_request=0x%x\n", object->pager_request);
3736
3737 iprintf("copy_strategy=%d[", object->copy_strategy);
3738 switch (object->copy_strategy) {
3739 case MEMORY_OBJECT_COPY_NONE:
3740 printf("copy_none");
3741 break;
3742
3743 case MEMORY_OBJECT_COPY_CALL:
3744 printf("copy_call");
3745 break;
3746
3747 case MEMORY_OBJECT_COPY_DELAY:
3748 printf("copy_delay");
3749 break;
3750
3751 case MEMORY_OBJECT_COPY_SYMMETRIC:
3752 printf("copy_symmetric");
3753 break;
3754
3755 case MEMORY_OBJECT_COPY_INVALID:
3756 printf("copy_invalid");
3757 break;
3758
3759 default:
3760 printf("?");
3761 }
3762 printf("]");
3763 printf(", absent_count=%d\n", object->absent_count);
3764
3765 iprintf("all_wanted=0x%x<", object->all_wanted);
3766 s = "";
3767 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
3768 printf("%sinit", s);
3769 s = ",";
3770 }
3771 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
3772 printf("%sready", s);
3773 s = ",";
3774 }
3775 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
3776 printf("%spaging", s);
3777 s = ",";
3778 }
3779 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
3780 printf("%sabsent", s);
3781 s = ",";
3782 }
3783 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
3784 printf("%slock", s);
3785 s = ",";
3786 }
3787 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
3788 printf("%suncaching", s);
3789 s = ",";
3790 }
3791 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
3792 printf("%scopy_call", s);
3793 s = ",";
3794 }
3795 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
3796 printf("%scaching", s);
3797 s = ",";
3798 }
3799 printf(">");
3800 printf(", paging_in_progress=%d\n", object->paging_in_progress);
3801
3802 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
3803 (object->pager_created ? "" : "!"),
3804 (object->pager_initialized ? "" : "!"),
3805 (object->pager_ready ? "" : "!"),
3806 (object->can_persist ? "" : "!"),
3807 (object->pager_trusted ? "" : "!"),
3808 (object->pageout ? "" : "!"),
3809 (object->internal ? "internal" : "external"),
3810 (object->temporary ? "temporary" : "permanent"));
3811 iprintf("%salive, %slock_in_progress, %slock_restart, %sshadowed, %scached, %sprivate\n",
3812 (object->alive ? "" : "!"),
3813 (object->lock_in_progress ? "" : "!"),
3814 (object->lock_restart ? "" : "!"),
3815 (object->shadowed ? "" : "!"),
3816 (vm_object_cached(object) ? "" : "!"),
3817 (object->private ? "" : "!"));
3818 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
3819 (object->advisory_pageout ? "" : "!"),
3820 (object->silent_overwrite ? "" : "!"));
3821
3822 #if MACH_PAGEMAP
3823 iprintf("existence_map=");
3824 vm_external_print(object->existence_map, object->size);
3825 #endif /* MACH_PAGEMAP */
3826 #if MACH_ASSERT
3827 iprintf("paging_object=0x%x\n", object->paging_object);
3828 #endif /* MACH_ASSERT */
3829
3830 if (vm_object_print_pages) {
3831 count = 0;
3832 p = (vm_page_t) queue_first(&object->memq);
3833 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3834 if (count == 0) {
3835 iprintf("memory:=");
3836 } else if (count == 2) {
3837 printf("\n");
3838 iprintf(" ...");
3839 count = 0;
3840 } else {
3841 printf(",");
3842 }
3843 count++;
3844
3845 printf("(off=0x%X,page=0x%X)", p->offset, (integer_t) p);
3846 p = (vm_page_t) queue_next(&p->listq);
3847 }
3848 if (count != 0) {
3849 printf("\n");
3850 }
3851 }
3852 db_indent -= 2;
3853 }
3854
3855
3856 /*
3857 * vm_object_find [ debug ]
3858 *
3859 * Find all tasks which reference the given vm_object.
3860 */
3861
3862 boolean_t vm_object_find(vm_object_t object);
3863 boolean_t vm_object_print_verbose = FALSE;
3864
3865 boolean_t
3866 vm_object_find(
3867 vm_object_t object)
3868 {
3869 task_t task;
3870 vm_map_t map;
3871 vm_map_entry_t entry;
3872 processor_set_t pset = &default_pset;
3873 boolean_t found = FALSE;
3874
3875 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
3876 map = task->map;
3877 for (entry = vm_map_first_entry(map);
3878 entry && entry != vm_map_to_entry(map);
3879 entry = entry->vme_next) {
3880
3881 vm_object_t obj;
3882
3883 /*
3884 * For the time being skip submaps,
3885 * only the kernel can have submaps,
3886 * and unless we are interested in
3887 * kernel objects, we can simply skip
3888 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
3889 * for a full solution.
3890 */
3891 if (entry->is_sub_map)
3892 continue;
3893 if (entry)
3894 obj = entry->object.vm_object;
3895 else
3896 continue;
3897
3898 while (obj != VM_OBJECT_NULL) {
3899 if (obj == object) {
3900 if (!found) {
3901 printf("TASK\t\tMAP\t\tENTRY\n");
3902 found = TRUE;
3903 }
3904 printf("0x%x\t0x%x\t0x%x\n",
3905 task, map, entry);
3906 }
3907 obj = obj->shadow;
3908 }
3909 }
3910 }
3911
3912 return(found);
3913 }
3914
3915 #endif /* MACH_KDB */
3916
3917 kern_return_t
3918 vm_object_populate_with_private(
3919 vm_object_t object,
3920 vm_object_offset_t offset,
3921 vm_offset_t phys_addr,
3922 vm_size_t size)
3923 {
3924 vm_offset_t base_addr;
3925 vm_object_offset_t base_offset;
3926
3927
3928 if(!object->private)
3929 return KERN_FAILURE;
3930
3931 if((base_addr = trunc_page(phys_addr)) != phys_addr) {
3932 return KERN_FAILURE;
3933 }
3934
3935
3936 vm_object_lock(object);
3937 if(!object->phys_contiguous) {
3938 vm_page_t m;
3939 if((base_offset = trunc_page(offset)) != offset) {
3940 vm_object_unlock(object);
3941 return KERN_FAILURE;
3942 }
3943 base_offset += object->paging_offset;
3944 while(size) {
3945 m = vm_page_lookup(object, base_offset);
3946 if(m != VM_PAGE_NULL) {
3947 if(m->fictitious) {
3948 vm_page_lock_queues();
3949 m->fictitious = FALSE;
3950 m->private = TRUE;
3951 m->phys_addr = base_addr;
3952 if(!m->busy) {
3953 m->busy = TRUE;
3954 }
3955 if(!m->absent) {
3956 m->absent = TRUE;
3957 object->absent_count++;
3958 }
3959 m->list_req_pending = TRUE;
3960 vm_page_unlock_queues();
3961 } else if (m->phys_addr != base_addr) {
3962 /* pmap call to clear old mapping */
3963 pmap_page_protect(m->phys_addr,
3964 VM_PROT_NONE);
3965 m->phys_addr = base_addr;
3966 }
3967 } else {
3968 while ((m = vm_page_grab_fictitious())
3969 == VM_PAGE_NULL)
3970 vm_page_more_fictitious();
3971 vm_page_lock_queues();
3972 m->fictitious = FALSE;
3973 m->private = TRUE;
3974 m->phys_addr = base_addr;
3975 m->list_req_pending = TRUE;
3976 m->absent = TRUE;
3977 m->unusual = TRUE;
3978 object->absent_count++;
3979 vm_page_unlock_queues();
3980 vm_page_insert(m, object, base_offset);
3981 }
3982 base_addr += PAGE_SIZE;
3983 base_offset += PAGE_SIZE;
3984 size -= PAGE_SIZE;
3985 }
3986 } else {
3987 /* NOTE: we should check the original settings here */
3988 /* if we have a size > zero a pmap call should be made */
3989 /* to disable the range */
3990
3991 /* pmap_? */
3992
3993 /* shadows on contiguous memory are not allowed */
3994 /* we therefore can use the offset field */
3995 object->shadow_offset = (vm_object_offset_t)phys_addr;
3996 object->size = size;
3997 }
3998 vm_object_unlock(object);
3999 return KERN_SUCCESS;
4000 }
4001
4002 /*
4003 * memory_object_free_from_cache:
4004 *
4005 * Walk the vm_object cache list, removing and freeing vm_objects
4006 * which are backed by the pager identified by the caller, (pager_id).
4007 * Remove up to "count" objects, if there are that may available
4008 * in the cache.
4009 *
4010 * Walk the list at most once, return the number of vm_objects
4011 * actually freed.
4012 */
4013
4014 __private_extern__ kern_return_t
4015 memory_object_free_from_cache(
4016 host_t host,
4017 int *pager_id,
4018 int *count)
4019 {
4020
4021 int object_released = 0;
4022 int i;
4023
4024 register vm_object_t object = VM_OBJECT_NULL;
4025 vm_object_t shadow;
4026
4027 /*
4028 if(host == HOST_NULL)
4029 return(KERN_INVALID_ARGUMENT);
4030 */
4031
4032 try_again:
4033 vm_object_cache_lock();
4034
4035 queue_iterate(&vm_object_cached_list, object,
4036 vm_object_t, cached_list) {
4037 if (object->pager && (pager_id == object->pager->pager)) {
4038 vm_object_lock(object);
4039 queue_remove(&vm_object_cached_list, object,
4040 vm_object_t, cached_list);
4041 vm_object_cached_count--;
4042
4043 /*
4044 * Since this object is in the cache, we know
4045 * that it is initialized and has only a pager's
4046 * (implicit) reference. Take a reference to avoid
4047 * recursive deallocations.
4048 */
4049
4050 assert(object->pager_initialized);
4051 assert(object->ref_count == 0);
4052 object->ref_count++;
4053
4054 /*
4055 * Terminate the object.
4056 * If the object had a shadow, we let
4057 * vm_object_deallocate deallocate it.
4058 * "pageout" objects have a shadow, but
4059 * maintain a "paging reference" rather
4060 * than a normal reference.
4061 * (We are careful here to limit recursion.)
4062 */
4063 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4064 if ((vm_object_terminate(object) == KERN_SUCCESS)
4065 && (shadow != VM_OBJECT_NULL)) {
4066 vm_object_deallocate(shadow);
4067 }
4068
4069 if(object_released++ == *count)
4070 return KERN_SUCCESS;
4071 goto try_again;
4072 }
4073 }
4074 vm_object_cache_unlock();
4075 *count = object_released;
4076 return KERN_SUCCESS;
4077 }
4078
4079
4080
4081 kern_return_t
4082 memory_object_create_named(
4083 memory_object_t pager,
4084 memory_object_offset_t size,
4085 memory_object_control_t *control)
4086 {
4087 vm_object_t object;
4088 vm_object_hash_entry_t entry;
4089
4090 *control = MEMORY_OBJECT_CONTROL_NULL;
4091 if (pager == MEMORY_OBJECT_NULL)
4092 return KERN_INVALID_ARGUMENT;
4093
4094 vm_object_cache_lock();
4095 entry = vm_object_hash_lookup(pager, FALSE);
4096 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4097 (entry->object != VM_OBJECT_NULL)) {
4098 if (entry->object->named == TRUE)
4099 panic("memory_object_create_named: caller already holds the right"); }
4100
4101 vm_object_cache_unlock();
4102 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4103 == VM_OBJECT_NULL) {
4104 return(KERN_INVALID_OBJECT);
4105 }
4106
4107 /* wait for object (if any) to be ready */
4108 if (object != VM_OBJECT_NULL) {
4109 vm_object_lock(object);
4110 object->named = TRUE;
4111 while (!object->pager_ready) {
4112 vm_object_wait(object,
4113 VM_OBJECT_EVENT_PAGER_READY,
4114 FALSE);
4115 vm_object_lock(object);
4116 }
4117 *control = object->pager_request;
4118 vm_object_unlock(object);
4119 }
4120 return (KERN_SUCCESS);
4121 }
4122
4123
4124 /*
4125 * Routine: memory_object_recover_named [user interface]
4126 * Purpose:
4127 * Attempt to recover a named reference for a VM object.
4128 * VM will verify that the object has not already started
4129 * down the termination path, and if it has, will optionally
4130 * wait for that to finish.
4131 * Returns:
4132 * KERN_SUCCESS - we recovered a named reference on the object
4133 * KERN_FAILURE - we could not recover a reference (object dead)
4134 * KERN_INVALID_ARGUMENT - bad memory object control
4135 */
4136 kern_return_t
4137 memory_object_recover_named(
4138 memory_object_control_t control,
4139 boolean_t wait_on_terminating)
4140 {
4141 vm_object_t object;
4142
4143 vm_object_cache_lock();
4144 object = memory_object_control_to_vm_object(control);
4145 if (object == VM_OBJECT_NULL) {
4146 vm_object_cache_unlock();
4147 return (KERN_INVALID_ARGUMENT);
4148 }
4149
4150 restart:
4151 vm_object_lock(object);
4152
4153 if (object->terminating && wait_on_terminating) {
4154 vm_object_cache_unlock();
4155 vm_object_wait(object,
4156 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4157 THREAD_UNINT);
4158 vm_object_cache_lock();
4159 goto restart;
4160 }
4161
4162 if (!object->alive) {
4163 vm_object_cache_unlock();
4164 vm_object_unlock(object);
4165 return KERN_FAILURE;
4166 }
4167
4168 if (object->named == TRUE) {
4169 vm_object_cache_unlock();
4170 vm_object_unlock(object);
4171 return KERN_SUCCESS;
4172 }
4173
4174 if((object->ref_count == 0) && (!object->terminating)){
4175 queue_remove(&vm_object_cached_list, object,
4176 vm_object_t, cached_list);
4177 vm_object_cached_count--;
4178 XPR(XPR_VM_OBJECT_CACHE,
4179 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4180 (integer_t)object,
4181 (integer_t)vm_object_cached_list.next,
4182 (integer_t)vm_object_cached_list.prev, 0,0);
4183 }
4184
4185 vm_object_cache_unlock();
4186
4187 object->named = TRUE;
4188 object->ref_count++;
4189 vm_object_res_reference(object);
4190 while (!object->pager_ready) {
4191 vm_object_wait(object,
4192 VM_OBJECT_EVENT_PAGER_READY,
4193 FALSE);
4194 vm_object_lock(object);
4195 }
4196 vm_object_unlock(object);
4197 return (KERN_SUCCESS);
4198 }
4199
4200
4201 /*
4202 * vm_object_release_name:
4203 *
4204 * Enforces name semantic on memory_object reference count decrement
4205 * This routine should not be called unless the caller holds a name
4206 * reference gained through the memory_object_create_named.
4207 *
4208 * If the TERMINATE_IDLE flag is set, the call will return if the
4209 * reference count is not 1. i.e. idle with the only remaining reference
4210 * being the name.
4211 * If the decision is made to proceed the name field flag is set to
4212 * false and the reference count is decremented. If the RESPECT_CACHE
4213 * flag is set and the reference count has gone to zero, the
4214 * memory_object is checked to see if it is cacheable otherwise when
4215 * the reference count is zero, it is simply terminated.
4216 */
4217
4218 __private_extern__ kern_return_t
4219 vm_object_release_name(
4220 vm_object_t object,
4221 int flags)
4222 {
4223 vm_object_t shadow;
4224 boolean_t original_object = TRUE;
4225
4226 while (object != VM_OBJECT_NULL) {
4227
4228 /*
4229 * The cache holds a reference (uncounted) to
4230 * the object. We must locke it before removing
4231 * the object.
4232 *
4233 */
4234
4235 vm_object_cache_lock();
4236 vm_object_lock(object);
4237 assert(object->alive);
4238 if(original_object)
4239 assert(object->named);
4240 assert(object->ref_count > 0);
4241
4242 /*
4243 * We have to wait for initialization before
4244 * destroying or caching the object.
4245 */
4246
4247 if (object->pager_created && !object->pager_initialized) {
4248 assert(!object->can_persist);
4249 vm_object_assert_wait(object,
4250 VM_OBJECT_EVENT_INITIALIZED,
4251 THREAD_UNINT);
4252 vm_object_unlock(object);
4253 vm_object_cache_unlock();
4254 thread_block((void (*)(void)) 0);
4255 continue;
4256 }
4257
4258 if (((object->ref_count > 1)
4259 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4260 || (object->terminating)) {
4261 vm_object_unlock(object);
4262 vm_object_cache_unlock();
4263 return KERN_FAILURE;
4264 } else {
4265 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4266 vm_object_unlock(object);
4267 vm_object_cache_unlock();
4268 return KERN_SUCCESS;
4269 }
4270 }
4271
4272 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4273 (object->ref_count == 1)) {
4274 if(original_object)
4275 object->named = FALSE;
4276 vm_object_unlock(object);
4277 vm_object_cache_unlock();
4278 /* let vm_object_deallocate push this thing into */
4279 /* the cache, if that it is where it is bound */
4280 vm_object_deallocate(object);
4281 return KERN_SUCCESS;
4282 }
4283 VM_OBJ_RES_DECR(object);
4284 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4285 if(object->ref_count == 1) {
4286 if(vm_object_terminate(object) != KERN_SUCCESS) {
4287 if(original_object) {
4288 return KERN_FAILURE;
4289 } else {
4290 return KERN_SUCCESS;
4291 }
4292 }
4293 if (shadow != VM_OBJECT_NULL) {
4294 original_object = FALSE;
4295 object = shadow;
4296 continue;
4297 }
4298 return KERN_SUCCESS;
4299 } else {
4300 object->ref_count--;
4301 assert(object->ref_count > 0);
4302 if(original_object)
4303 object->named = FALSE;
4304 vm_object_unlock(object);
4305 vm_object_cache_unlock();
4306 return KERN_SUCCESS;
4307 }
4308 }
4309 }
4310
4311
4312 __private_extern__ kern_return_t
4313 vm_object_lock_request(
4314 vm_object_t object,
4315 vm_object_offset_t offset,
4316 vm_object_size_t size,
4317 memory_object_return_t should_return,
4318 int flags,
4319 vm_prot_t prot)
4320 {
4321 vm_object_offset_t original_offset = offset;
4322 boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH;
4323
4324 XPR(XPR_MEMORY_OBJECT,
4325 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4326 (integer_t)object, offset, size,
4327 (((should_return&1)<<1)|should_flush), prot);
4328
4329 /*
4330 * Check for bogus arguments.
4331 */
4332 if (object == VM_OBJECT_NULL)
4333 return (KERN_INVALID_ARGUMENT);
4334
4335 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4336 return (KERN_INVALID_ARGUMENT);
4337
4338 size = round_page(size);
4339
4340 /*
4341 * Lock the object, and acquire a paging reference to
4342 * prevent the memory_object reference from being released.
4343 */
4344 vm_object_lock(object);
4345 vm_object_paging_begin(object);
4346 offset -= object->paging_offset;
4347
4348 (void)vm_object_update(object,
4349 offset, size, should_return, flags, prot);
4350
4351 vm_object_paging_end(object);
4352 vm_object_unlock(object);
4353
4354 return (KERN_SUCCESS);
4355 }
4356
4357
4358
4359 #if TASK_SWAPPER
4360 /*
4361 * vm_object_res_deallocate
4362 *
4363 * (recursively) decrement residence counts on vm objects and their shadows.
4364 * Called from vm_object_deallocate and when swapping out an object.
4365 *
4366 * The object is locked, and remains locked throughout the function,
4367 * even as we iterate down the shadow chain. Locks on intermediate objects
4368 * will be dropped, but not the original object.
4369 *
4370 * NOTE: this function used to use recursion, rather than iteration.
4371 */
4372
4373 __private_extern__ void
4374 vm_object_res_deallocate(
4375 vm_object_t object)
4376 {
4377 vm_object_t orig_object = object;
4378 /*
4379 * Object is locked so it can be called directly
4380 * from vm_object_deallocate. Original object is never
4381 * unlocked.
4382 */
4383 assert(object->res_count > 0);
4384 while (--object->res_count == 0) {
4385 assert(object->ref_count >= object->res_count);
4386 vm_object_deactivate_all_pages(object);
4387 /* iterate on shadow, if present */
4388 if (object->shadow != VM_OBJECT_NULL) {
4389 vm_object_t tmp_object = object->shadow;
4390 vm_object_lock(tmp_object);
4391 if (object != orig_object)
4392 vm_object_unlock(object);
4393 object = tmp_object;
4394 assert(object->res_count > 0);
4395 } else
4396 break;
4397 }
4398 if (object != orig_object)
4399 vm_object_unlock(object);
4400 }
4401
4402 /*
4403 * vm_object_res_reference
4404 *
4405 * Internal function to increment residence count on a vm object
4406 * and its shadows. It is called only from vm_object_reference, and
4407 * when swapping in a vm object, via vm_map_swap.
4408 *
4409 * The object is locked, and remains locked throughout the function,
4410 * even as we iterate down the shadow chain. Locks on intermediate objects
4411 * will be dropped, but not the original object.
4412 *
4413 * NOTE: this function used to use recursion, rather than iteration.
4414 */
4415
4416 __private_extern__ void
4417 vm_object_res_reference(
4418 vm_object_t object)
4419 {
4420 vm_object_t orig_object = object;
4421 /*
4422 * Object is locked, so this can be called directly
4423 * from vm_object_reference. This lock is never released.
4424 */
4425 while ((++object->res_count == 1) &&
4426 (object->shadow != VM_OBJECT_NULL)) {
4427 vm_object_t tmp_object = object->shadow;
4428
4429 assert(object->ref_count >= object->res_count);
4430 vm_object_lock(tmp_object);
4431 if (object != orig_object)
4432 vm_object_unlock(object);
4433 object = tmp_object;
4434 }
4435 if (object != orig_object)
4436 vm_object_unlock(object);
4437 assert(orig_object->ref_count >= orig_object->res_count);
4438 }
4439 #endif /* TASK_SWAPPER */
4440
4441 /*
4442 * vm_object_reference:
4443 *
4444 * Gets another reference to the given object.
4445 */
4446 #ifdef vm_object_reference
4447 #undef vm_object_reference
4448 #endif
4449 __private_extern__ void
4450 vm_object_reference(
4451 register vm_object_t object)
4452 {
4453 if (object == VM_OBJECT_NULL)
4454 return;
4455
4456 vm_object_lock(object);
4457 assert(object->ref_count > 0);
4458 vm_object_reference_locked(object);
4459 vm_object_unlock(object);
4460 }
4461
4462 #ifdef MACH_BSD
4463 /*
4464 * Scale the vm_object_cache
4465 * This is required to make sure that the vm_object_cache is big
4466 * enough to effectively cache the mapped file.
4467 * This is really important with UBC as all the regular file vnodes
4468 * have memory object associated with them. Havving this cache too
4469 * small results in rapid reclaim of vnodes and hurts performance a LOT!
4470 *
4471 * This is also needed as number of vnodes can be dynamically scaled.
4472 */
4473 kern_return_t
4474 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval)
4475 {
4476 vm_object_cached_max = nval;
4477 vm_object_cache_trim(FALSE);
4478 return (KERN_SUCCESS);
4479 }
4480 #endif /* MACH_BSD */
4481