]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_object.c
xnu-344.tar.gz
[apple/xnu.git] / osfmk / vm / vm_object.c
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm/vm_object.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Virtual memory object module.
57 */
58
#ifdef	MACH_BSD
/* Remove as part of the component support merge. */
extern int	vnode_pager_workaround;
#endif	/* MACH_BSD */
63
64 #include <mach_pagemap.h>
65 #include <task_swapper.h>
66
67 #include <mach/mach_types.h>
68 #include <mach/memory_object.h>
69 #include <mach/memory_object_default.h>
70 #include <mach/memory_object_control_server.h>
71 #include <mach/vm_param.h>
72 #include <ipc/ipc_port.h>
73 #include <kern/assert.h>
74 #include <kern/lock.h>
75 #include <kern/queue.h>
76 #include <kern/xpr.h>
77 #include <kern/zalloc.h>
78 #include <kern/host.h>
79 #include <kern/host_statistics.h>
80 #include <kern/processor.h>
81 #include <vm/memory_object.h>
82 #include <vm/vm_fault.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_object.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <kern/misc_protos.h>
88
89
90
91 /*
92 * Virtual memory objects maintain the actual data
93 * associated with allocated virtual memory. A given
94 * page of memory exists within exactly one object.
95 *
96 * An object is only deallocated when all "references"
97 * are given up.
98 *
99 * Associated with each object is a list of all resident
100 * memory pages belonging to that object; this list is
101 * maintained by the "vm_page" module, but locked by the object's
102 * lock.
103 *
104 * Each object also records the memory object reference
105 * that is used by the kernel to request and write
106 * back data (the memory object, field "pager"), etc...
107 *
108 * Virtual memory objects are allocated to provide
109 * zero-filled memory (vm_allocate) or map a user-defined
110 * memory object into a virtual address space (vm_map).
111 *
112 * Virtual memory objects that refer to a user-defined
113 * memory object are called "permanent", because all changes
114 * made in virtual memory are reflected back to the
115 * memory manager, which may then store it permanently.
116 * Other virtual memory objects are called "temporary",
117 * meaning that changes need be written back only when
118 * necessary to reclaim pages, and that storage associated
119 * with the object can be discarded once it is no longer
120 * mapped.
121 *
122 * A permanent memory object may be mapped into more
123 * than one virtual address space. Moreover, two threads
124 * may attempt to make the first mapping of a memory
125 * object concurrently. Only one thread is allowed to
126 * complete this mapping; all others wait until the
127 * "pager_initialized" field is asserted, indicating
128 * that the first thread has initialized all of the
129 * necessary fields in the virtual memory object structure.
130 *
131 * The kernel relies on a *default memory manager* to
132 * provide backing storage for the zero-filled virtual
133 * memory objects. The pager memory objects associated
134 * with these temporary virtual memory objects are only
135 * requested from the default memory manager when it
136 * becomes necessary. Virtual memory objects
137 * that depend on the default memory manager are called
138 * "internal". The "pager_created" field is provided to
139 * indicate whether these ports have ever been allocated.
140 *
141 * The kernel may also create virtual memory objects to
142 * hold changed pages after a copy-on-write operation.
143 * In this case, the virtual memory object (and its
144 * backing storage -- its memory object) only contain
145 * those pages that have been changed. The "shadow"
146 * field refers to the virtual memory object that contains
147 * the remainder of the contents. The "shadow_offset"
148 * field indicates where in the "shadow" these contents begin.
149 * The "copy" field refers to a virtual memory object
150 * to which changed pages must be copied before changing
151 * this object, in order to implement another form
152 * of copy-on-write optimization.
153 *
154 * The virtual memory object structure also records
155 * the attributes associated with its memory object.
156 * The "pager_ready", "can_persist" and "copy_strategy"
157 * fields represent those attributes. The "cached_list"
158 * field is used in the implementation of the persistence
159 * attribute.
160 *
161 * ZZZ Continue this comment.
162 */
163
164 /* Forward declarations for internal functions. */
165 static void _vm_object_allocate(
166 vm_object_size_t size,
167 vm_object_t object);
168
169 static kern_return_t vm_object_terminate(
170 vm_object_t object);
171
172 extern void vm_object_remove(
173 vm_object_t object);
174
175 static vm_object_t vm_object_cache_trim(
176 boolean_t called_from_vm_object_deallocate);
177
178 static void vm_object_deactivate_all_pages(
179 vm_object_t object);
180
181 static void vm_object_abort_activity(
182 vm_object_t object);
183
184 static kern_return_t vm_object_copy_call(
185 vm_object_t src_object,
186 vm_object_offset_t src_offset,
187 vm_object_size_t size,
188 vm_object_t *_result_object);
189
190 static void vm_object_do_collapse(
191 vm_object_t object,
192 vm_object_t backing_object);
193
194 static void vm_object_do_bypass(
195 vm_object_t object,
196 vm_object_t backing_object);
197
198 static void vm_object_release_pager(
199 memory_object_t pager);
200
201 static zone_t vm_object_zone; /* vm backing store zone */
202
203 /*
204 * All wired-down kernel memory belongs to a single virtual
205 * memory object (kernel_object) to avoid wasting data structures.
206 */
207 static struct vm_object kernel_object_store;
208 __private_extern__ vm_object_t kernel_object = &kernel_object_store;
209
210 /*
211 * The submap object is used as a placeholder for vm_map_submap
212 * operations. The object is declared in vm_map.c because it
213 * is exported by the vm_map module. The storage is declared
214 * here because it must be initialized here.
215 */
216 static struct vm_object vm_submap_object_store;
217
218 /*
219 * Virtual memory objects are initialized from
220 * a template (see vm_object_allocate).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
224 * (see _vm_object_allocate()).
225 */
226 static struct vm_object vm_object_template;
227
228 /*
229 * Virtual memory objects that are not referenced by
230 * any address maps, but that are allowed to persist
231 * (an attribute specified by the associated memory manager),
232 * are kept in a queue (vm_object_cached_list).
233 *
234 * When an object from this queue is referenced again,
235 * for example to make another address space mapping,
236 * it must be removed from the queue. That is, the
237 * queue contains *only* objects with zero references.
238 *
239 * The kernel may choose to terminate objects from this
240 * queue in order to reclaim storage. The current policy
241 * is to permit a fixed maximum number of unreferenced
242 * objects (vm_object_cached_max).
243 *
244 * A spin lock (accessed by routines
245 * vm_object_cache_{lock,lock_try,unlock}) governs the
246 * object cache. It must be held when objects are
247 * added to or removed from the cache (in vm_object_terminate).
248 * The routines that acquire a reference to a virtual
249 * memory object based on one of the memory object ports
250 * must also lock the cache.
251 *
252 * Ideally, the object cache should be more isolated
253 * from the reference mechanism, so that the lock need
254 * not be held to make simple references.
255 */
256 static queue_head_t vm_object_cached_list;
257 static int vm_object_cached_count=0;
258 static int vm_object_cached_high; /* highest # cached objects */
259 static int vm_object_cached_max = 512; /* may be patched*/
260
261 static decl_mutex_data(,vm_object_cached_lock_data)
262
263 #define vm_object_cache_lock() \
264 mutex_lock(&vm_object_cached_lock_data)
265 #define vm_object_cache_lock_try() \
266 mutex_try(&vm_object_cached_lock_data)
267 #define vm_object_cache_unlock() \
268 mutex_unlock(&vm_object_cached_lock_data)
269
270 #define VM_OBJECT_HASH_COUNT 1024
271 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
272 static struct zone *vm_object_hash_zone;
273
274 struct vm_object_hash_entry {
275 queue_chain_t hash_link; /* hash chain link */
276 memory_object_t pager; /* pager we represent */
277 vm_object_t object; /* corresponding object */
278 boolean_t waiting; /* someone waiting for
279 * termination */
280 };
281
282 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
283 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
284
/*
 *	vm_object_hash:
 *
 *	Bucket index for a pager: the pager value, converted to
 *	unsigned, shifted right by VM_OBJECT_HASH_SHIFT bits and
 *	reduced modulo VM_OBJECT_HASH_COUNT.
 *
 *	The argument is parenthesized so that the cast applies to the
 *	whole argument expression, not just to its first operand.
 */
#define	VM_OBJECT_HASH_SHIFT	8
#define	vm_object_hash(pager) \
	((((unsigned)(pager)) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
288
289 /*
290 * vm_object_hash_lookup looks up a pager in the hashtable
291 * and returns the corresponding entry, with optional removal.
292 */
293
294 static vm_object_hash_entry_t
295 vm_object_hash_lookup(
296 memory_object_t pager,
297 boolean_t remove_entry)
298 {
299 register queue_t bucket;
300 register vm_object_hash_entry_t entry;
301
302 bucket = &vm_object_hashtable[vm_object_hash(pager)];
303
304 entry = (vm_object_hash_entry_t)queue_first(bucket);
305 while (!queue_end(bucket, (queue_entry_t)entry)) {
306 if (entry->pager == pager && !remove_entry)
307 return(entry);
308 else if (entry->pager == pager) {
309 queue_remove(bucket, entry,
310 vm_object_hash_entry_t, hash_link);
311 return(entry);
312 }
313
314 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
315 }
316
317 return(VM_OBJECT_HASH_ENTRY_NULL);
318 }
319
320 /*
321 * vm_object_hash_enter enters the specified
322 * pager / cache object association in the hashtable.
323 */
324
325 static void
326 vm_object_hash_insert(
327 vm_object_hash_entry_t entry)
328 {
329 register queue_t bucket;
330
331 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
332
333 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
334 }
335
336 static vm_object_hash_entry_t
337 vm_object_hash_entry_alloc(
338 memory_object_t pager)
339 {
340 vm_object_hash_entry_t entry;
341
342 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
343 entry->pager = pager;
344 entry->object = VM_OBJECT_NULL;
345 entry->waiting = FALSE;
346
347 return(entry);
348 }
349
350 void
351 vm_object_hash_entry_free(
352 vm_object_hash_entry_t entry)
353 {
354 zfree(vm_object_hash_zone, (vm_offset_t)entry);
355 }
356
357 /*
358 * vm_object_allocate:
359 *
360 * Returns a new object with the given size.
361 */
362
363 static void
364 _vm_object_allocate(
365 vm_object_size_t size,
366 vm_object_t object)
367 {
368 XPR(XPR_VM_OBJECT,
369 "vm_object_allocate, object 0x%X size 0x%X\n",
370 (integer_t)object, size, 0,0,0);
371
372 *object = vm_object_template;
373 queue_init(&object->memq);
374 queue_init(&object->msr_q);
375 #ifdef UBC_DEBUG
376 queue_init(&object->uplq);
377 #endif /* UBC_DEBUG */
378 vm_object_lock_init(object);
379 object->size = size;
380 }
381
382 __private_extern__ vm_object_t
383 vm_object_allocate(
384 vm_object_size_t size)
385 {
386 register vm_object_t object;
387
388 object = (vm_object_t) zalloc(vm_object_zone);
389
390 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
391
392 if (object != VM_OBJECT_NULL)
393 _vm_object_allocate(size, object);
394
395 return object;
396 }
397
398 /*
399 * vm_object_bootstrap:
400 *
401 * Initialize the VM objects module.
402 */
403 __private_extern__ void
404 vm_object_bootstrap(void)
405 {
406 register i;
407
408 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
409 round_page(512*1024),
410 round_page(12*1024),
411 "vm objects");
412
413 queue_init(&vm_object_cached_list);
414 mutex_init(&vm_object_cached_lock_data, ETAP_VM_OBJ_CACHE);
415
416 vm_object_hash_zone =
417 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
418 round_page(512*1024),
419 round_page(12*1024),
420 "vm object hash entries");
421
422 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
423 queue_init(&vm_object_hashtable[i]);
424
425 /*
426 * Fill in a template object, for quick initialization
427 */
428
429 /* memq; Lock; init after allocation */
430 vm_object_template.size = 0;
431 vm_object_template.frozen_size = 0;
432 vm_object_template.ref_count = 1;
433 #if TASK_SWAPPER
434 vm_object_template.res_count = 1;
435 #endif /* TASK_SWAPPER */
436 vm_object_template.resident_page_count = 0;
437 vm_object_template.copy = VM_OBJECT_NULL;
438 vm_object_template.shadow = VM_OBJECT_NULL;
439 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
440 vm_object_template.cow_hint = 0;
441 vm_object_template.true_share = FALSE;
442
443 vm_object_template.pager = MEMORY_OBJECT_NULL;
444 vm_object_template.paging_offset = 0;
445 vm_object_template.pager_request = PAGER_REQUEST_NULL;
446 /* msr_q; init after allocation */
447
448 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
449 vm_object_template.absent_count = 0;
450 vm_object_template.paging_in_progress = 0;
451
452 /* Begin bitfields */
453 vm_object_template.all_wanted = 0; /* all bits FALSE */
454 vm_object_template.pager_created = FALSE;
455 vm_object_template.pager_initialized = FALSE;
456 vm_object_template.pager_ready = FALSE;
457 vm_object_template.pager_trusted = FALSE;
458 vm_object_template.can_persist = FALSE;
459 vm_object_template.internal = TRUE;
460 vm_object_template.temporary = TRUE;
461 vm_object_template.private = FALSE;
462 vm_object_template.pageout = FALSE;
463 vm_object_template.alive = TRUE;
464 vm_object_template.lock_in_progress = FALSE;
465 vm_object_template.lock_restart = FALSE;
466 vm_object_template.silent_overwrite = FALSE;
467 vm_object_template.advisory_pageout = FALSE;
468 vm_object_template.shadowed = FALSE;
469 vm_object_template.terminating = FALSE;
470 vm_object_template.shadow_severed = FALSE;
471 vm_object_template.phys_contiguous = FALSE;
472 vm_object_template.nophyscache = FALSE;
473 /* End bitfields */
474
475 /* cache bitfields */
476 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
477
478 /* cached_list; init after allocation */
479 vm_object_template.last_alloc = (vm_object_offset_t) 0;
480 vm_object_template.cluster_size = 0;
481 #if MACH_PAGEMAP
482 vm_object_template.existence_map = VM_EXTERNAL_NULL;
483 #endif /* MACH_PAGEMAP */
484 #if MACH_ASSERT
485 vm_object_template.paging_object = VM_OBJECT_NULL;
486 #endif /* MACH_ASSERT */
487
488 /*
489 * Initialize the "kernel object"
490 */
491
492 kernel_object = &kernel_object_store;
493
494 /*
495 * Note that in the following size specifications, we need to add 1 because
496 * VM_MAX_KERNEL_ADDRESS is a maximum address, not a size.
497 */
498 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
499 kernel_object);
500
501 /*
502 * Initialize the "submap object". Make it as large as the
503 * kernel object so that no limit is imposed on submap sizes.
504 */
505
506 vm_submap_object = &vm_submap_object_store;
507 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
508 vm_submap_object);
509 /*
510 * Create an "extra" reference to this object so that we never
511 * try to deallocate it; zfree doesn't like to be called with
512 * non-zone memory.
513 */
514 vm_object_reference(vm_submap_object);
515
516 #if MACH_PAGEMAP
517 vm_external_module_initialize();
518 #endif /* MACH_PAGEMAP */
519 }
520
521 __private_extern__ void
522 vm_object_init(void)
523 {
524 /*
525 * Finish initializing the kernel object.
526 */
527 }
528
529 /* remove the typedef below when emergency work-around is taken out */
530 typedef struct vnode_pager {
531 memory_object_t pager;
532 memory_object_t pager_handle; /* pager */
533 memory_object_control_t control_handle; /* memory object's control handle */
534 void *vnode_handle; /* vnode handle */
535 } *vnode_pager_t;
536
537 #define MIGHT_NOT_CACHE_SHADOWS 1
538 #if MIGHT_NOT_CACHE_SHADOWS
539 static int cache_shadows = TRUE;
540 #endif /* MIGHT_NOT_CACHE_SHADOWS */
541
542 /*
543 * vm_object_deallocate:
544 *
545 * Release a reference to the specified object,
546 * gained either through a vm_object_allocate
547 * or a vm_object_reference call. When all references
548 * are gone, storage associated with this object
549 * may be relinquished.
550 *
551 * No object may be locked.
552 */
553 __private_extern__ void
554 vm_object_deallocate(
555 register vm_object_t object)
556 {
557 boolean_t retry_cache_trim = FALSE;
558 vm_object_t shadow;
559
560 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
561 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
562
563
564 while (object != VM_OBJECT_NULL) {
565
566 /*
567 * The cache holds a reference (uncounted) to
568 * the object; we must lock it before removing
569 * the object.
570 */
571
572 vm_object_cache_lock();
573 vm_object_lock(object);
574
575 assert(object->ref_count > 0);
576
577 /*
578 * If the object has a named reference, and only
579 * that reference would remain, inform the pager
580 * about the last "mapping" reference going away.
581 */
582 if ((object->ref_count == 2) && (object->named)) {
583 memory_object_t pager = object->pager;
584
585 /* Notify the Pager that there are no */
586 /* more mappers for this object */
587
588 if (pager != MEMORY_OBJECT_NULL) {
589 vm_object_unlock(object);
590 vm_object_cache_unlock();
591
592 memory_object_unmap(pager);
593
594 vm_object_cache_lock();
595 vm_object_lock(object);
596 assert(object->ref_count > 0);
597 }
598 }
599
600 /*
601 * Lose the reference. If other references
602 * remain, then we are done, unless we need
603 * to retry a cache trim.
604 * If it is the last reference, then keep it
605 * until any pending initialization is completed.
606 */
607
608 /* if the object is terminating, it cannot go into */
609 /* the cache and we obviously should not call */
610 /* terminate again. */
611
612 if ((object->ref_count > 1) || object->terminating) {
613 object->ref_count--;
614 vm_object_res_deallocate(object);
615 vm_object_unlock(object);
616 vm_object_cache_unlock();
617 if (retry_cache_trim &&
618 ((object = vm_object_cache_trim(TRUE)) !=
619 VM_OBJECT_NULL)) {
620 continue;
621 }
622 return;
623 }
624
625 /*
626 * We have to wait for initialization
627 * before destroying or caching the object.
628 */
629
630 if (object->pager_created && ! object->pager_initialized) {
631 assert(! object->can_persist);
632 vm_object_assert_wait(object,
633 VM_OBJECT_EVENT_INITIALIZED,
634 THREAD_UNINT);
635 vm_object_unlock(object);
636 vm_object_cache_unlock();
637 thread_block(THREAD_CONTINUE_NULL);
638 continue;
639 }
640
641 /*
642 * If this object can persist, then enter it in
643 * the cache. Otherwise, terminate it.
644 *
645 * NOTE: Only permanent objects are cached, and
646 * permanent objects cannot have shadows. This
647 * affects the residence counting logic in a minor
648 * way (can do it in-line, mostly).
649 */
650
651 if ((object->can_persist) && (object->alive)) {
652 /*
653 * Now it is safe to decrement reference count,
654 * and to return if reference count is > 0.
655 */
656 if (--object->ref_count > 0) {
657 vm_object_res_deallocate(object);
658 vm_object_unlock(object);
659 vm_object_cache_unlock();
660 if (retry_cache_trim &&
661 ((object = vm_object_cache_trim(TRUE)) !=
662 VM_OBJECT_NULL)) {
663 continue;
664 }
665 return;
666 }
667
668 #if MIGHT_NOT_CACHE_SHADOWS
669 /*
670 * Remove shadow now if we don't
671 * want to cache shadows.
672 */
673 if (! cache_shadows) {
674 shadow = object->shadow;
675 object->shadow = VM_OBJECT_NULL;
676 }
677 #endif /* MIGHT_NOT_CACHE_SHADOWS */
678
679 /*
680 * Enter the object onto the queue of
681 * cached objects, and deactivate
682 * all of its pages.
683 */
684 assert(object->shadow == VM_OBJECT_NULL);
685 VM_OBJ_RES_DECR(object);
686 XPR(XPR_VM_OBJECT,
687 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
688 (integer_t)object,
689 (integer_t)vm_object_cached_list.next,
690 (integer_t)vm_object_cached_list.prev,0,0);
691
692 vm_object_cached_count++;
693 if (vm_object_cached_count > vm_object_cached_high)
694 vm_object_cached_high = vm_object_cached_count;
695 queue_enter(&vm_object_cached_list, object,
696 vm_object_t, cached_list);
697 vm_object_cache_unlock();
698 vm_object_deactivate_all_pages(object);
699 vm_object_unlock(object);
700
701 #if MIGHT_NOT_CACHE_SHADOWS
702 /*
703 * If we have a shadow that we need
704 * to deallocate, do so now, remembering
705 * to trim the cache later.
706 */
707 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
708 object = shadow;
709 retry_cache_trim = TRUE;
710 continue;
711 }
712 #endif /* MIGHT_NOT_CACHE_SHADOWS */
713
714 /*
715 * Trim the cache. If the cache trim
716 * returns with a shadow for us to deallocate,
717 * then remember to retry the cache trim
718 * when we are done deallocating the shadow.
719 * Otherwise, we are done.
720 */
721
722 object = vm_object_cache_trim(TRUE);
723 if (object == VM_OBJECT_NULL) {
724 return;
725 }
726 retry_cache_trim = TRUE;
727
728 } else {
729 /*
730 * This object is not cachable; terminate it.
731 */
732 XPR(XPR_VM_OBJECT,
733 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%lX ref %d\n",
734 (integer_t)object, object->resident_page_count,
735 object->paging_in_progress,
736 (natural_t)current_thread(),object->ref_count);
737
738 VM_OBJ_RES_DECR(object); /* XXX ? */
739 /*
740 * Terminate this object. If it had a shadow,
741 * then deallocate it; otherwise, if we need
742 * to retry a cache trim, do so now; otherwise,
743 * we are done. "pageout" objects have a shadow,
744 * but maintain a "paging reference" rather than
745 * a normal reference.
746 */
747 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
748 if(vm_object_terminate(object) != KERN_SUCCESS) {
749 return;
750 }
751 if (shadow != VM_OBJECT_NULL) {
752 object = shadow;
753 continue;
754 }
755 if (retry_cache_trim &&
756 ((object = vm_object_cache_trim(TRUE)) !=
757 VM_OBJECT_NULL)) {
758 continue;
759 }
760 return;
761 }
762 }
763 assert(! retry_cache_trim);
764 }
765
766 /*
767 * Check to see whether we really need to trim
768 * down the cache. If so, remove an object from
769 * the cache, terminate it, and repeat.
770 *
771 * Called with, and returns with, cache lock unlocked.
772 */
773 vm_object_t
774 vm_object_cache_trim(
775 boolean_t called_from_vm_object_deallocate)
776 {
777 register vm_object_t object = VM_OBJECT_NULL;
778 vm_object_t shadow;
779
780 for (;;) {
781
782 /*
783 * If we no longer need to trim the cache,
784 * then we are done.
785 */
786
787 vm_object_cache_lock();
788 if (vm_object_cached_count <= vm_object_cached_max) {
789 vm_object_cache_unlock();
790 return VM_OBJECT_NULL;
791 }
792
793 /*
794 * We must trim down the cache, so remove
795 * the first object in the cache.
796 */
797 XPR(XPR_VM_OBJECT,
798 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
799 (integer_t)vm_object_cached_list.next,
800 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
801
802 object = (vm_object_t) queue_first(&vm_object_cached_list);
803 if(object == (vm_object_t) &vm_object_cached_list) {
804 /* something's wrong with the calling parameter or */
805 /* the value of vm_object_cached_count, just fix */
806 /* and return */
807 if(vm_object_cached_max < 0)
808 vm_object_cached_max = 0;
809 vm_object_cached_count = 0;
810 vm_object_cache_unlock();
811 return VM_OBJECT_NULL;
812 }
813 vm_object_lock(object);
814 queue_remove(&vm_object_cached_list, object, vm_object_t,
815 cached_list);
816 vm_object_cached_count--;
817
818 /*
819 * Since this object is in the cache, we know
820 * that it is initialized and has no references.
821 * Take a reference to avoid recursive deallocations.
822 */
823
824 assert(object->pager_initialized);
825 assert(object->ref_count == 0);
826 object->ref_count++;
827
828 /*
829 * Terminate the object.
830 * If the object had a shadow, we let vm_object_deallocate
831 * deallocate it. "pageout" objects have a shadow, but
832 * maintain a "paging reference" rather than a normal
833 * reference.
834 * (We are careful here to limit recursion.)
835 */
836 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
837 if(vm_object_terminate(object) != KERN_SUCCESS)
838 continue;
839 if (shadow != VM_OBJECT_NULL) {
840 if (called_from_vm_object_deallocate) {
841 return shadow;
842 } else {
843 vm_object_deallocate(shadow);
844 }
845 }
846 }
847 }
848
849 boolean_t vm_object_terminate_remove_all = FALSE;
850
851 /*
852 * Routine: vm_object_terminate
853 * Purpose:
854 * Free all resources associated with a vm_object.
855 * In/out conditions:
856 * Upon entry, the object must be locked,
857 * and the object must have exactly one reference.
858 *
859 * The shadow object reference is left alone.
860 *
861 * The object must be unlocked if its found that pages
862 * must be flushed to a backing object. If someone
863 * manages to map the object while it is being flushed
864 * the object is returned unlocked and unchanged. Otherwise,
865 * upon exit, the cache will be unlocked, and the
866 * object will cease to exist.
867 */
868 static kern_return_t
869 vm_object_terminate(
870 register vm_object_t object)
871 {
872 memory_object_t pager;
873 register vm_page_t p;
874 vm_object_t shadow_object;
875
876 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
877 (integer_t)object, object->ref_count, 0, 0, 0);
878
879 if (!object->pageout && (!object->temporary || object->can_persist)
880 && (object->pager != NULL || object->shadow_severed)) {
881 vm_object_cache_unlock();
882 while (!queue_empty(&object->memq)) {
883 /*
884 * Clear pager_trusted bit so that the pages get yanked
885 * out of the object instead of cleaned in place. This
886 * prevents a deadlock in XMM and makes more sense anyway.
887 */
888 object->pager_trusted = FALSE;
889
890 p = (vm_page_t) queue_first(&object->memq);
891
892 VM_PAGE_CHECK(p);
893
894 if (p->busy || p->cleaning) {
895 if(p->cleaning || p->absent) {
896 vm_object_paging_wait(object, THREAD_UNINT);
897 continue;
898 } else {
899 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
900 }
901 }
902
903 vm_page_lock_queues();
904 VM_PAGE_QUEUES_REMOVE(p);
905 vm_page_unlock_queues();
906
907 if (p->absent || p->private) {
908
909 /*
910 * For private pages, VM_PAGE_FREE just
911 * leaves the page structure around for
912 * its owner to clean up. For absent
913 * pages, the structure is returned to
914 * the appropriate pool.
915 */
916
917 goto free_page;
918 }
919
920 if (p->fictitious)
921 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
922
923 if (!p->dirty)
924 p->dirty = pmap_is_modified(p->phys_addr);
925
926 if ((p->dirty || p->precious) && !p->error && object->alive) {
927 p->busy = TRUE;
928 vm_object_paging_begin(object);
929 /* protect the object from re-use/caching while it */
930 /* is unlocked */
931 vm_object_unlock(object);
932 vm_pageout_cluster(p); /* flush page */
933 vm_object_lock(object);
934 vm_object_paging_wait(object, THREAD_UNINT);
935 XPR(XPR_VM_OBJECT,
936 "vm_object_terminate restart, object 0x%X ref %d\n",
937 (integer_t)object, object->ref_count, 0, 0, 0);
938 } else {
939 free_page:
940 VM_PAGE_FREE(p);
941 }
942 }
943 vm_object_unlock(object);
944 vm_object_cache_lock();
945 vm_object_lock(object);
946 }
947
948 /*
949 * Make sure the object isn't already being terminated
950 */
951 if(object->terminating) {
952 object->ref_count -= 1;
953 assert(object->ref_count > 0);
954 vm_object_cache_unlock();
955 vm_object_unlock(object);
956 return KERN_FAILURE;
957 }
958
959 /*
960 * Did somebody get a reference to the object while we were
961 * cleaning it?
962 */
963 if(object->ref_count != 1) {
964 object->ref_count -= 1;
965 assert(object->ref_count > 0);
966 vm_object_res_deallocate(object);
967 vm_object_cache_unlock();
968 vm_object_unlock(object);
969 return KERN_FAILURE;
970 }
971
972 /*
973 * Make sure no one can look us up now.
974 */
975
976 object->terminating = TRUE;
977 object->alive = FALSE;
978 vm_object_remove(object);
979
980 /*
981 * Detach the object from its shadow if we are the shadow's
982 * copy.
983 */
984 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
985 !(object->pageout)) {
986 vm_object_lock(shadow_object);
987 assert((shadow_object->copy == object) ||
988 (shadow_object->copy == VM_OBJECT_NULL));
989 shadow_object->copy = VM_OBJECT_NULL;
990 vm_object_unlock(shadow_object);
991 }
992
993 /*
994 * The pageout daemon might be playing with our pages.
995 * Now that the object is dead, it won't touch any more
996 * pages, but some pages might already be on their way out.
997 * Hence, we wait until the active paging activities have ceased
998 * before we break the association with the pager itself.
999 */
1000 while (object->paging_in_progress != 0) {
1001 vm_object_cache_unlock();
1002 vm_object_wait(object,
1003 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1004 THREAD_UNINT);
1005 vm_object_cache_lock();
1006 vm_object_lock(object);
1007 }
1008
1009 pager = object->pager;
1010 object->pager = MEMORY_OBJECT_NULL;
1011
1012 if (pager != MEMORY_OBJECT_NULL)
1013 memory_object_control_disable(object->pager_request);
1014 vm_object_cache_unlock();
1015
1016 object->ref_count--;
1017 #if TASK_SWAPPER
1018 assert(object->res_count == 0);
1019 #endif /* TASK_SWAPPER */
1020
1021 assert (object->ref_count == 0);
1022
1023 /*
1024 * Clean or free the pages, as appropriate.
1025 * It is possible for us to find busy/absent pages,
1026 * if some faults on this object were aborted.
1027 */
1028 if (object->pageout) {
1029 assert(shadow_object != VM_OBJECT_NULL);
1030 assert(shadow_object == object->shadow);
1031
1032 vm_pageout_object_terminate(object);
1033
1034 } else if ((object->temporary && !object->can_persist) ||
1035 (pager == MEMORY_OBJECT_NULL)) {
1036 while (!queue_empty(&object->memq)) {
1037 p = (vm_page_t) queue_first(&object->memq);
1038
1039 VM_PAGE_CHECK(p);
1040 VM_PAGE_FREE(p);
1041 }
1042 } else if (!queue_empty(&object->memq)) {
1043 panic("vm_object_terminate: queue just emptied isn't");
1044 }
1045
1046 assert(object->paging_in_progress == 0);
1047 assert(object->ref_count == 0);
1048
1049 /*
1050 * If the pager has not already been released by
1051 * vm_object_destroy, we need to terminate it and
1052 * release our reference to it here.
1053 */
1054 if (pager != MEMORY_OBJECT_NULL) {
1055 vm_object_unlock(object);
1056 vm_object_release_pager(pager);
1057 vm_object_lock(object);
1058 }
1059
1060 /* kick off anyone waiting on terminating */
1061 object->terminating = FALSE;
1062 vm_object_paging_begin(object);
1063 vm_object_paging_end(object);
1064 vm_object_unlock(object);
1065
1066 #if MACH_PAGEMAP
1067 vm_external_destroy(object->existence_map, object->size);
1068 #endif /* MACH_PAGEMAP */
1069
1070 /*
1071 * Free the space for the object.
1072 */
1073 zfree(vm_object_zone, (vm_offset_t) object);
1074 return KERN_SUCCESS;
1075 }
1076
1077 /*
1078 * Routine: vm_object_pager_wakeup
1079 * Purpose: Wake up anyone waiting for termination of a pager.
1080 */
1081
1082 static void
1083 vm_object_pager_wakeup(
1084 memory_object_t pager)
1085 {
1086 vm_object_hash_entry_t entry;
1087 boolean_t waiting = FALSE;
1088
1089 /*
1090 * If anyone was waiting for the memory_object_terminate
1091 * to be queued, wake them up now.
1092 */
1093 vm_object_cache_lock();
1094 entry = vm_object_hash_lookup(pager, TRUE);
1095 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1096 waiting = entry->waiting;
1097 vm_object_cache_unlock();
1098 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1099 if (waiting)
1100 thread_wakeup((event_t) pager);
1101 vm_object_hash_entry_free(entry);
1102 }
1103 }
1104
1105 /*
1106 * Routine: vm_object_release_pager
1107 * Purpose: Terminate the pager and, upon completion,
1108 * release our last reference to it.
1109 * just like memory_object_terminate, except
1110 * that we wake up anyone blocked in vm_object_enter
1111 * waiting for termination message to be queued
1112 * before calling memory_object_init.
1113 */
1114 static void
1115 vm_object_release_pager(
1116 memory_object_t pager)
1117 {
1118
1119 /*
1120 * Terminate the pager.
1121 */
1122
1123 (void) memory_object_terminate(pager);
1124
1125 /*
1126 * Wakeup anyone waiting for this terminate
1127 */
1128 vm_object_pager_wakeup(pager);
1129
1130 /*
1131 * Release reference to pager.
1132 */
1133 memory_object_deallocate(pager);
1134 }
1135
1136 /*
1137 * Routine: vm_object_abort_activity [internal use only]
1138 * Purpose:
1139 * Abort paging requests pending on this object.
1140 * In/out conditions:
1141 * The object is locked on entry and exit.
1142 */
1143 static void
1144 vm_object_abort_activity(
1145 vm_object_t object)
1146 {
1147 register
1148 vm_page_t p;
1149 vm_page_t next;
1150
1151 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
1152 (integer_t)object, 0, 0, 0, 0);
1153
1154 /*
1155 * Abort all activity that would be waiting
1156 * for a result on this memory object.
1157 *
1158 * We could also choose to destroy all pages
1159 * that we have in memory for this object, but
1160 * we don't.
1161 */
1162
1163 p = (vm_page_t) queue_first(&object->memq);
1164 while (!queue_end(&object->memq, (queue_entry_t) p)) {
1165 next = (vm_page_t) queue_next(&p->listq);
1166
1167 /*
1168 * If it's being paged in, destroy it.
1169 * If an unlock has been requested, start it again.
1170 */
1171
1172 if (p->busy && p->absent) {
1173 VM_PAGE_FREE(p);
1174 }
1175 else {
1176 if (p->unlock_request != VM_PROT_NONE)
1177 p->unlock_request = VM_PROT_NONE;
1178 PAGE_WAKEUP(p);
1179 }
1180
1181 p = next;
1182 }
1183
1184 /*
1185 * Wake up threads waiting for the memory object to
1186 * become ready.
1187 */
1188
1189 object->pager_ready = TRUE;
1190 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1191 }
1192
1193 /*
1194 * Routine: vm_object_destroy
1195 * Purpose:
1196 * Shut down a VM object, despite the
1197 * presence of address map (or other) references
1198 * to the vm_object.
1199 */
1200 kern_return_t
1201 vm_object_destroy(
1202 vm_object_t object,
1203 kern_return_t reason)
1204 {
1205 memory_object_t old_pager;
1206
1207 if (object == VM_OBJECT_NULL)
1208 return(KERN_SUCCESS);
1209
1210 /*
1211 * Remove the pager association immediately.
1212 *
1213 * This will prevent the memory manager from further
1214 * meddling. [If it wanted to flush data or make
1215 * other changes, it should have done so before performing
1216 * the destroy call.]
1217 */
1218
1219 vm_object_cache_lock();
1220 vm_object_lock(object);
1221 object->can_persist = FALSE;
1222 object->named = FALSE;
1223 object->alive = FALSE;
1224
1225 /*
1226 * Rip out the pager from the vm_object now...
1227 */
1228
1229 vm_object_remove(object);
1230 old_pager = object->pager;
1231 object->pager = MEMORY_OBJECT_NULL;
1232 if (old_pager != MEMORY_OBJECT_NULL)
1233 memory_object_control_disable(object->pager_request);
1234 vm_object_cache_unlock();
1235
1236 /*
1237 * Wait for the existing paging activity (that got
1238 * through before we nulled out the pager) to subside.
1239 */
1240
1241 vm_object_paging_wait(object, THREAD_UNINT);
1242 vm_object_unlock(object);
1243
1244 /*
1245 * Terminate the object now.
1246 */
1247 if (old_pager != MEMORY_OBJECT_NULL) {
1248 vm_object_release_pager(old_pager);
1249
1250 /*
1251 * JMM - Release the caller's reference. This assumes the
1252 * caller had a reference to release, which is a big (but
1253 * currently valid) assumption if this is driven from the
1254 * vnode pager (it is holding a named reference when making
1255 * this call)..
1256 */
1257 vm_object_deallocate(object);
1258
1259 }
1260 return(KERN_SUCCESS);
1261 }
1262
1263 /*
1264 * vm_object_deactivate_pages
1265 *
1266 * Deactivate all pages in the specified object. (Keep its pages
1267 * in memory even though it is no longer referenced.)
1268 *
1269 * The object must be locked.
1270 */
1271 static void
1272 vm_object_deactivate_all_pages(
1273 register vm_object_t object)
1274 {
1275 register vm_page_t p;
1276
1277 queue_iterate(&object->memq, p, vm_page_t, listq) {
1278 vm_page_lock_queues();
1279 if (!p->busy)
1280 vm_page_deactivate(p);
1281 vm_page_unlock_queues();
1282 }
1283 }
1284
1285 __private_extern__ void
1286 vm_object_deactivate_pages(
1287 vm_object_t object,
1288 vm_object_offset_t offset,
1289 vm_object_size_t size,
1290 boolean_t kill_page)
1291 {
1292 vm_object_t orig_object;
1293 int pages_moved = 0;
1294 int pages_found = 0;
1295
1296 /*
1297 * entered with object lock held, acquire a paging reference to
1298 * prevent the memory_object and control ports from
1299 * being destroyed.
1300 */
1301 orig_object = object;
1302
1303 for (;;) {
1304 register vm_page_t m;
1305 vm_object_offset_t toffset;
1306 vm_object_size_t tsize;
1307
1308 vm_object_paging_begin(object);
1309 vm_page_lock_queues();
1310
1311 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1312
1313 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1314
1315 pages_found++;
1316
1317 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1318
1319 m->reference = FALSE;
1320 pmap_clear_reference(m->phys_addr);
1321
1322 if ((kill_page) && (object->internal)) {
1323 m->precious = FALSE;
1324 m->dirty = FALSE;
1325 pmap_clear_modify(m->phys_addr);
1326 vm_external_state_clr(object->existence_map, offset);
1327 }
1328 VM_PAGE_QUEUES_REMOVE(m);
1329
1330 if(m->zero_fill) {
1331 queue_enter_first(
1332 &vm_page_queue_zf,
1333 m, vm_page_t, pageq);
1334 } else {
1335 queue_enter_first(
1336 &vm_page_queue_inactive,
1337 m, vm_page_t, pageq);
1338 }
1339
1340 m->inactive = TRUE;
1341 if (!m->fictitious)
1342 vm_page_inactive_count++;
1343
1344 pages_moved++;
1345 }
1346 }
1347 }
1348 vm_page_unlock_queues();
1349 vm_object_paging_end(object);
1350
1351 if (object->shadow) {
1352 vm_object_t tmp_object;
1353
1354 kill_page = 0;
1355
1356 offset += object->shadow_offset;
1357
1358 tmp_object = object->shadow;
1359 vm_object_lock(tmp_object);
1360
1361 if (object != orig_object)
1362 vm_object_unlock(object);
1363 object = tmp_object;
1364 } else
1365 break;
1366 }
1367 if (object != orig_object)
1368 vm_object_unlock(object);
1369 }
1370
1371 /*
1372 * Routine: vm_object_pmap_protect
1373 *
1374 * Purpose:
1375 * Reduces the permission for all physical
1376 * pages in the specified object range.
1377 *
1378 * If removing write permission only, it is
1379 * sufficient to protect only the pages in
1380 * the top-level object; only those pages may
1381 * have write permission.
1382 *
1383 * If removing all access, we must follow the
1384 * shadow chain from the top-level object to
1385 * remove access to all pages in shadowed objects.
1386 *
1387 * The object must *not* be locked. The object must
1388 * be temporary/internal.
1389 *
1390 * If pmap is not NULL, this routine assumes that
1391 * the only mappings for the pages are in that
1392 * pmap.
1393 */
1394
1395 __private_extern__ void
1396 vm_object_pmap_protect(
1397 register vm_object_t object,
1398 register vm_object_offset_t offset,
1399 vm_size_t size,
1400 pmap_t pmap,
1401 vm_offset_t pmap_start,
1402 vm_prot_t prot)
1403 {
1404 if (object == VM_OBJECT_NULL)
1405 return;
1406 size = round_page_64(size);
1407 offset = trunc_page_64(offset);
1408
1409 vm_object_lock(object);
1410
1411 assert(object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
1412
1413 while (TRUE) {
1414 if (object->resident_page_count > atop(size) / 2 &&
1415 pmap != PMAP_NULL) {
1416 vm_object_unlock(object);
1417 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1418 return;
1419 }
1420
1421 /* if we are doing large ranges with respect to resident */
1422 /* page count then we should interate over pages otherwise */
1423 /* inverse page look-up will be faster */
1424 if ((object->resident_page_count / 4) < atop(size)) {
1425 vm_page_t p;
1426 vm_object_offset_t end;
1427
1428 end = offset + size;
1429
1430 if (pmap != PMAP_NULL) {
1431 queue_iterate(&object->memq, p, vm_page_t, listq) {
1432 if (!p->fictitious &&
1433 (offset <= p->offset) && (p->offset < end)) {
1434
1435 vm_offset_t start = pmap_start +
1436 (vm_offset_t)(p->offset - offset);
1437
1438 pmap_protect(pmap, start, start + PAGE_SIZE, prot);
1439 }
1440 }
1441 } else {
1442 queue_iterate(&object->memq, p, vm_page_t, listq) {
1443 if (!p->fictitious &&
1444 (offset <= p->offset) && (p->offset < end)) {
1445
1446 pmap_page_protect(p->phys_addr,
1447 prot & ~p->page_lock);
1448 }
1449 }
1450 }
1451 } else {
1452 vm_page_t p;
1453 vm_object_offset_t end;
1454 vm_object_offset_t target_off;
1455
1456 end = offset + size;
1457
1458 if (pmap != PMAP_NULL) {
1459 for(target_off = offset;
1460 target_off < end; target_off += PAGE_SIZE) {
1461 if(p = vm_page_lookup(object, target_off)) {
1462 vm_offset_t start = pmap_start +
1463 (vm_offset_t)(p->offset - offset);
1464 pmap_protect(pmap, start,
1465 start + PAGE_SIZE, prot);
1466 }
1467 }
1468 } else {
1469 for(target_off = offset;
1470 target_off < end; target_off += PAGE_SIZE) {
1471 if(p = vm_page_lookup(object, target_off)) {
1472 pmap_page_protect(p->phys_addr,
1473 prot & ~p->page_lock);
1474 }
1475 }
1476 }
1477 }
1478
1479 if (prot == VM_PROT_NONE) {
1480 /*
1481 * Must follow shadow chain to remove access
1482 * to pages in shadowed objects.
1483 */
1484 register vm_object_t next_object;
1485
1486 next_object = object->shadow;
1487 if (next_object != VM_OBJECT_NULL) {
1488 offset += object->shadow_offset;
1489 vm_object_lock(next_object);
1490 vm_object_unlock(object);
1491 object = next_object;
1492 }
1493 else {
1494 /*
1495 * End of chain - we are done.
1496 */
1497 break;
1498 }
1499 }
1500 else {
1501 /*
1502 * Pages in shadowed objects may never have
1503 * write permission - we may stop here.
1504 */
1505 break;
1506 }
1507 }
1508
1509 vm_object_unlock(object);
1510 }
1511
1512 /*
1513 * Routine: vm_object_copy_slowly
1514 *
1515 * Description:
1516 * Copy the specified range of the source
1517 * virtual memory object without using
1518 * protection-based optimizations (such
1519 * as copy-on-write). The pages in the
1520 * region are actually copied.
1521 *
1522 * In/out conditions:
1523 * The caller must hold a reference and a lock
1524 * for the source virtual memory object. The source
1525 * object will be returned *unlocked*.
1526 *
1527 * Results:
1528 * If the copy is completed successfully, KERN_SUCCESS is
1529 * returned. If the caller asserted the interruptible
1530 * argument, and an interruption occurred while waiting
1531 * for a user-generated event, MACH_SEND_INTERRUPTED is
1532 * returned. Other values may be returned to indicate
1533 * hard errors during the copy operation.
1534 *
1535 * A new virtual memory object is returned in a
1536 * parameter (_result_object). The contents of this
1537 * new object, starting at a zero offset, are a copy
1538 * of the source memory region. In the event of
1539 * an error, this parameter will contain the value
1540 * VM_OBJECT_NULL.
1541 */
1542 __private_extern__ kern_return_t
1543 vm_object_copy_slowly(
1544 register vm_object_t src_object,
1545 vm_object_offset_t src_offset,
1546 vm_object_size_t size,
1547 boolean_t interruptible,
1548 vm_object_t *_result_object) /* OUT */
1549 {
1550 vm_object_t new_object;
1551 vm_object_offset_t new_offset;
1552
1553 vm_object_offset_t src_lo_offset = src_offset;
1554 vm_object_offset_t src_hi_offset = src_offset + size;
1555
1556 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1557 src_object, src_offset, size, 0, 0);
1558
1559 if (size == 0) {
1560 vm_object_unlock(src_object);
1561 *_result_object = VM_OBJECT_NULL;
1562 return(KERN_INVALID_ARGUMENT);
1563 }
1564
1565 /*
1566 * Prevent destruction of the source object while we copy.
1567 */
1568
1569 assert(src_object->ref_count > 0);
1570 src_object->ref_count++;
1571 VM_OBJ_RES_INCR(src_object);
1572 vm_object_unlock(src_object);
1573
1574 /*
1575 * Create a new object to hold the copied pages.
1576 * A few notes:
1577 * We fill the new object starting at offset 0,
1578 * regardless of the input offset.
1579 * We don't bother to lock the new object within
1580 * this routine, since we have the only reference.
1581 */
1582
1583 new_object = vm_object_allocate(size);
1584 new_offset = 0;
1585
1586 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1587
1588 for ( ;
1589 size != 0 ;
1590 src_offset += PAGE_SIZE_64,
1591 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1592 ) {
1593 vm_page_t new_page;
1594 vm_fault_return_t result;
1595
1596 while ((new_page = vm_page_alloc(new_object, new_offset))
1597 == VM_PAGE_NULL) {
1598 if (!vm_page_wait(interruptible)) {
1599 vm_object_deallocate(new_object);
1600 *_result_object = VM_OBJECT_NULL;
1601 return(MACH_SEND_INTERRUPTED);
1602 }
1603 }
1604
1605 do {
1606 vm_prot_t prot = VM_PROT_READ;
1607 vm_page_t _result_page;
1608 vm_page_t top_page;
1609 register
1610 vm_page_t result_page;
1611 kern_return_t error_code;
1612
1613 vm_object_lock(src_object);
1614 vm_object_paging_begin(src_object);
1615
1616 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1617 result = vm_fault_page(src_object, src_offset,
1618 VM_PROT_READ, FALSE, interruptible,
1619 src_lo_offset, src_hi_offset,
1620 VM_BEHAVIOR_SEQUENTIAL,
1621 &prot, &_result_page, &top_page,
1622 (int *)0,
1623 &error_code, FALSE, FALSE, NULL, 0);
1624
1625 switch(result) {
1626 case VM_FAULT_SUCCESS:
1627 result_page = _result_page;
1628
1629 /*
1630 * We don't need to hold the object
1631 * lock -- the busy page will be enough.
1632 * [We don't care about picking up any
1633 * new modifications.]
1634 *
1635 * Copy the page to the new object.
1636 *
1637 * POLICY DECISION:
1638 * If result_page is clean,
1639 * we could steal it instead
1640 * of copying.
1641 */
1642
1643 vm_object_unlock(result_page->object);
1644 vm_page_copy(result_page, new_page);
1645
1646 /*
1647 * Let go of both pages (make them
1648 * not busy, perform wakeup, activate).
1649 */
1650
1651 new_page->busy = FALSE;
1652 new_page->dirty = TRUE;
1653 vm_object_lock(result_page->object);
1654 PAGE_WAKEUP_DONE(result_page);
1655
1656 vm_page_lock_queues();
1657 if (!result_page->active &&
1658 !result_page->inactive)
1659 vm_page_activate(result_page);
1660 vm_page_activate(new_page);
1661 vm_page_unlock_queues();
1662
1663 /*
1664 * Release paging references and
1665 * top-level placeholder page, if any.
1666 */
1667
1668 vm_fault_cleanup(result_page->object,
1669 top_page);
1670
1671 break;
1672
1673 case VM_FAULT_RETRY:
1674 break;
1675
1676 case VM_FAULT_FICTITIOUS_SHORTAGE:
1677 vm_page_more_fictitious();
1678 break;
1679
1680 case VM_FAULT_MEMORY_SHORTAGE:
1681 if (vm_page_wait(interruptible))
1682 break;
1683 /* fall thru */
1684
1685 case VM_FAULT_INTERRUPTED:
1686 vm_page_free(new_page);
1687 vm_object_deallocate(new_object);
1688 vm_object_deallocate(src_object);
1689 *_result_object = VM_OBJECT_NULL;
1690 return(MACH_SEND_INTERRUPTED);
1691
1692 case VM_FAULT_MEMORY_ERROR:
1693 /*
1694 * A policy choice:
1695 * (a) ignore pages that we can't
1696 * copy
1697 * (b) return the null object if
1698 * any page fails [chosen]
1699 */
1700
1701 vm_page_lock_queues();
1702 vm_page_free(new_page);
1703 vm_page_unlock_queues();
1704 vm_object_deallocate(new_object);
1705 vm_object_deallocate(src_object);
1706 *_result_object = VM_OBJECT_NULL;
1707 return(error_code ? error_code:
1708 KERN_MEMORY_ERROR);
1709 }
1710 } while (result != VM_FAULT_SUCCESS);
1711 }
1712
1713 /*
1714 * Lose the extra reference, and return our object.
1715 */
1716
1717 vm_object_deallocate(src_object);
1718 *_result_object = new_object;
1719 return(KERN_SUCCESS);
1720 }
1721
1722 /*
1723 * Routine: vm_object_copy_quickly
1724 *
1725 * Purpose:
1726 * Copy the specified range of the source virtual
1727 * memory object, if it can be done without waiting
1728 * for user-generated events.
1729 *
1730 * Results:
1731 * If the copy is successful, the copy is returned in
1732 * the arguments; otherwise, the arguments are not
1733 * affected.
1734 *
1735 * In/out conditions:
1736 * The object should be unlocked on entry and exit.
1737 */
1738
1739 /*ARGSUSED*/
1740 __private_extern__ boolean_t
1741 vm_object_copy_quickly(
1742 vm_object_t *_object, /* INOUT */
1743 vm_object_offset_t offset, /* IN */
1744 vm_object_size_t size, /* IN */
1745 boolean_t *_src_needs_copy, /* OUT */
1746 boolean_t *_dst_needs_copy) /* OUT */
1747 {
1748 vm_object_t object = *_object;
1749 memory_object_copy_strategy_t copy_strategy;
1750
1751 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1752 *_object, offset, size, 0, 0);
1753 if (object == VM_OBJECT_NULL) {
1754 *_src_needs_copy = FALSE;
1755 *_dst_needs_copy = FALSE;
1756 return(TRUE);
1757 }
1758
1759 vm_object_lock(object);
1760
1761 copy_strategy = object->copy_strategy;
1762
1763 switch (copy_strategy) {
1764 case MEMORY_OBJECT_COPY_SYMMETRIC:
1765
1766 /*
1767 * Symmetric copy strategy.
1768 * Make another reference to the object.
1769 * Leave object/offset unchanged.
1770 */
1771
1772 assert(object->ref_count > 0);
1773 object->ref_count++;
1774 vm_object_res_reference(object);
1775 object->shadowed = TRUE;
1776 vm_object_unlock(object);
1777
1778 /*
1779 * Both source and destination must make
1780 * shadows, and the source must be made
1781 * read-only if not already.
1782 */
1783
1784 *_src_needs_copy = TRUE;
1785 *_dst_needs_copy = TRUE;
1786
1787 break;
1788
1789 case MEMORY_OBJECT_COPY_DELAY:
1790 vm_object_unlock(object);
1791 return(FALSE);
1792
1793 default:
1794 vm_object_unlock(object);
1795 return(FALSE);
1796 }
1797 return(TRUE);
1798 }
1799
1800 static int copy_call_count = 0;
1801 static int copy_call_sleep_count = 0;
1802 static int copy_call_restart_count = 0;
1803
1804 /*
1805 * Routine: vm_object_copy_call [internal]
1806 *
1807 * Description:
1808 * Copy the source object (src_object), using the
1809 * user-managed copy algorithm.
1810 *
1811 * In/out conditions:
1812 * The source object must be locked on entry. It
1813 * will be *unlocked* on exit.
1814 *
1815 * Results:
1816 * If the copy is successful, KERN_SUCCESS is returned.
1817 * A new object that represents the copied virtual
1818 * memory is returned in a parameter (*_result_object).
1819 * If the return value indicates an error, this parameter
1820 * is not valid.
1821 */
1822 static kern_return_t
1823 vm_object_copy_call(
1824 vm_object_t src_object,
1825 vm_object_offset_t src_offset,
1826 vm_object_size_t size,
1827 vm_object_t *_result_object) /* OUT */
1828 {
1829 kern_return_t kr;
1830 vm_object_t copy;
1831 boolean_t check_ready = FALSE;
1832
1833 /*
1834 * If a copy is already in progress, wait and retry.
1835 *
1836 * XXX
1837 * Consider making this call interruptable, as Mike
1838 * intended it to be.
1839 *
1840 * XXXO
1841 * Need a counter or version or something to allow
1842 * us to use the copy that the currently requesting
1843 * thread is obtaining -- is it worth adding to the
1844 * vm object structure? Depends how common this case it.
1845 */
1846 copy_call_count++;
1847 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1848 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1849 THREAD_UNINT);
1850 copy_call_restart_count++;
1851 }
1852
1853 /*
1854 * Indicate (for the benefit of memory_object_create_copy)
1855 * that we want a copy for src_object. (Note that we cannot
1856 * do a real assert_wait before calling memory_object_copy,
1857 * so we simply set the flag.)
1858 */
1859
1860 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1861 vm_object_unlock(src_object);
1862
1863 /*
1864 * Ask the memory manager to give us a memory object
1865 * which represents a copy of the src object.
1866 * The memory manager may give us a memory object
1867 * which we already have, or it may give us a
1868 * new memory object. This memory object will arrive
1869 * via memory_object_create_copy.
1870 */
1871
1872 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1873 if (kr != KERN_SUCCESS) {
1874 return kr;
1875 }
1876
1877 /*
1878 * Wait for the copy to arrive.
1879 */
1880 vm_object_lock(src_object);
1881 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1882 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1883 THREAD_UNINT);
1884 copy_call_sleep_count++;
1885 }
1886 Retry:
1887 assert(src_object->copy != VM_OBJECT_NULL);
1888 copy = src_object->copy;
1889 if (!vm_object_lock_try(copy)) {
1890 vm_object_unlock(src_object);
1891 mutex_pause(); /* wait a bit */
1892 vm_object_lock(src_object);
1893 goto Retry;
1894 }
1895 if (copy->size < src_offset+size)
1896 copy->size = src_offset+size;
1897
1898 if (!copy->pager_ready)
1899 check_ready = TRUE;
1900
1901 /*
1902 * Return the copy.
1903 */
1904 *_result_object = copy;
1905 vm_object_unlock(copy);
1906 vm_object_unlock(src_object);
1907
1908 /* Wait for the copy to be ready. */
1909 if (check_ready == TRUE) {
1910 vm_object_lock(copy);
1911 while (!copy->pager_ready) {
1912 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1913 }
1914 vm_object_unlock(copy);
1915 }
1916
1917 return KERN_SUCCESS;
1918 }
1919
1920 static int copy_delayed_lock_collisions = 0;
1921 static int copy_delayed_max_collisions = 0;
1922 static int copy_delayed_lock_contention = 0;
1923 static int copy_delayed_protect_iterate = 0;
1924 static int copy_delayed_protect_lookup = 0;
1925 static int copy_delayed_protect_lookup_wait = 0;
1926
1927 /*
1928 * Routine: vm_object_copy_delayed [internal]
1929 *
1930 * Description:
1931 * Copy the specified virtual memory object, using
1932 * the asymmetric copy-on-write algorithm.
1933 *
1934 * In/out conditions:
1935 * The object must be unlocked on entry.
1936 *
1937 * This routine will not block waiting for user-generated
1938 * events. It is not interruptible.
1939 */
1940 __private_extern__ vm_object_t
1941 vm_object_copy_delayed(
1942 vm_object_t src_object,
1943 vm_object_offset_t src_offset,
1944 vm_object_size_t size)
1945 {
1946 vm_object_t new_copy = VM_OBJECT_NULL;
1947 vm_object_t old_copy;
1948 vm_page_t p;
1949 vm_object_size_t copy_size;
1950
1951 int collisions = 0;
1952 /*
1953 * The user-level memory manager wants to see all of the changes
1954 * to this object, but it has promised not to make any changes on
1955 * its own.
1956 *
1957 * Perform an asymmetric copy-on-write, as follows:
1958 * Create a new object, called a "copy object" to hold
1959 * pages modified by the new mapping (i.e., the copy,
1960 * not the original mapping).
1961 * Record the original object as the backing object for
1962 * the copy object. If the original mapping does not
1963 * change a page, it may be used read-only by the copy.
1964 * Record the copy object in the original object.
1965 * When the original mapping causes a page to be modified,
1966 * it must be copied to a new page that is "pushed" to
1967 * the copy object.
1968 * Mark the new mapping (the copy object) copy-on-write.
1969 * This makes the copy object itself read-only, allowing
1970 * it to be reused if the original mapping makes no
1971 * changes, and simplifying the synchronization required
1972 * in the "push" operation described above.
1973 *
1974 * The copy-on-write is said to be assymetric because the original
1975 * object is *not* marked copy-on-write. A copied page is pushed
1976 * to the copy object, regardless which party attempted to modify
1977 * the page.
1978 *
1979 * Repeated asymmetric copy operations may be done. If the
1980 * original object has not been changed since the last copy, its
1981 * copy object can be reused. Otherwise, a new copy object can be
1982 * inserted between the original object and its previous copy
1983 * object. Since any copy object is read-only, this cannot affect
1984 * affect the contents of the previous copy object.
1985 *
1986 * Note that a copy object is higher in the object tree than the
1987 * original object; therefore, use of the copy object recorded in
1988 * the original object must be done carefully, to avoid deadlock.
1989 */
1990
1991 Retry:
1992 vm_object_lock(src_object);
1993
1994 /*
1995 * See whether we can reuse the result of a previous
1996 * copy operation.
1997 */
1998
1999 old_copy = src_object->copy;
2000 if (old_copy != VM_OBJECT_NULL) {
2001 /*
2002 * Try to get the locks (out of order)
2003 */
2004 if (!vm_object_lock_try(old_copy)) {
2005 vm_object_unlock(src_object);
2006 mutex_pause();
2007
2008 /* Heisenberg Rules */
2009 copy_delayed_lock_collisions++;
2010 if (collisions++ == 0)
2011 copy_delayed_lock_contention++;
2012
2013 if (collisions > copy_delayed_max_collisions)
2014 copy_delayed_max_collisions = collisions;
2015
2016 goto Retry;
2017 }
2018
2019 /*
2020 * Determine whether the old copy object has
2021 * been modified.
2022 */
2023
2024 if (old_copy->resident_page_count == 0 &&
2025 !old_copy->pager_created) {
2026 /*
2027 * It has not been modified.
2028 *
2029 * Return another reference to
2030 * the existing copy-object.
2031 */
2032 assert(old_copy->ref_count > 0);
2033 old_copy->ref_count++;
2034
2035 if (old_copy->size < src_offset+size)
2036 old_copy->size = src_offset+size;
2037
2038 #if TASK_SWAPPER
2039 /*
2040 * We have to reproduce some of the code from
2041 * vm_object_res_reference because we've taken
2042 * the locks out of order here, and deadlock
2043 * would result if we simply called that function.
2044 */
2045 if (++old_copy->res_count == 1) {
2046 assert(old_copy->shadow == src_object);
2047 vm_object_res_reference(src_object);
2048 }
2049 #endif /* TASK_SWAPPER */
2050
2051 vm_object_unlock(old_copy);
2052 vm_object_unlock(src_object);
2053
2054 if (new_copy != VM_OBJECT_NULL) {
2055 vm_object_unlock(new_copy);
2056 vm_object_deallocate(new_copy);
2057 }
2058
2059 return(old_copy);
2060 }
2061 if (new_copy == VM_OBJECT_NULL) {
2062 vm_object_unlock(old_copy);
2063 vm_object_unlock(src_object);
2064 new_copy = vm_object_allocate(src_offset + size);
2065 vm_object_lock(new_copy);
2066 goto Retry;
2067 }
2068
2069 /*
2070 * Adjust the size argument so that the newly-created
2071 * copy object will be large enough to back either the
2072 * new old copy object or the new mapping.
2073 */
2074 if (old_copy->size > src_offset+size)
2075 size = old_copy->size - src_offset;
2076
2077 /*
2078 * The copy-object is always made large enough to
2079 * completely shadow the original object, since
2080 * it may have several users who want to shadow
2081 * the original object at different points.
2082 */
2083
2084 assert((old_copy->shadow == src_object) &&
2085 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2086
2087 /*
2088 * Make the old copy-object shadow the new one.
2089 * It will receive no more pages from the original
2090 * object.
2091 */
2092
2093 src_object->ref_count--; /* remove ref. from old_copy */
2094 assert(src_object->ref_count > 0);
2095 old_copy->shadow = new_copy;
2096 assert(new_copy->ref_count > 0);
2097 new_copy->ref_count++; /* for old_copy->shadow ref. */
2098
2099 #if TASK_SWAPPER
2100 if (old_copy->res_count) {
2101 VM_OBJ_RES_INCR(new_copy);
2102 VM_OBJ_RES_DECR(src_object);
2103 }
2104 #endif
2105
2106 vm_object_unlock(old_copy); /* done with old_copy */
2107 } else if (new_copy == VM_OBJECT_NULL) {
2108 vm_object_unlock(src_object);
2109 new_copy = vm_object_allocate(src_offset + size);
2110 vm_object_lock(new_copy);
2111 goto Retry;
2112 }
2113
2114 /*
2115 * Readjust the copy-object size if necessary.
2116 */
2117 copy_size = new_copy->size;
2118 if (copy_size < src_offset+size) {
2119 copy_size = src_offset+size;
2120 new_copy->size = copy_size;
2121 }
2122
2123 /*
2124 * Point the new copy at the existing object.
2125 */
2126
2127 new_copy->shadow = src_object;
2128 new_copy->shadow_offset = 0;
2129 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2130 assert(src_object->ref_count > 0);
2131 src_object->ref_count++;
2132 VM_OBJ_RES_INCR(src_object);
2133 src_object->copy = new_copy;
2134 vm_object_unlock(new_copy);
2135
2136 /*
2137 * Mark all (current) pages of the existing object copy-on-write.
2138 * This object may have a shadow chain below it, but
2139 * those pages will already be marked copy-on-write.
2140 */
2141
2142 vm_object_paging_wait(src_object, THREAD_UNINT);
2143 copy_delayed_protect_iterate++;
2144 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2145 if (!p->fictitious)
2146 pmap_page_protect(p->phys_addr,
2147 (VM_PROT_ALL & ~VM_PROT_WRITE &
2148 ~p->page_lock));
2149 }
2150 vm_object_unlock(src_object);
2151 XPR(XPR_VM_OBJECT,
2152 "vm_object_copy_delayed: used copy object %X for source %X\n",
2153 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2154
2155 return(new_copy);
2156 }
2157
2158 /*
2159 * Routine: vm_object_copy_strategically
2160 *
2161 * Purpose:
2162 * Perform a copy according to the source object's
2163 * declared strategy. This operation may block,
2164 * and may be interrupted.
2165 */
2166 __private_extern__ kern_return_t
2167 vm_object_copy_strategically(
2168 register vm_object_t src_object,
2169 vm_object_offset_t src_offset,
2170 vm_object_size_t size,
2171 vm_object_t *dst_object, /* OUT */
2172 vm_object_offset_t *dst_offset, /* OUT */
2173 boolean_t *dst_needs_copy) /* OUT */
2174 {
2175 boolean_t result;
2176 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2177 memory_object_copy_strategy_t copy_strategy;
2178
2179 assert(src_object != VM_OBJECT_NULL);
2180
2181 vm_object_lock(src_object);
2182
2183 /*
2184 * The copy strategy is only valid if the memory manager
2185 * is "ready". Internal objects are always ready.
2186 */
2187
2188 while (!src_object->internal && !src_object->pager_ready) {
2189 wait_result_t wait_result;
2190
2191 wait_result = vm_object_sleep( src_object,
2192 VM_OBJECT_EVENT_PAGER_READY,
2193 interruptible);
2194 if (wait_result != THREAD_AWAKENED) {
2195 vm_object_unlock(src_object);
2196 *dst_object = VM_OBJECT_NULL;
2197 *dst_offset = 0;
2198 *dst_needs_copy = FALSE;
2199 return(MACH_SEND_INTERRUPTED);
2200 }
2201 }
2202
2203 copy_strategy = src_object->copy_strategy;
2204
2205 /*
2206 * Use the appropriate copy strategy.
2207 */
2208
2209 switch (copy_strategy) {
2210 case MEMORY_OBJECT_COPY_NONE:
2211 result = vm_object_copy_slowly(src_object, src_offset, size,
2212 interruptible, dst_object);
2213 if (result == KERN_SUCCESS) {
2214 *dst_offset = 0;
2215 *dst_needs_copy = FALSE;
2216 }
2217 break;
2218
2219 case MEMORY_OBJECT_COPY_CALL:
2220 result = vm_object_copy_call(src_object, src_offset, size,
2221 dst_object);
2222 if (result == KERN_SUCCESS) {
2223 *dst_offset = src_offset;
2224 *dst_needs_copy = TRUE;
2225 }
2226 break;
2227
2228 case MEMORY_OBJECT_COPY_DELAY:
2229 vm_object_unlock(src_object);
2230 *dst_object = vm_object_copy_delayed(src_object,
2231 src_offset, size);
2232 *dst_offset = src_offset;
2233 *dst_needs_copy = TRUE;
2234 result = KERN_SUCCESS;
2235 break;
2236
2237 case MEMORY_OBJECT_COPY_SYMMETRIC:
2238 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2239 vm_object_unlock(src_object);
2240 result = KERN_MEMORY_RESTART_COPY;
2241 break;
2242
2243 default:
2244 panic("copy_strategically: bad strategy");
2245 result = KERN_INVALID_ARGUMENT;
2246 }
2247 return(result);
2248 }
2249
2250 /*
2251 * vm_object_shadow:
2252 *
2253 * Create a new object which is backed by the
2254 * specified existing object range. The source
2255 * object reference is deallocated.
2256 *
2257 * The new object and offset into that object
2258 * are returned in the source parameters.
2259 */
2260 boolean_t vm_object_shadow_check = FALSE;
2261
2262 __private_extern__ boolean_t
2263 vm_object_shadow(
2264 vm_object_t *object, /* IN/OUT */
2265 vm_object_offset_t *offset, /* IN/OUT */
2266 vm_object_size_t length)
2267 {
2268 register vm_object_t source;
2269 register vm_object_t result;
2270
2271 source = *object;
2272 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2273
2274 /*
2275 * Determine if we really need a shadow.
2276 */
2277
2278 if (vm_object_shadow_check && source->ref_count == 1 &&
2279 (source->shadow == VM_OBJECT_NULL ||
2280 source->shadow->copy == VM_OBJECT_NULL))
2281 {
2282 source->shadowed = FALSE;
2283 return FALSE;
2284 }
2285
2286 /*
2287 * Allocate a new object with the given length
2288 */
2289
2290 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2291 panic("vm_object_shadow: no object for shadowing");
2292
2293 /*
2294 * The new object shadows the source object, adding
2295 * a reference to it. Our caller changes his reference
2296 * to point to the new object, removing a reference to
2297 * the source object. Net result: no change of reference
2298 * count.
2299 */
2300 result->shadow = source;
2301
2302 /*
2303 * Store the offset into the source object,
2304 * and fix up the offset into the new object.
2305 */
2306
2307 result->shadow_offset = *offset;
2308
2309 /*
2310 * Return the new things
2311 */
2312
2313 *offset = 0;
2314 *object = result;
2315 return TRUE;
2316 }
2317
2318 /*
2319 * The relationship between vm_object structures and
2320 * the memory_object requires careful synchronization.
2321 *
2322 * All associations are created by memory_object_create_named
2323 * for external pagers and vm_object_pager_create for internal
2324 * objects as follows:
2325 *
2326 * pager: the memory_object itself, supplied by
2327 * the user requesting a mapping (or the kernel,
2328 * when initializing internal objects); the
2329 * kernel simulates holding send rights by keeping
2330 * a port reference;
2331 *
2332 * pager_request:
2333 * the memory object control port,
2334 * created by the kernel; the kernel holds
2335 * receive (and ownership) rights to this
2336 * port, but no other references.
2337 *
2338 * When initialization is complete, the "initialized" field
2339 * is asserted. Other mappings using a particular memory object,
2340 * and any references to the vm_object gained through the
2341 * port association must wait for this initialization to occur.
2342 *
2343 * In order to allow the memory manager to set attributes before
2344 * requests (notably virtual copy operations, but also data or
2345 * unlock requests) are made, a "ready" attribute is made available.
2346 * Only the memory manager may affect the value of this attribute.
2347 * Its value does not affect critical kernel functions, such as
2348 * internal object initialization or destruction. [Furthermore,
2349 * memory objects created by the kernel are assumed to be ready
2350 * immediately; the default memory manager need not explicitly
2351 * set the "ready" attribute.]
2352 *
2353 * [Both the "initialized" and "ready" attribute wait conditions
2354 * use the "pager" field as the wait event.]
2355 *
2356 * The port associations can be broken down by any of the
2357 * following routines:
2358 * vm_object_terminate:
2359 * No references to the vm_object remain, and
2360 * the object cannot (or will not) be cached.
2361 * This is the normal case, and is done even
2362 * though one of the other cases has already been
2363 * done.
2364 * memory_object_destroy:
2365 * The memory manager has requested that the
2366 * kernel relinquish references to the memory
2367 * object. [The memory manager may not want to
2368 * destroy the memory object, but may wish to
2369 * refuse or tear down existing memory mappings.]
2370 *
2371 * Each routine that breaks an association must break all of
2372 * them at once. At some later time, that routine must clear
2373 * the pager field and release the memory object references.
2374 * [Furthermore, each routine must cope with the simultaneous
2375 * or previous operations of the others.]
2376 *
2377 * In addition to the lock on the object, the vm_object_cache_lock
2378 * governs the associations. References gained through the
2379 * association require use of the cache lock.
2380 *
2381 * Because the pager field may be cleared spontaneously, it
2382 * cannot be used to determine whether a memory object has
2383 * ever been associated with a particular vm_object. [This
2384 * knowledge is important to the shadow object mechanism.]
2385 * For this reason, an additional "created" attribute is
2386 * provided.
2387 *
2388 * During various paging operations, the pager reference found in the
2389 * vm_object must be valid. To prevent this from being released,
2390 * (other than being removed, i.e., made null), routines may use
2391 * the vm_object_paging_begin/end routines [actually, macros].
2392 * The implementation uses the "paging_in_progress" and "wanted" fields.
2393 * [Operations that alter the validity of the pager values include the
2394 * termination routines and vm_object_collapse.]
2395 */
2396
2397 #if 0
2398 /*
2399 * Routine: vm_object_pager_dead
2400 *
2401 * Purpose:
2402 * A port is being destroyed, and the IPC kobject code
2403 * can't tell if it represents a pager port or not.
2404 * So this function is called each time it sees a port
2405 * die.
2406 * THIS IS HORRIBLY INEFFICIENT. We should only call
2407 * this routine if we had requested a notification on
2408 * the port.
 *
 * Note:
 * This routine is enclosed in "#if 0" and is therefore not
 * compiled.
 *
 * Summary of what the (disabled) body does:
 * - looks up the hash entry for the pager under the cache lock
 * and returns if there is no entry or no object;
 * - detaches the object from the entry, takes a reference on it
 * (pulling it off the cached list if ref_count was 0), clears
 * can_persist and the pager field;
 * - drops the pager and pager_request references, aborts pending
 * activity, and finally drops the reference taken above.
2409 */
2410
2411 __private_extern__ void
2412 vm_object_pager_dead(
2413 ipc_port_t pager)
2414 {
2415 vm_object_t object;
2416 vm_object_hash_entry_t entry;
2417
2418 /*
2419 * Perform essentially the same operations as in vm_object_lookup,
2420 * except that this time we look up based on the memory_object
2421 * port, not the control port.
2422 */
2423 vm_object_cache_lock();
2424 entry = vm_object_hash_lookup(pager, FALSE);
2425 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2426 entry->object == VM_OBJECT_NULL) {
2427 vm_object_cache_unlock();
2428 return;
2429 }
2430
2431 object = entry->object;
2432 entry->object = VM_OBJECT_NULL;
2433
2434 vm_object_lock(object);
/* An unreferenced object sits on the cached list; take it off before referencing it. */
/* NOTE(review): the trace text below is tagged "vm_object_destroy", not this routine's name. */
2435 if (object->ref_count == 0) {
2436 XPR(XPR_VM_OBJECT_CACHE,
2437 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2438 (integer_t)object,
2439 (integer_t)vm_object_cached_list.next,
2440 (integer_t)vm_object_cached_list.prev, 0,0);
2441
2442 queue_remove(&vm_object_cached_list, object,
2443 vm_object_t, cached_list);
2444 vm_object_cached_count--;
2445 }
2446 object->ref_count++;
2447 vm_object_res_reference(object);
2448
2449 object->can_persist = FALSE;
2450
2451 assert(object->pager == pager);
2452
2453 /*
2454 * Remove the pager association.
2455 *
2456 * Note that the memory_object itself is dead, so
2457 * we don't bother with it.
2458 */
2459
2460 object->pager = MEMORY_OBJECT_NULL;
2461
2462 vm_object_unlock(object);
2463 vm_object_cache_unlock();
2464
2465 vm_object_pager_wakeup(pager);
2466
2467 /*
2468 * Release the pager reference. Note that there's no
2469 * point in trying the memory_object_terminate call
2470 * because the memory_object itself is dead. Also
2471 * release the memory_object_control reference, since
2472 * the pager didn't do that either.
 *
 * NOTE(review): object->pager_request is read below after the
 * object lock was dropped above -- confirm that is safe
 * before ever re-enabling this code.
2473 */
2474
2475 memory_object_deallocate(pager);
2476 memory_object_control_deallocate(object->pager_request);
2477
2478
2479 /*
2480 * Restart pending page requests
2481 */
2482 vm_object_lock(object);
2483 vm_object_abort_activity(object);
2484 vm_object_unlock(object);
2485
2486 /*
2487 * Lose the object reference.
2488 */
2489
2490 vm_object_deallocate(object);
2491 }
2492 #endif
2493
2494 /*
2495 * Routine: vm_object_enter
2496 * Purpose:
2497 * Find a VM object corresponding to the given
2498 * pager; if no such object exists, create one,
2499 * and initialize the pager.
 *
 * Parameters:
 * pager the memory object to look up; if it is
 * MEMORY_OBJECT_NULL a plain vm_object_allocate(size)
 * result is returned and nothing else is done.
 * size size handed to vm_object_allocate() when a new
 * object has to be created.
 * internal stored into object->internal and
 * object->pager_trusted when this call initializes
 * the object.
 * init if TRUE, this call performs the pager
 * initialization even when the object is already in
 * the hash (no extra object reference is taken on
 * that path).
 * named if TRUE, the object is marked "named".
 *
 * Returns:
 * The object, unlocked, after object->pager_initialized has
 * been observed TRUE. When an existing object is found and
 * this call is not the initializer, a new reference has been
 * taken on it.
2500 */
2501 vm_object_t
2502 vm_object_enter(
2503 memory_object_t pager,
2504 vm_object_size_t size,
2505 boolean_t internal,
2506 boolean_t init,
2507 boolean_t named)
2508 {
2509 register vm_object_t object;
2510 vm_object_t new_object;
2511 boolean_t must_init;
2512 vm_object_hash_entry_t entry, new_entry;
2513
2514 if (pager == MEMORY_OBJECT_NULL)
2515 return(vm_object_allocate(size));
2516
2517 new_object = VM_OBJECT_NULL;
2518 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2519 must_init = init;
2520
2521 /*
2522 * Look for an object associated with this port.
2523 */
2524
2525 restart:
2526 vm_object_cache_lock();
2527 for (;;) {
2528 entry = vm_object_hash_lookup(pager, FALSE);
2529
2530 /*
2531 * If a previous object is being terminated,
2532 * we must wait for the termination message
2533 * to be queued.
2534 *
2535 * We set entry->waiting to TRUE to let the
2536 * terminator know that someone is waiting.
2537 * Among the possibilities is that the port
2538 * could die while we're waiting. Must restart
2539 * instead of continuing the loop.
2540 */
2541
2542 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
2543 if (entry->object != VM_OBJECT_NULL)
2544 break;
2545
2546 entry->waiting = TRUE;
2547 assert_wait((event_t) pager, THREAD_UNINT);
2548 vm_object_cache_unlock();
2549 thread_block((void (*)(void))0);
2550 goto restart;
2551 }
2552
2553 /*
2554 * We must unlock to create a new object;
2555 * if we do so, we must try the lookup again.
2556 */
2557
2558 if (new_object == VM_OBJECT_NULL) {
2559 vm_object_cache_unlock();
2560 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2561 new_entry = vm_object_hash_entry_alloc(pager);
2562 new_object = vm_object_allocate(size);
2563 vm_object_cache_lock();
2564 } else {
2565 /*
2566 * Lookup failed twice, and we have something
2567 * to insert; set the object.
 *
 * Ownership of new_entry/new_object moves into
 * the hash here, so both locals are reset to
 * null and will not be freed further down.
2568 */
2569
2570 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2571 vm_object_hash_insert(new_entry);
2572 entry = new_entry;
2573 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2574 }
2575
2576 entry->object = new_object;
2577 new_object = VM_OBJECT_NULL;
2578 must_init = TRUE;
2579 }
2580 }
2581
2582 object = entry->object;
2583 assert(object != VM_OBJECT_NULL);
2584
/*
 * Found an existing object that somebody else initializes (or has
 * initialized): take a reference on it, pulling it off the cached
 * list first if it currently has no references.
 */
2585 if (!must_init) {
2586 vm_object_lock(object);
2587 assert(object->pager_created);
2588 assert(!internal || object->internal);
2589 if (named) {
2590 assert(!object->named);
2591 object->named = TRUE;
2592 }
2593 if (object->ref_count == 0) {
2594 XPR(XPR_VM_OBJECT_CACHE,
2595 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2596 (integer_t)object,
2597 (integer_t)vm_object_cached_list.next,
2598 (integer_t)vm_object_cached_list.prev, 0,0);
2599 queue_remove(&vm_object_cached_list, object,
2600 vm_object_t, cached_list);
2601 vm_object_cached_count--;
2602 }
2603 object->ref_count++;
2604 vm_object_res_reference(object);
2605 vm_object_unlock(object);
2606
2607 VM_STAT(hits++);
2608 }
2609 assert(object->ref_count > 0);
2610
2611 VM_STAT(lookups++);
2612
2613 vm_object_cache_unlock();
2614
2615 XPR(XPR_VM_OBJECT,
2616 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2617 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2618
2619 /*
2620 * If we raced to create a vm_object but lost, let's
2621 * throw away ours.
2622 */
2623
2624 if (new_object != VM_OBJECT_NULL)
2625 vm_object_deallocate(new_object);
2626
2627 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2628 vm_object_hash_entry_free(new_entry);
2629
2630 if (must_init) {
2631 pager_request_t pager_request;
2632
2633 /*
2634 * Allocate request port.
2635 */
2636
2637 pager_request = memory_object_control_allocate(object);
2638 assert (pager_request != PAGER_REQUEST_NULL);
2639
2640 vm_object_lock(object);
2641
2642 /*
2643 * Copy the reference we were given.
2644 */
2645
2646 memory_object_reference(pager);
2647 object->pager_created = TRUE;
2648 object->pager = pager;
2649 object->internal = internal;
2650 object->pager_trusted = internal;
2651 if (!internal) {
2652 /* copy strategy invalid until set by memory manager */
2653 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2654 }
2655 object->pager_request = pager_request;
2656 object->pager_ready = FALSE;
2657
2658 vm_object_unlock(object);
2659
2660 /*
2661 * Let the pager know we're using it.
 * (The object lock is not held across this call.)
2662 */
2663
2664 (void) memory_object_init(pager,
2665 object->pager_request,
2666 PAGE_SIZE);
2667
2668 vm_object_lock(object);
2669 if (named)
2670 object->named = TRUE;
/* Internal objects are marked ready right here; external ones are left not-ready by this routine. */
2671 if (internal) {
2672 object->pager_ready = TRUE;
2673 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2674 }
2675
2676 object->pager_initialized = TRUE;
2677 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2678 } else {
2679 vm_object_lock(object);
2680 }
2681
2682 /*
2683 * [At this point, the object must be locked]
2684 */
2685
2686 /*
2687 * Wait for the work above to be done by the first
2688 * thread to map this object.
2689 */
2690
2691 while (!object->pager_initialized) {
2692 vm_object_sleep(object,
2693 VM_OBJECT_EVENT_INITIALIZED,
2694 THREAD_UNINT);
2695 }
2696 vm_object_unlock(object);
2697
2698 XPR(XPR_VM_OBJECT,
2699 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2700 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2701 return(object);
2702 }
2703
2704 /*
2705 * Routine: vm_object_pager_create
2706 * Purpose:
2707 * Create a memory object for an internal object.
2708 * In/out conditions:
2709 * The object is locked on entry and exit;
2710 * it may be unlocked within this call.
2711 * Limitations:
2712 * Only one thread may be performing a
2713 * vm_object_pager_create on an object at
2714 * a time. Presumably, only the pageout
2715 * daemon will be using this routine.
2716 */
2717
2718 void
2719 vm_object_pager_create(
2720 register vm_object_t object)
2721 {
2722 memory_object_t pager;
2723 vm_object_hash_entry_t entry;
2724 #if MACH_PAGEMAP
2725 vm_object_size_t size;
2726 vm_external_map_t map;
2727 #endif /* MACH_PAGEMAP */
2728
2729 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2730 (integer_t)object, 0,0,0,0);
2731
2732 if (memory_manager_default_check() != KERN_SUCCESS)
2733 return;
2734
2735 /*
2736 * Prevent collapse or termination by holding a paging reference
2737 */
2738
2739 vm_object_paging_begin(object);
2740 if (object->pager_created) {
2741 /*
2742 * Someone else got to it first...
2743 * wait for them to finish initializing the ports
2744 */
2745 while (!object->pager_initialized) {
2746 vm_object_sleep(object,
2747 VM_OBJECT_EVENT_INITIALIZED,
2748 THREAD_UNINT);
2749 }
2750 vm_object_paging_end(object);
2751 return;
2752 }
2753
2754 /*
2755 * Indicate that a memory object has been assigned
2756 * before dropping the lock, to prevent a race.
2757 */
2758
2759 object->pager_created = TRUE;
2760 object->paging_offset = 0;
2761
2762 #if MACH_PAGEMAP
2763 size = object->size;
2764 #endif /* MACH_PAGEMAP */
2765 vm_object_unlock(object);
2766
2767 #if MACH_PAGEMAP
2768 map = vm_external_create(size);
2769 vm_object_lock(object);
2770 assert(object->size == size);
2771 object->existence_map = map;
2772 vm_object_unlock(object);
2773 #endif /* MACH_PAGEMAP */
2774
2775 /*
2776 * Create the [internal] pager, and associate it with this object.
2777 *
2778 * We make the association here so that vm_object_enter()
2779 * can look up the object to complete initializing it. No
2780 * user will ever map this object.
2781 */
2782 {
2783 memory_object_default_t dmm;
2784 vm_size_t cluster_size;
2785
2786 /* acquire a reference for the default memory manager */
2787 dmm = memory_manager_default_reference(&cluster_size);
2788 assert(cluster_size >= PAGE_SIZE);
2789
2790 object->cluster_size = cluster_size; /* XXX ??? */
2791 assert(object->temporary);
2792
2793 /* create our new memory object */
2794 (void) memory_object_create(dmm, object->size, &pager);
2795
2796 memory_object_default_deallocate(dmm);
2797 }
2798
2799 entry = vm_object_hash_entry_alloc(pager);
2800
2801 vm_object_cache_lock();
2802 vm_object_hash_insert(entry);
2803
2804 entry->object = object;
2805 vm_object_cache_unlock();
2806
2807 /*
2808 * A reference was returned by
2809 * memory_object_create(), and it is
2810 * copied by vm_object_enter().
2811 */
2812
2813 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
2814 panic("vm_object_pager_create: mismatch");
2815
2816 /*
2817 * Drop the reference we were passed.
2818 */
2819 memory_object_deallocate(pager);
2820
2821 vm_object_lock(object);
2822
2823 /*
2824 * Release the paging reference
2825 */
2826 vm_object_paging_end(object);
2827 }
2828
2829 /*
2830 * Routine: vm_object_remove
2831 * Purpose:
2832 * Eliminate the pager/object association
2833 * for this pager.
2834 * Conditions:
2835 * The object cache must be locked.
2836 */
2837 __private_extern__ void
2838 vm_object_remove(
2839 vm_object_t object)
2840 {
2841 memory_object_t pager;
2842 pager_request_t pager_request;
2843
2844 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
2845 vm_object_hash_entry_t entry;
2846
2847 entry = vm_object_hash_lookup(pager, FALSE);
2848 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
2849 entry->object = VM_OBJECT_NULL;
2850 }
2851
2852 }
2853
2854 /*
2855 * Global variables for vm_object_collapse():
2856 *
2857 * Counts for normal collapses and bypasses.
2858 * Debugging variables, to watch or disable collapse.
 *
 * object_collapses is bumped once per completed
 * vm_object_do_collapse(), object_bypasses once per
 * vm_object_do_bypass().
 *
 * vm_object_collapse_allowed / vm_object_bypass_allowed are
 * consulted by vm_object_collapse(); with both FALSE it returns
 * immediately.
 *
 * vm_external_discarded / vm_external_collapsed count, in
 * vm_object_do_collapse() under MACH_PAGEMAP, how often the backing
 * object's existence map was destroyed versus handed over to the
 * parent object.
2859 */
2860 static long object_collapses = 0;
2861 static long object_bypasses = 0;
2862
2863 static boolean_t vm_object_collapse_allowed = TRUE;
2864 static boolean_t vm_object_bypass_allowed = TRUE;
2865
2866 static int vm_external_discarded;
2867 static int vm_external_collapsed;
2868
2869 /*
2870 * Routine: vm_object_do_collapse
2871 * Purpose:
2872 * Collapse an object with the object backing it.
2873 * Pages in the backing object are moved into the
2874 * parent, and the backing object is deallocated.
2875 * Conditions:
2876 * Both objects and the cache are locked; the page
2877 * queues are unlocked.
2878 *
 * On return:
 * The cache lock and the backing object's lock have been
 * released here, and the backing object has been returned to
 * vm_object_zone; the caller must not touch it again.
 * This routine never unlocks "object".
2879 */
2880 static void
2881 vm_object_do_collapse(
2882 vm_object_t object,
2883 vm_object_t backing_object)
2884 {
2885 vm_page_t p, pp;
2886 vm_object_offset_t new_offset, backing_offset;
2887 vm_object_size_t size;
2888
2889 backing_offset = object->shadow_offset;
2890 size = object->size;
2891
2892 /*
2893 * Move all in-memory pages from backing_object
2894 * to the parent. Pages that have been paged out
2895 * will be overwritten by any of the parent's
2896 * pages that shadow them.
 *
 * Every iteration either frees or renames the first
 * page of the backing object's queue, which is what
 * lets the loop run until that queue is empty.
2897 */
2898
2899 while (!queue_empty(&backing_object->memq)) {
2900
2901 p = (vm_page_t) queue_first(&backing_object->memq);
2902
2903 new_offset = (p->offset - backing_offset);
2904
2905 assert(!p->busy || p->absent);
2906
2907 /*
2908 * If the parent has a page here, or if
2909 * this page falls outside the parent,
2910 * dispose of it.
2911 *
2912 * Otherwise, move it as planned.
2913 */
2914
2915 if (p->offset < backing_offset || new_offset >= size) {
2916 VM_PAGE_FREE(p);
2917 } else {
2918 pp = vm_page_lookup(object, new_offset);
2919 if (pp == VM_PAGE_NULL) {
2920
2921 /*
2922 * Parent now has no page.
2923 * Move the backing object's page up.
2924 */
2925
2926 vm_page_rename(p, object, new_offset);
2927 #if MACH_PAGEMAP
2928 } else if (pp->absent) {
2929
2930 /*
2931 * Parent has an absent page...
2932 * it's not being paged in, so
2933 * it must really be missing from
2934 * the parent.
2935 *
2936 * Throw out the absent page...
2937 * any faults looking for that
2938 * page will restart with the new
2939 * one.
2940 */
2941
2942 VM_PAGE_FREE(pp);
2943 vm_page_rename(p, object, new_offset);
2944 #endif /* MACH_PAGEMAP */
2945 } else {
2946 assert(! pp->absent);
2947
2948 /*
2949 * Parent object has a real page.
2950 * Throw away the backing object's
2951 * page.
2952 */
2953 VM_PAGE_FREE(p);
2954 }
2955 }
2956 }
2957
/* At most one of the two objects may have a pager at this point. */
2958 assert(object->pager == MEMORY_OBJECT_NULL ||
2959 backing_object->pager == MEMORY_OBJECT_NULL);
2960
2961 if (backing_object->pager != MEMORY_OBJECT_NULL) {
2962 vm_object_hash_entry_t entry;
2963
2964 /*
2965 * Move the pager from backing_object to object.
2966 *
2967 * XXX We're only using part of the paging space
2968 * for keeps now... we ought to discard the
2969 * unused portion.
 *
 * The hash entry for the pager is repointed at
 * "object" as well, so later lookups by pager
 * yield the surviving object.
2970 */
2971
2972 object->pager = backing_object->pager;
2973 entry = vm_object_hash_lookup(object->pager, FALSE);
2974 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
2975 entry->object = object;
2976 object->pager_created = backing_object->pager_created;
2977 object->pager_request = backing_object->pager_request;
2978 object->pager_ready = backing_object->pager_ready;
2979 object->pager_initialized = backing_object->pager_initialized;
2980 object->cluster_size = backing_object->cluster_size;
2981 object->paging_offset =
2982 backing_object->paging_offset + backing_offset;
2983 if (object->pager_request != PAGER_REQUEST_NULL) {
2984 memory_object_control_collapse(object->pager_request,
2985 object);
2986 }
2987 }
2988
2989 vm_object_cache_unlock();
2990
/*
 * NOTE(review): this store repeats the one made inside the pager
 * branch above, but runs unconditionally, so it also rewrites
 * object->paging_offset when the backing object had no pager.
 * Looks redundant -- confirm before removing.
 */
2991 object->paging_offset = backing_object->paging_offset + backing_offset;
2992
2993 #if MACH_PAGEMAP
2994 /*
2995 * If the shadow offset is 0, then use the existence map from
2996 * the backing object if there is one. If the shadow offset is
2997 * not zero, toss it.
2998 *
2999 * XXX - If the shadow offset is not 0 then a bit copy is needed
3000 * if the map is to be salvaged. For now, we just toss the
3001 * old map, giving the collapsed object no map. This means that
3002 * the pager is invoked for zero fill pages. If analysis shows
3003 * that this happens frequently and is a performance hit, then
3004 * this code should be fixed to salvage the map.
 *
 * (As coded, the map is also tossed when the two objects
 * differ in size, even with a shadow offset of 0.)
3005 */
3006 assert(object->existence_map == VM_EXTERNAL_NULL);
3007 if (backing_offset || (size != backing_object->size)) {
3008 vm_external_discarded++;
3009 vm_external_destroy(backing_object->existence_map,
3010 backing_object->size);
3011 }
3012 else {
3013 vm_external_collapsed++;
3014 object->existence_map = backing_object->existence_map;
3015 }
3016 backing_object->existence_map = VM_EXTERNAL_NULL;
3017 #endif /* MACH_PAGEMAP */
3018
3019 /*
3020 * Object now shadows whatever backing_object did.
3021 * Note that the reference to backing_object->shadow
3022 * moves from within backing_object to within object.
3023 */
3024
3025 object->shadow = backing_object->shadow;
3026 object->shadow_offset += backing_object->shadow_offset;
3027 assert((object->shadow == VM_OBJECT_NULL) ||
3028 (object->shadow->copy == VM_OBJECT_NULL));
3029
3030 /*
3031 * Discard backing_object.
3032 *
3033 * Since the backing object has no pages, no
3034 * pager left, and no object references within it,
3035 * all that is necessary is to dispose of it.
3036 */
3037
3038 assert((backing_object->ref_count == 1) &&
3039 (backing_object->resident_page_count == 0) &&
3040 (backing_object->paging_in_progress == 0));
3041
3042 backing_object->alive = FALSE;
3043 vm_object_unlock(backing_object);
3044
3045 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3046 (integer_t)backing_object, 0,0,0,0);
3047
/* The structure goes straight back to its zone; vm_object_deallocate() is not used here. */
3048 zfree(vm_object_zone, (vm_offset_t) backing_object);
3049
3050 object_collapses++;
3051 }
3052
3053 static void
3054 vm_object_do_bypass(
3055 vm_object_t object,
3056 vm_object_t backing_object)
3057 {
3058 /*
3059 * Make the parent shadow the next object
3060 * in the chain.
3061 */
3062
3063 #if TASK_SWAPPER
3064 /*
3065 * Do object reference in-line to
3066 * conditionally increment shadow's
3067 * residence count. If object is not
3068 * resident, leave residence count
3069 * on shadow alone.
3070 */
3071 if (backing_object->shadow != VM_OBJECT_NULL) {
3072 vm_object_lock(backing_object->shadow);
3073 backing_object->shadow->ref_count++;
3074 if (object->res_count != 0)
3075 vm_object_res_reference(backing_object->shadow);
3076 vm_object_unlock(backing_object->shadow);
3077 }
3078 #else /* TASK_SWAPPER */
3079 vm_object_reference(backing_object->shadow);
3080 #endif /* TASK_SWAPPER */
3081
3082 object->shadow = backing_object->shadow;
3083 object->shadow_offset += backing_object->shadow_offset;
3084
3085 /*
3086 * Backing object might have had a copy pointer
3087 * to us. If it did, clear it.
3088 */
3089 if (backing_object->copy == object) {
3090 backing_object->copy = VM_OBJECT_NULL;
3091 }
3092
3093 /*
3094 * Drop the reference count on backing_object.
3095 #if TASK_SWAPPER
3096 * Since its ref_count was at least 2, it
3097 * will not vanish; so we don't need to call
3098 * vm_object_deallocate.
3099 * [FBDP: that doesn't seem to be true any more]
3100 *
3101 * The res_count on the backing object is
3102 * conditionally decremented. It's possible
3103 * (via vm_pageout_scan) to get here with
3104 * a "swapped" object, which has a 0 res_count,
3105 * in which case, the backing object res_count
3106 * is already down by one.
3107 #else
3108 * Don't call vm_object_deallocate unless
3109 * ref_count drops to zero.
3110 *
3111 * The ref_count can drop to zero here if the
3112 * backing object could be bypassed but not
3113 * collapsed, such as when the backing object
3114 * is temporary and cachable.
3115 #endif
3116 */
3117 if (backing_object->ref_count > 1) {
3118 backing_object->ref_count--;
3119 #if TASK_SWAPPER
3120 if (object->res_count != 0)
3121 vm_object_res_deallocate(backing_object);
3122 assert(backing_object->ref_count > 0);
3123 #endif /* TASK_SWAPPER */
3124 vm_object_unlock(backing_object);
3125 } else {
3126
3127 /*
3128 * Drop locks so that we can deallocate
3129 * the backing object.
3130 */
3131
3132 #if TASK_SWAPPER
3133 if (object->res_count == 0) {
3134 /* XXX get a reference for the deallocate below */
3135 vm_object_res_reference(backing_object);
3136 }
3137 #endif /* TASK_SWAPPER */
3138 vm_object_unlock(object);
3139 vm_object_unlock(backing_object);
3140 vm_object_deallocate(backing_object);
3141
3142 /*
3143 * Relock object. We don't have to reverify
3144 * its state since vm_object_collapse will
3145 * do that for us as it starts at the
3146 * top of its loop.
3147 */
3148
3149 vm_object_lock(object);
3150 }
3151
3152 object_bypasses++;
3153 }
3154
3155
3156 /*
3157 * vm_object_collapse:
3158 *
3159 * Perform an object collapse or an object bypass if appropriate.
3160 * The real work of collapsing and bypassing is performed in
3161 * the routines vm_object_do_collapse and vm_object_do_bypass.
3162 *
3163 * Requires that the object be locked and the page queues be unlocked.
3164 *
3165 */
3166 __private_extern__ void
3167 vm_object_collapse(
3168 register vm_object_t object)
3169 {
3170 register vm_object_t backing_object;
3171 register vm_object_offset_t backing_offset;
3172 register vm_object_size_t size;
3173 register vm_object_offset_t new_offset;
3174 register vm_page_t p;
3175
3176 vm_offset_t current_offset;
3177
3178 if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {
3179 return;
3180 }
3181
3182 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3183 (integer_t)object, 0,0,0,0);
3184
3185 while (TRUE) {
3186 /*
3187 * Verify that the conditions are right for either
3188 * collapse or bypass:
3189 *
3190 * The object exists and no pages in it are currently
3191 * being paged out, and
3192 */
3193 if (object == VM_OBJECT_NULL ||
3194 object->paging_in_progress != 0 ||
3195 object->absent_count != 0)
3196 return;
3197
3198 /*
3199 * There is a backing object, and
3200 */
3201
3202 if ((backing_object = object->shadow) == VM_OBJECT_NULL)
3203 return;
3204
3205 vm_object_lock(backing_object);
3206
3207 /*
3208 * ...
3209 * The backing object is not read_only,
3210 * and no pages in the backing object are
3211 * currently being paged out.
3212 * The backing object is internal.
3213 *
3214 */
3215
3216 if (!backing_object->internal ||
3217 backing_object->paging_in_progress != 0) {
3218 vm_object_unlock(backing_object);
3219 return;
3220 }
3221
3222 /*
3223 * The backing object can't be a copy-object:
3224 * the shadow_offset for the copy-object must stay
3225 * as 0. Furthermore (for the 'we have all the
3226 * pages' case), if we bypass backing_object and
3227 * just shadow the next object in the chain, old
3228 * pages from that object would then have to be copied
3229 * BOTH into the (former) backing_object and into the
3230 * parent object.
3231 */
3232 if (backing_object->shadow != VM_OBJECT_NULL &&
3233 backing_object->shadow->copy != VM_OBJECT_NULL) {
3234 vm_object_unlock(backing_object);
3235 return;
3236 }
3237
3238 /*
3239 * We can now try to either collapse the backing
3240 * object (if the parent is the only reference to
3241 * it) or (perhaps) remove the parent's reference
3242 * to it.
3243 *
3244 * If there is exactly one reference to the backing
3245 * object, we may be able to collapse it into the
3246 * parent.
3247 *
3248 * The backing object must not have a pager
3249 * created for it, since collapsing an object
3250 * into a backing_object dumps new pages into
3251 * the backing_object that its pager doesn't
3252 * know about.
3253 */
3254
3255 if (backing_object->ref_count == 1 &&
3256 ! object->pager_created &&
3257 vm_object_collapse_allowed) {
3258
3259 XPR(XPR_VM_OBJECT,
3260 "vm_object_collapse: %x to %x, pager %x, pager_request %x\n",
3261 (integer_t)backing_object, (integer_t)object,
3262 (integer_t)backing_object->pager,
3263 (integer_t)backing_object->pager_request, 0);
3264
3265 /*
3266 * We need the cache lock for collapsing,
3267 * but we must not deadlock.
3268 */
3269
3270 if (! vm_object_cache_lock_try()) {
3271 vm_object_unlock(backing_object);
3272 return;
3273 }
3274
3275 /*
3276 * Collapse the object with its backing
3277 * object, and try again with the object's
3278 * new backing object.
3279 */
3280
3281 vm_object_do_collapse(object, backing_object);
3282 continue;
3283 }
3284
3285
3286 /*
3287 * Collapsing the backing object was not possible
3288 * or permitted, so let's try bypassing it.
3289 */
3290
3291 if (! vm_object_bypass_allowed) {
3292 vm_object_unlock(backing_object);
3293 return;
3294 }
3295
3296
3297 /*
3298 * If the backing object has a pager but no pagemap,
3299 * then we cannot bypass it, because we don't know
3300 * what pages it has.
3301 */
3302 if (backing_object->pager_created
3303 #if MACH_PAGEMAP
3304 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3305 #endif /* MACH_PAGEMAP */
3306 ) {
3307 vm_object_unlock(backing_object);
3308 return;
3309 }
3310
3311 /*
3312 * If the object has a pager but no pagemap,
3313 * then we cannot bypass it, because we don't know
3314 * what pages it has.
3315 */
3316 if (object->pager_created
3317 #if MACH_PAGEMAP
3318 && (object->existence_map == VM_EXTERNAL_NULL)
3319 #endif /* MACH_PAGEMAP */
3320 ) {
3321 vm_object_unlock(backing_object);
3322 return;
3323 }
3324
3325 backing_offset = object->shadow_offset;
3326 size = object->size;
3327
3328 /*
3329 * If all of the pages in the backing object are
3330 * shadowed by the parent object, the parent
3331 * object no longer has to shadow the backing
3332 * object; it can shadow the next one in the
3333 * chain.
3334 *
3335 * If the backing object has existence info,
3336 * we must check examine its existence info
3337 * as well.
3338 *
3339 */
3340
3341 if(object->cow_hint >= size)
3342 object->cow_hint = 0;
3343 current_offset = object->cow_hint;
3344 while(TRUE) {
3345 if (vm_page_lookup(object,
3346 (vm_object_offset_t)current_offset)
3347 != VM_PAGE_NULL) {
3348 current_offset+=PAGE_SIZE;
3349 } else if ((object->pager_created) &&
3350 (object->existence_map != NULL) &&
3351 (vm_external_state_get(object->existence_map,
3352 current_offset)
3353 != VM_EXTERNAL_STATE_ABSENT)) {
3354 current_offset+=PAGE_SIZE;
3355 } else if (vm_page_lookup(backing_object,
3356 (vm_object_offset_t)current_offset
3357 + backing_offset)!= VM_PAGE_NULL) {
3358 /* found a dependency */
3359 object->cow_hint = current_offset;
3360 vm_object_unlock(backing_object);
3361 return;
3362 } else if ((backing_object->pager_created) &&
3363 (backing_object->existence_map != NULL) &&
3364 (vm_external_state_get(
3365 backing_object->existence_map,
3366 current_offset + backing_offset)
3367 != VM_EXTERNAL_STATE_ABSENT)) {
3368 /* found a dependency */
3369 object->cow_hint = current_offset;
3370 vm_object_unlock(backing_object);
3371 return;
3372 } else {
3373 current_offset+=PAGE_SIZE;
3374 }
3375 if(current_offset >= size) {
3376 /* wrap at end of object */
3377 current_offset = 0;
3378 }
3379 if(current_offset == object->cow_hint) {
3380 /* we are free of shadow influence */
3381 break;
3382 }
3383 }
3384 /* reset the cow_hint for any objects deeper in the chain */
3385 object->cow_hint = 0;
3386
3387
3388
3389 /*
3390 * All interesting pages in the backing object
3391 * already live in the parent or its pager.
3392 * Thus we can bypass the backing object.
3393 */
3394
3395 vm_object_do_bypass(object, backing_object);
3396
3397 /*
3398 * Try again with this object's new backing object.
3399 */
3400
3401 continue;
3402 }
3403 }
3404
3405 /*
3406 * Routine: vm_object_page_remove: [internal]
3407 * Purpose:
3408 * Removes all physical pages in the specified
3409 * object range from the object's list of pages.
3410 *
3411 * In/out conditions:
3412 * The object must be locked.
3413 * The object must not have paging_in_progress, usually
3414 * guaranteed by not having a pager.
3415 */
3416 unsigned int vm_object_page_remove_lookup = 0;
3417 unsigned int vm_object_page_remove_iterate = 0;
3418
3419 __private_extern__ void
3420 vm_object_page_remove(
3421 register vm_object_t object,
3422 register vm_object_offset_t start,
3423 register vm_object_offset_t end)
3424 {
3425 register vm_page_t p, next;
3426
3427 /*
3428 * One and two page removals are most popular.
3429 * The factor of 16 here is somewhat arbitrary.
3430 * It balances vm_object_lookup vs iteration.
3431 */
3432
3433 if (atop(end - start) < (unsigned)object->resident_page_count/16) {
3434 vm_object_page_remove_lookup++;
3435
3436 for (; start < end; start += PAGE_SIZE_64) {
3437 p = vm_page_lookup(object, start);
3438 if (p != VM_PAGE_NULL) {
3439 assert(!p->cleaning && !p->pageout);
3440 if (!p->fictitious)
3441 pmap_page_protect(p->phys_addr,
3442 VM_PROT_NONE);
3443 VM_PAGE_FREE(p);
3444 }
3445 }
3446 } else {
3447 vm_object_page_remove_iterate++;
3448
3449 p = (vm_page_t) queue_first(&object->memq);
3450 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3451 next = (vm_page_t) queue_next(&p->listq);
3452 if ((start <= p->offset) && (p->offset < end)) {
3453 assert(!p->cleaning && !p->pageout);
3454 if (!p->fictitious)
3455 pmap_page_protect(p->phys_addr,
3456 VM_PROT_NONE);
3457 VM_PAGE_FREE(p);
3458 }
3459 p = next;
3460 }
3461 }
3462 }
3463
3464
3465 /*
3466 * Routine: vm_object_coalesce
3467 * Function: Coalesces two objects backing up adjoining
3468 * regions of memory into a single object.
3469 *
3470 * returns TRUE if objects were combined.
3471 *
3472 * NOTE: Only works at the moment if the second object is NULL -
3473 * if it's not, which object do we lock first?
3474 *
3475 * Parameters:
3476 * prev_object First object to coalesce
3477 * prev_offset Offset into prev_object
3478 * next_object Second object into coalesce
3479 * next_offset Offset into next_object
3480 *
3481 * prev_size Size of reference to prev_object
3482 * next_size Size of reference to next_object
3483 *
3484 * Conditions:
3485 * The object(s) must *not* be locked. The map must be locked
3486 * to preserve the reference to the object(s).
3487 */
3488 static int vm_object_coalesce_count = 0;
3489
3490 __private_extern__ boolean_t
3491 vm_object_coalesce(
3492 register vm_object_t prev_object,
3493 vm_object_t next_object,
3494 vm_object_offset_t prev_offset,
3495 vm_object_offset_t next_offset,
3496 vm_object_size_t prev_size,
3497 vm_object_size_t next_size)
3498 {
3499 vm_object_size_t newsize;
3500
3501 #ifdef lint
3502 next_offset++;
3503 #endif /* lint */
3504
3505 if (next_object != VM_OBJECT_NULL) {
3506 return(FALSE);
3507 }
3508
3509 if (prev_object == VM_OBJECT_NULL) {
3510 return(TRUE);
3511 }
3512
3513 XPR(XPR_VM_OBJECT,
3514 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3515 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3516
3517 vm_object_lock(prev_object);
3518
3519 /*
3520 * Try to collapse the object first
3521 */
3522 vm_object_collapse(prev_object);
3523
3524 /*
3525 * Can't coalesce if pages not mapped to
3526 * prev_entry may be in use any way:
3527 * . more than one reference
3528 * . paged out
3529 * . shadows another object
3530 * . has a copy elsewhere
3531 * . paging references (pages might be in page-list)
3532 */
3533
3534 if ((prev_object->ref_count > 1) ||
3535 prev_object->pager_created ||
3536 (prev_object->shadow != VM_OBJECT_NULL) ||
3537 (prev_object->copy != VM_OBJECT_NULL) ||
3538 (prev_object->true_share != FALSE) ||
3539 (prev_object->paging_in_progress != 0)) {
3540 vm_object_unlock(prev_object);
3541 return(FALSE);
3542 }
3543
3544 vm_object_coalesce_count++;
3545
3546 /*
3547 * Remove any pages that may still be in the object from
3548 * a previous deallocation.
3549 */
3550 vm_object_page_remove(prev_object,
3551 prev_offset + prev_size,
3552 prev_offset + prev_size + next_size);
3553
3554 /*
3555 * Extend the object if necessary.
3556 */
3557 newsize = prev_offset + prev_size + next_size;
3558 if (newsize > prev_object->size) {
3559 #if MACH_PAGEMAP
3560 /*
3561 * We cannot extend an object that has existence info,
3562 * since the existence info might then fail to cover
3563 * the entire object.
3564 *
3565 * This assertion must be true because the object
3566 * has no pager, and we only create existence info
3567 * for objects with pagers.
3568 */
3569 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3570 #endif /* MACH_PAGEMAP */
3571 prev_object->size = newsize;
3572 }
3573
3574 vm_object_unlock(prev_object);
3575 return(TRUE);
3576 }
3577
3578 /*
3579 * Attach a set of physical pages to an object, so that they can
3580 * be mapped by mapping the object. Typically used to map IO memory.
3581 *
3582 * The mapping function and its private data are used to obtain the
3583 * physical addresses for each page to be mapped.
3584 */
3585 void
3586 vm_object_page_map(
3587 vm_object_t object,
3588 vm_object_offset_t offset,
3589 vm_object_size_t size,
3590 vm_object_offset_t (*map_fn)(void *map_fn_data,
3591 vm_object_offset_t offset),
3592 void *map_fn_data) /* private to map_fn */
3593 {
3594 int num_pages;
3595 int i;
3596 vm_page_t m;
3597 vm_page_t old_page;
3598 vm_object_offset_t addr;
3599
3600 num_pages = atop(size);
3601
3602 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3603
3604 addr = (*map_fn)(map_fn_data, offset);
3605
3606 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3607 vm_page_more_fictitious();
3608
3609 vm_object_lock(object);
3610 if ((old_page = vm_page_lookup(object, offset))
3611 != VM_PAGE_NULL)
3612 {
3613 vm_page_lock_queues();
3614 vm_page_free(old_page);
3615 vm_page_unlock_queues();
3616 }
3617
3618 vm_page_init(m, addr);
3619 /* private normally requires lock_queues but since we */
3620 /* are initializing the page, its not necessary here */
3621 m->private = TRUE; /* don`t free page */
3622 m->wire_count = 1;
3623 vm_page_insert(m, object, offset);
3624
3625 PAGE_WAKEUP_DONE(m);
3626 vm_object_unlock(object);
3627 }
3628 }
3629
3630 #include <mach_kdb.h>
3631
3632 #if MACH_KDB
3633 #include <ddb/db_output.h>
3634 #include <vm/vm_print.h>
3635
3636 #define printf kdbprintf
3637
3638 extern boolean_t vm_object_cached(
3639 vm_object_t object);
3640
3641 extern void print_bitstring(
3642 char byte);
3643
3644 boolean_t vm_object_print_pages = FALSE;
3645
/*
 *	print_bitstring: [ debug ]
 *
 *	Prints the eight bits of "byte" as '0'/'1' characters,
 *	least significant bit first.
 */
void
print_bitstring(
	char byte)
{
	char	digits[8];
	int	bit;

	for (bit = 0; bit < 8; bit++)
		digits[bit] = (byte & (1 << bit)) ? '1' : '0';

	printf("%c%c%c%c%c%c%c%c",
	       digits[0], digits[1], digits[2], digits[3],
	       digits[4], digits[5], digits[6], digits[7]);
}
3660
3661 boolean_t
3662 vm_object_cached(
3663 register vm_object_t object)
3664 {
3665 register vm_object_t o;
3666
3667 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3668 if (object == o) {
3669 return TRUE;
3670 }
3671 }
3672 return FALSE;
3673 }
3674
3675 #if MACH_PAGEMAP
3676 /*
3677 * vm_external_print: [ debug ]
3678 */
3679 void
3680 vm_external_print(
3681 vm_external_map_t map,
3682 vm_size_t size)
3683 {
3684 if (map == VM_EXTERNAL_NULL) {
3685 printf("0 ");
3686 } else {
3687 vm_size_t existence_size = stob(size);
3688 printf("{ size=%d, map=[", existence_size);
3689 if (existence_size > 0) {
3690 print_bitstring(map[0]);
3691 }
3692 if (existence_size > 1) {
3693 print_bitstring(map[1]);
3694 }
3695 if (existence_size > 2) {
3696 printf("...");
3697 print_bitstring(map[existence_size-1]);
3698 }
3699 printf("] }\n");
3700 }
3701 return;
3702 }
3703 #endif /* MACH_PAGEMAP */
3704
3705 int
3706 vm_follow_object(
3707 vm_object_t object)
3708 {
3709 extern db_indent;
3710
3711 int count = 0;
3712 int orig_db_indent = db_indent;
3713
3714 while (TRUE) {
3715 if (object == VM_OBJECT_NULL) {
3716 db_indent = orig_db_indent;
3717 return count;
3718 }
3719
3720 count += 1;
3721
3722 iprintf("object 0x%x", object);
3723 printf(", shadow=0x%x", object->shadow);
3724 printf(", copy=0x%x", object->copy);
3725 printf(", pager=0x%x", object->pager);
3726 printf(", ref=%d\n", object->ref_count);
3727
3728 db_indent += 2;
3729 object = object->shadow;
3730 }
3731
3732 }
3733
3734 /*
3735 * vm_object_print: [ debug ]
3736 */
3737 void
3738 vm_object_print(
3739 vm_object_t object,
3740 boolean_t have_addr,
3741 int arg_count,
3742 char *modif)
3743 {
3744 register vm_page_t p;
3745 extern db_indent;
3746 char *s;
3747
3748 register int count;
3749
3750 if (object == VM_OBJECT_NULL)
3751 return;
3752
3753 iprintf("object 0x%x\n", object);
3754
3755 db_indent += 2;
3756
3757 iprintf("size=0x%x", object->size);
3758 printf(", cluster=0x%x", object->cluster_size);
3759 printf(", frozen=0x%x", object->frozen_size);
3760 printf(", ref_count=%d\n", object->ref_count);
3761 iprintf("");
3762 #if TASK_SWAPPER
3763 printf("res_count=%d, ", object->res_count);
3764 #endif /* TASK_SWAPPER */
3765 printf("resident_page_count=%d\n", object->resident_page_count);
3766
3767 iprintf("shadow=0x%x", object->shadow);
3768 if (object->shadow) {
3769 register int i = 0;
3770 vm_object_t shadow = object;
3771 while(shadow = shadow->shadow)
3772 i++;
3773 printf(" (depth %d)", i);
3774 }
3775 printf(", copy=0x%x", object->copy);
3776 printf(", shadow_offset=0x%x", object->shadow_offset);
3777 printf(", last_alloc=0x%x\n", object->last_alloc);
3778
3779 iprintf("pager=0x%x", object->pager);
3780 printf(", paging_offset=0x%x", object->paging_offset);
3781 printf(", pager_request=0x%x\n", object->pager_request);
3782
3783 iprintf("copy_strategy=%d[", object->copy_strategy);
3784 switch (object->copy_strategy) {
3785 case MEMORY_OBJECT_COPY_NONE:
3786 printf("copy_none");
3787 break;
3788
3789 case MEMORY_OBJECT_COPY_CALL:
3790 printf("copy_call");
3791 break;
3792
3793 case MEMORY_OBJECT_COPY_DELAY:
3794 printf("copy_delay");
3795 break;
3796
3797 case MEMORY_OBJECT_COPY_SYMMETRIC:
3798 printf("copy_symmetric");
3799 break;
3800
3801 case MEMORY_OBJECT_COPY_INVALID:
3802 printf("copy_invalid");
3803 break;
3804
3805 default:
3806 printf("?");
3807 }
3808 printf("]");
3809 printf(", absent_count=%d\n", object->absent_count);
3810
3811 iprintf("all_wanted=0x%x<", object->all_wanted);
3812 s = "";
3813 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
3814 printf("%sinit", s);
3815 s = ",";
3816 }
3817 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
3818 printf("%sready", s);
3819 s = ",";
3820 }
3821 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
3822 printf("%spaging", s);
3823 s = ",";
3824 }
3825 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
3826 printf("%sabsent", s);
3827 s = ",";
3828 }
3829 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
3830 printf("%slock", s);
3831 s = ",";
3832 }
3833 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
3834 printf("%suncaching", s);
3835 s = ",";
3836 }
3837 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
3838 printf("%scopy_call", s);
3839 s = ",";
3840 }
3841 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
3842 printf("%scaching", s);
3843 s = ",";
3844 }
3845 printf(">");
3846 printf(", paging_in_progress=%d\n", object->paging_in_progress);
3847
3848 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
3849 (object->pager_created ? "" : "!"),
3850 (object->pager_initialized ? "" : "!"),
3851 (object->pager_ready ? "" : "!"),
3852 (object->can_persist ? "" : "!"),
3853 (object->pager_trusted ? "" : "!"),
3854 (object->pageout ? "" : "!"),
3855 (object->internal ? "internal" : "external"),
3856 (object->temporary ? "temporary" : "permanent"));
3857 iprintf("%salive, %slock_in_progress, %slock_restart, %sshadowed, %scached, %sprivate\n",
3858 (object->alive ? "" : "!"),
3859 (object->lock_in_progress ? "" : "!"),
3860 (object->lock_restart ? "" : "!"),
3861 (object->shadowed ? "" : "!"),
3862 (vm_object_cached(object) ? "" : "!"),
3863 (object->private ? "" : "!"));
3864 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
3865 (object->advisory_pageout ? "" : "!"),
3866 (object->silent_overwrite ? "" : "!"));
3867
3868 #if MACH_PAGEMAP
3869 iprintf("existence_map=");
3870 vm_external_print(object->existence_map, object->size);
3871 #endif /* MACH_PAGEMAP */
3872 #if MACH_ASSERT
3873 iprintf("paging_object=0x%x\n", object->paging_object);
3874 #endif /* MACH_ASSERT */
3875
3876 if (vm_object_print_pages) {
3877 count = 0;
3878 p = (vm_page_t) queue_first(&object->memq);
3879 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3880 if (count == 0) {
3881 iprintf("memory:=");
3882 } else if (count == 2) {
3883 printf("\n");
3884 iprintf(" ...");
3885 count = 0;
3886 } else {
3887 printf(",");
3888 }
3889 count++;
3890
3891 printf("(off=0x%X,page=0x%X)", p->offset, (integer_t) p);
3892 p = (vm_page_t) queue_next(&p->listq);
3893 }
3894 if (count != 0) {
3895 printf("\n");
3896 }
3897 }
3898 db_indent -= 2;
3899 }
3900
3901
3902 /*
3903 * vm_object_find [ debug ]
3904 *
3905 * Find all tasks which reference the given vm_object.
3906 */
3907
3908 boolean_t vm_object_find(vm_object_t object);
3909 boolean_t vm_object_print_verbose = FALSE;
3910
3911 boolean_t
3912 vm_object_find(
3913 vm_object_t object)
3914 {
3915 task_t task;
3916 vm_map_t map;
3917 vm_map_entry_t entry;
3918 processor_set_t pset = &default_pset;
3919 boolean_t found = FALSE;
3920
3921 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
3922 map = task->map;
3923 for (entry = vm_map_first_entry(map);
3924 entry && entry != vm_map_to_entry(map);
3925 entry = entry->vme_next) {
3926
3927 vm_object_t obj;
3928
3929 /*
3930 * For the time being skip submaps,
3931 * only the kernel can have submaps,
3932 * and unless we are interested in
3933 * kernel objects, we can simply skip
3934 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
3935 * for a full solution.
3936 */
3937 if (entry->is_sub_map)
3938 continue;
3939 if (entry)
3940 obj = entry->object.vm_object;
3941 else
3942 continue;
3943
3944 while (obj != VM_OBJECT_NULL) {
3945 if (obj == object) {
3946 if (!found) {
3947 printf("TASK\t\tMAP\t\tENTRY\n");
3948 found = TRUE;
3949 }
3950 printf("0x%x\t0x%x\t0x%x\n",
3951 task, map, entry);
3952 }
3953 obj = obj->shadow;
3954 }
3955 }
3956 }
3957
3958 return(found);
3959 }
3960
3961 #endif /* MACH_KDB */
3962
3963 kern_return_t
3964 vm_object_populate_with_private(
3965 vm_object_t object,
3966 vm_object_offset_t offset,
3967 vm_offset_t phys_addr,
3968 vm_size_t size)
3969 {
3970 vm_offset_t base_addr;
3971 vm_object_offset_t base_offset;
3972
3973
3974 if(!object->private)
3975 return KERN_FAILURE;
3976
3977 if((base_addr = trunc_page(phys_addr)) != phys_addr) {
3978 return KERN_FAILURE;
3979 }
3980
3981
3982 vm_object_lock(object);
3983 if(!object->phys_contiguous) {
3984 vm_page_t m;
3985 if((base_offset = trunc_page(offset)) != offset) {
3986 vm_object_unlock(object);
3987 return KERN_FAILURE;
3988 }
3989 base_offset += object->paging_offset;
3990 while(size) {
3991 m = vm_page_lookup(object, base_offset);
3992 if(m != VM_PAGE_NULL) {
3993 if(m->fictitious) {
3994 vm_page_lock_queues();
3995 m->fictitious = FALSE;
3996 m->private = TRUE;
3997 m->phys_addr = base_addr;
3998 if(!m->busy) {
3999 m->busy = TRUE;
4000 }
4001 if(!m->absent) {
4002 m->absent = TRUE;
4003 object->absent_count++;
4004 }
4005 m->list_req_pending = TRUE;
4006 vm_page_unlock_queues();
4007 } else if (m->phys_addr != base_addr) {
4008 /* pmap call to clear old mapping */
4009 pmap_page_protect(m->phys_addr,
4010 VM_PROT_NONE);
4011 m->phys_addr = base_addr;
4012 }
4013 } else {
4014 while ((m = vm_page_grab_fictitious())
4015 == VM_PAGE_NULL)
4016 vm_page_more_fictitious();
4017 vm_page_lock_queues();
4018 m->fictitious = FALSE;
4019 m->private = TRUE;
4020 m->phys_addr = base_addr;
4021 m->list_req_pending = TRUE;
4022 m->absent = TRUE;
4023 m->unusual = TRUE;
4024 object->absent_count++;
4025 vm_page_unlock_queues();
4026 vm_page_insert(m, object, base_offset);
4027 }
4028 base_addr += PAGE_SIZE;
4029 base_offset += PAGE_SIZE;
4030 size -= PAGE_SIZE;
4031 }
4032 } else {
4033 /* NOTE: we should check the original settings here */
4034 /* if we have a size > zero a pmap call should be made */
4035 /* to disable the range */
4036
4037 /* pmap_? */
4038
4039 /* shadows on contiguous memory are not allowed */
4040 /* we therefore can use the offset field */
4041 object->shadow_offset = (vm_object_offset_t)phys_addr;
4042 object->size = size;
4043 }
4044 vm_object_unlock(object);
4045 return KERN_SUCCESS;
4046 }
4047
4048 /*
4049 * memory_object_free_from_cache:
4050 *
4051 * Walk the vm_object cache list, removing and freeing vm_objects
4052 * which are backed by the pager identified by the caller, (pager_id).
4053 * Remove up to "count" objects, if there are that may available
4054 * in the cache.
4055 *
4056 * Walk the list at most once, return the number of vm_objects
4057 * actually freed.
4058 */
4059
4060 __private_extern__ kern_return_t
4061 memory_object_free_from_cache(
4062 host_t host,
4063 int *pager_id,
4064 int *count)
4065 {
4066
4067 int object_released = 0;
4068 int i;
4069
4070 register vm_object_t object = VM_OBJECT_NULL;
4071 vm_object_t shadow;
4072
4073 /*
4074 if(host == HOST_NULL)
4075 return(KERN_INVALID_ARGUMENT);
4076 */
4077
4078 try_again:
4079 vm_object_cache_lock();
4080
4081 queue_iterate(&vm_object_cached_list, object,
4082 vm_object_t, cached_list) {
4083 if (object->pager && (pager_id == object->pager->pager)) {
4084 vm_object_lock(object);
4085 queue_remove(&vm_object_cached_list, object,
4086 vm_object_t, cached_list);
4087 vm_object_cached_count--;
4088
4089 /*
4090 * Since this object is in the cache, we know
4091 * that it is initialized and has only a pager's
4092 * (implicit) reference. Take a reference to avoid
4093 * recursive deallocations.
4094 */
4095
4096 assert(object->pager_initialized);
4097 assert(object->ref_count == 0);
4098 object->ref_count++;
4099
4100 /*
4101 * Terminate the object.
4102 * If the object had a shadow, we let
4103 * vm_object_deallocate deallocate it.
4104 * "pageout" objects have a shadow, but
4105 * maintain a "paging reference" rather
4106 * than a normal reference.
4107 * (We are careful here to limit recursion.)
4108 */
4109 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4110 if ((vm_object_terminate(object) == KERN_SUCCESS)
4111 && (shadow != VM_OBJECT_NULL)) {
4112 vm_object_deallocate(shadow);
4113 }
4114
4115 if(object_released++ == *count)
4116 return KERN_SUCCESS;
4117 goto try_again;
4118 }
4119 }
4120 vm_object_cache_unlock();
4121 *count = object_released;
4122 return KERN_SUCCESS;
4123 }
4124
4125
4126
4127 kern_return_t
4128 memory_object_create_named(
4129 memory_object_t pager,
4130 memory_object_offset_t size,
4131 memory_object_control_t *control)
4132 {
4133 vm_object_t object;
4134 vm_object_hash_entry_t entry;
4135
4136 *control = MEMORY_OBJECT_CONTROL_NULL;
4137 if (pager == MEMORY_OBJECT_NULL)
4138 return KERN_INVALID_ARGUMENT;
4139
4140 vm_object_cache_lock();
4141 entry = vm_object_hash_lookup(pager, FALSE);
4142 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4143 (entry->object != VM_OBJECT_NULL)) {
4144 if (entry->object->named == TRUE)
4145 panic("memory_object_create_named: caller already holds the right"); }
4146
4147 vm_object_cache_unlock();
4148 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4149 == VM_OBJECT_NULL) {
4150 return(KERN_INVALID_OBJECT);
4151 }
4152
4153 /* wait for object (if any) to be ready */
4154 if (object != VM_OBJECT_NULL) {
4155 vm_object_lock(object);
4156 object->named = TRUE;
4157 while (!object->pager_ready) {
4158 vm_object_sleep(object,
4159 VM_OBJECT_EVENT_PAGER_READY,
4160 THREAD_UNINT);
4161 }
4162 *control = object->pager_request;
4163 vm_object_unlock(object);
4164 }
4165 return (KERN_SUCCESS);
4166 }
4167
4168
4169 /*
4170 * Routine: memory_object_recover_named [user interface]
4171 * Purpose:
4172 * Attempt to recover a named reference for a VM object.
4173 * VM will verify that the object has not already started
4174 * down the termination path, and if it has, will optionally
4175 * wait for that to finish.
4176 * Returns:
4177 * KERN_SUCCESS - we recovered a named reference on the object
4178 * KERN_FAILURE - we could not recover a reference (object dead)
4179 * KERN_INVALID_ARGUMENT - bad memory object control
4180 */
4181 kern_return_t
4182 memory_object_recover_named(
4183 memory_object_control_t control,
4184 boolean_t wait_on_terminating)
4185 {
4186 vm_object_t object;
4187
4188 vm_object_cache_lock();
4189 object = memory_object_control_to_vm_object(control);
4190 if (object == VM_OBJECT_NULL) {
4191 vm_object_cache_unlock();
4192 return (KERN_INVALID_ARGUMENT);
4193 }
4194
4195 restart:
4196 vm_object_lock(object);
4197
4198 if (object->terminating && wait_on_terminating) {
4199 vm_object_cache_unlock();
4200 vm_object_wait(object,
4201 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4202 THREAD_UNINT);
4203 vm_object_cache_lock();
4204 goto restart;
4205 }
4206
4207 if (!object->alive) {
4208 vm_object_cache_unlock();
4209 vm_object_unlock(object);
4210 return KERN_FAILURE;
4211 }
4212
4213 if (object->named == TRUE) {
4214 vm_object_cache_unlock();
4215 vm_object_unlock(object);
4216 return KERN_SUCCESS;
4217 }
4218
4219 if((object->ref_count == 0) && (!object->terminating)){
4220 queue_remove(&vm_object_cached_list, object,
4221 vm_object_t, cached_list);
4222 vm_object_cached_count--;
4223 XPR(XPR_VM_OBJECT_CACHE,
4224 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4225 (integer_t)object,
4226 (integer_t)vm_object_cached_list.next,
4227 (integer_t)vm_object_cached_list.prev, 0,0);
4228 }
4229
4230 vm_object_cache_unlock();
4231
4232 object->named = TRUE;
4233 object->ref_count++;
4234 vm_object_res_reference(object);
4235 while (!object->pager_ready) {
4236 vm_object_sleep(object,
4237 VM_OBJECT_EVENT_PAGER_READY,
4238 THREAD_UNINT);
4239 }
4240 vm_object_unlock(object);
4241 return (KERN_SUCCESS);
4242 }
4243
4244
4245 /*
4246 * vm_object_release_name:
4247 *
4248 * Enforces name semantic on memory_object reference count decrement
4249 * This routine should not be called unless the caller holds a name
4250 * reference gained through the memory_object_create_named.
4251 *
4252 * If the TERMINATE_IDLE flag is set, the call will return if the
4253 * reference count is not 1. i.e. idle with the only remaining reference
4254 * being the name.
4255 * If the decision is made to proceed the name field flag is set to
4256 * false and the reference count is decremented. If the RESPECT_CACHE
4257 * flag is set and the reference count has gone to zero, the
4258 * memory_object is checked to see if it is cacheable otherwise when
4259 * the reference count is zero, it is simply terminated.
4260 */
4261
4262 __private_extern__ kern_return_t
4263 vm_object_release_name(
4264 vm_object_t object,
4265 int flags)
4266 {
4267 vm_object_t shadow;
4268 boolean_t original_object = TRUE;
4269
4270 while (object != VM_OBJECT_NULL) {
4271
4272 /*
4273 * The cache holds a reference (uncounted) to
4274 * the object. We must locke it before removing
4275 * the object.
4276 *
4277 */
4278
4279 vm_object_cache_lock();
4280 vm_object_lock(object);
4281 assert(object->alive);
4282 if(original_object)
4283 assert(object->named);
4284 assert(object->ref_count > 0);
4285
4286 /*
4287 * We have to wait for initialization before
4288 * destroying or caching the object.
4289 */
4290
4291 if (object->pager_created && !object->pager_initialized) {
4292 assert(!object->can_persist);
4293 vm_object_assert_wait(object,
4294 VM_OBJECT_EVENT_INITIALIZED,
4295 THREAD_UNINT);
4296 vm_object_unlock(object);
4297 vm_object_cache_unlock();
4298 thread_block(THREAD_CONTINUE_NULL);
4299 continue;
4300 }
4301
4302 if (((object->ref_count > 1)
4303 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4304 || (object->terminating)) {
4305 vm_object_unlock(object);
4306 vm_object_cache_unlock();
4307 return KERN_FAILURE;
4308 } else {
4309 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4310 vm_object_unlock(object);
4311 vm_object_cache_unlock();
4312 return KERN_SUCCESS;
4313 }
4314 }
4315
4316 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4317 (object->ref_count == 1)) {
4318 if(original_object)
4319 object->named = FALSE;
4320 vm_object_unlock(object);
4321 vm_object_cache_unlock();
4322 /* let vm_object_deallocate push this thing into */
4323 /* the cache, if that it is where it is bound */
4324 vm_object_deallocate(object);
4325 return KERN_SUCCESS;
4326 }
4327 VM_OBJ_RES_DECR(object);
4328 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4329 if(object->ref_count == 1) {
4330 if(vm_object_terminate(object) != KERN_SUCCESS) {
4331 if(original_object) {
4332 return KERN_FAILURE;
4333 } else {
4334 return KERN_SUCCESS;
4335 }
4336 }
4337 if (shadow != VM_OBJECT_NULL) {
4338 original_object = FALSE;
4339 object = shadow;
4340 continue;
4341 }
4342 return KERN_SUCCESS;
4343 } else {
4344 object->ref_count--;
4345 assert(object->ref_count > 0);
4346 if(original_object)
4347 object->named = FALSE;
4348 vm_object_unlock(object);
4349 vm_object_cache_unlock();
4350 return KERN_SUCCESS;
4351 }
4352 }
4353 }
4354
4355
4356 __private_extern__ kern_return_t
4357 vm_object_lock_request(
4358 vm_object_t object,
4359 vm_object_offset_t offset,
4360 vm_object_size_t size,
4361 memory_object_return_t should_return,
4362 int flags,
4363 vm_prot_t prot)
4364 {
4365 vm_object_offset_t original_offset = offset;
4366 boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH;
4367
4368 XPR(XPR_MEMORY_OBJECT,
4369 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4370 (integer_t)object, offset, size,
4371 (((should_return&1)<<1)|should_flush), prot);
4372
4373 /*
4374 * Check for bogus arguments.
4375 */
4376 if (object == VM_OBJECT_NULL)
4377 return (KERN_INVALID_ARGUMENT);
4378
4379 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4380 return (KERN_INVALID_ARGUMENT);
4381
4382 size = round_page(size);
4383
4384 /*
4385 * Lock the object, and acquire a paging reference to
4386 * prevent the memory_object reference from being released.
4387 */
4388 vm_object_lock(object);
4389 vm_object_paging_begin(object);
4390 offset -= object->paging_offset;
4391
4392 (void)vm_object_update(object,
4393 offset, size, should_return, flags, prot);
4394
4395 vm_object_paging_end(object);
4396 vm_object_unlock(object);
4397
4398 return (KERN_SUCCESS);
4399 }
4400
4401
4402
4403 #if TASK_SWAPPER
4404 /*
4405 * vm_object_res_deallocate
4406 *
4407 * (recursively) decrement residence counts on vm objects and their shadows.
4408 * Called from vm_object_deallocate and when swapping out an object.
4409 *
4410 * The object is locked, and remains locked throughout the function,
4411 * even as we iterate down the shadow chain. Locks on intermediate objects
4412 * will be dropped, but not the original object.
4413 *
4414 * NOTE: this function used to use recursion, rather than iteration.
4415 */
4416
4417 __private_extern__ void
4418 vm_object_res_deallocate(
4419 vm_object_t object)
4420 {
4421 vm_object_t orig_object = object;
4422 /*
4423 * Object is locked so it can be called directly
4424 * from vm_object_deallocate. Original object is never
4425 * unlocked.
4426 */
4427 assert(object->res_count > 0);
4428 while (--object->res_count == 0) {
4429 assert(object->ref_count >= object->res_count);
4430 vm_object_deactivate_all_pages(object);
4431 /* iterate on shadow, if present */
4432 if (object->shadow != VM_OBJECT_NULL) {
4433 vm_object_t tmp_object = object->shadow;
4434 vm_object_lock(tmp_object);
4435 if (object != orig_object)
4436 vm_object_unlock(object);
4437 object = tmp_object;
4438 assert(object->res_count > 0);
4439 } else
4440 break;
4441 }
4442 if (object != orig_object)
4443 vm_object_unlock(object);
4444 }
4445
4446 /*
4447 * vm_object_res_reference
4448 *
4449 * Internal function to increment residence count on a vm object
4450 * and its shadows. It is called only from vm_object_reference, and
4451 * when swapping in a vm object, via vm_map_swap.
4452 *
4453 * The object is locked, and remains locked throughout the function,
4454 * even as we iterate down the shadow chain. Locks on intermediate objects
4455 * will be dropped, but not the original object.
4456 *
4457 * NOTE: this function used to use recursion, rather than iteration.
4458 */
4459
4460 __private_extern__ void
4461 vm_object_res_reference(
4462 vm_object_t object)
4463 {
4464 vm_object_t orig_object = object;
4465 /*
4466 * Object is locked, so this can be called directly
4467 * from vm_object_reference. This lock is never released.
4468 */
4469 while ((++object->res_count == 1) &&
4470 (object->shadow != VM_OBJECT_NULL)) {
4471 vm_object_t tmp_object = object->shadow;
4472
4473 assert(object->ref_count >= object->res_count);
4474 vm_object_lock(tmp_object);
4475 if (object != orig_object)
4476 vm_object_unlock(object);
4477 object = tmp_object;
4478 }
4479 if (object != orig_object)
4480 vm_object_unlock(object);
4481 assert(orig_object->ref_count >= orig_object->res_count);
4482 }
4483 #endif /* TASK_SWAPPER */
4484
4485 /*
4486 * vm_object_reference:
4487 *
4488 * Gets another reference to the given object.
4489 */
4490 #ifdef vm_object_reference
4491 #undef vm_object_reference
4492 #endif
4493 __private_extern__ void
4494 vm_object_reference(
4495 register vm_object_t object)
4496 {
4497 if (object == VM_OBJECT_NULL)
4498 return;
4499
4500 vm_object_lock(object);
4501 assert(object->ref_count > 0);
4502 vm_object_reference_locked(object);
4503 vm_object_unlock(object);
4504 }
4505
4506 #ifdef MACH_BSD
4507 /*
4508 * Scale the vm_object_cache
4509 * This is required to make sure that the vm_object_cache is big
4510 * enough to effectively cache the mapped file.
4511 * This is really important with UBC as all the regular file vnodes
4512 * have memory object associated with them. Havving this cache too
4513 * small results in rapid reclaim of vnodes and hurts performance a LOT!
4514 *
4515 * This is also needed as number of vnodes can be dynamically scaled.
4516 */
4517 kern_return_t
4518 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval)
4519 {
4520 vm_object_cached_max = nval;
4521 vm_object_cache_trim(FALSE);
4522 return (KERN_SUCCESS);
4523 }
4524 #endif /* MACH_BSD */
4525