]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_object.c
xnu-124.1.tar.gz
[apple/xnu.git] / osfmk / vm / vm_object.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50/*
51 */
52/*
53 * File: vm/vm_object.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Virtual memory object module.
57 */
58
#if defined(MACH_BSD)
/* Temporary: remove as part of the component support merge. */
extern int	vnode_pager_workaround;
#endif	/* MACH_BSD */
63
64#include <mach_pagemap.h>
65#include <task_swapper.h>
66
67#include <mach/memory_object.h>
68#include <mach/memory_object_default.h>
69#include <mach/memory_object_control_server.h>
70#include <mach/vm_param.h>
71#include <ipc/ipc_port.h>
72#include <ipc/ipc_space.h>
73#include <kern/assert.h>
74#include <kern/lock.h>
75#include <kern/queue.h>
76#include <kern/xpr.h>
77#include <kern/zalloc.h>
78#include <kern/host.h>
79#include <kern/host_statistics.h>
80#include <kern/processor.h>
81#include <vm/memory_object.h>
82#include <vm/vm_fault.h>
83#include <vm/vm_map.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pageout.h>
87#include <kern/misc_protos.h>
88
89
90
91/*
92 * Virtual memory objects maintain the actual data
93 * associated with allocated virtual memory. A given
94 * page of memory exists within exactly one object.
95 *
96 * An object is only deallocated when all "references"
97 * are given up. Only one "reference" to a given
98 * region of an object should be writeable.
99 *
100 * Associated with each object is a list of all resident
101 * memory pages belonging to that object; this list is
102 * maintained by the "vm_page" module, but locked by the object's
103 * lock.
104 *
105 * Each object also records the memory object port
106 * that is used by the kernel to request and write
107 * back data (the memory object port, field "pager"),
108 * and the ports provided to the memory manager, the server that
109 * manages that data, to return data and control its
110 * use (the memory object control port, field "pager_request")
111 * and for naming (the memory object name port, field "pager_name").
112 *
113 * Virtual memory objects are allocated to provide
114 * zero-filled memory (vm_allocate) or map a user-defined
115 * memory object into a virtual address space (vm_map).
116 *
117 * Virtual memory objects that refer to a user-defined
118 * memory object are called "permanent", because all changes
119 * made in virtual memory are reflected back to the
120 * memory manager, which may then store it permanently.
121 * Other virtual memory objects are called "temporary",
122 * meaning that changes need be written back only when
123 * necessary to reclaim pages, and that storage associated
124 * with the object can be discarded once it is no longer
125 * mapped.
126 *
127 * A permanent memory object may be mapped into more
128 * than one virtual address space. Moreover, two threads
129 * may attempt to make the first mapping of a memory
130 * object concurrently. Only one thread is allowed to
131 * complete this mapping; all others wait until the
132 * "pager_initialized" field is asserted, indicating
133 * that the first thread has initialized all of the
134 * necessary fields in the virtual memory object structure.
135 *
136 * The kernel relies on a *default memory manager* to
137 * provide backing storage for the zero-filled virtual
138 * memory objects. The memory object ports associated
139 * with these temporary virtual memory objects are only
140 * generated and passed to the default memory manager
141 * when it becomes necessary. Virtual memory objects
142 * that depend on the default memory manager are called
143 * "internal". The "pager_created" field is provided to
144 * indicate whether these ports have ever been allocated.
145 *
146 * The kernel may also create virtual memory objects to
147 * hold changed pages after a copy-on-write operation.
148 * In this case, the virtual memory object (and its
149 * backing storage -- its memory object) only contain
150 * those pages that have been changed. The "shadow"
151 * field refers to the virtual memory object that contains
152 * the remainder of the contents. The "shadow_offset"
153 * field indicates where in the "shadow" these contents begin.
154 * The "copy" field refers to a virtual memory object
155 * to which changed pages must be copied before changing
156 * this object, in order to implement another form
157 * of copy-on-write optimization.
158 *
159 * The virtual memory object structure also records
160 * the attributes associated with its memory object.
161 * The "pager_ready", "can_persist" and "copy_strategy"
162 * fields represent those attributes. The "cached_list"
163 * field is used in the implementation of the persistence
164 * attribute.
165 *
166 * ZZZ Continue this comment.
167 */
168
169/* Forward declarations for internal functions. */
170extern void _vm_object_allocate(
171 vm_object_size_t size,
172 vm_object_t object);
173
174extern kern_return_t vm_object_terminate(
175 vm_object_t object);
176
177extern void vm_object_remove(
178 vm_object_t object);
179
180extern vm_object_t vm_object_cache_trim(
181 boolean_t called_from_vm_object_deallocate);
182
183extern void vm_object_deactivate_pages(
184 vm_object_t object);
185
186extern void vm_object_abort_activity(
187 vm_object_t object);
188
189extern kern_return_t vm_object_copy_call(
190 vm_object_t src_object,
191 vm_object_offset_t src_offset,
192 vm_object_size_t size,
193 vm_object_t *_result_object);
194
195extern void vm_object_do_collapse(
196 vm_object_t object,
197 vm_object_t backing_object);
198
199extern void vm_object_do_bypass(
200 vm_object_t object,
201 vm_object_t backing_object);
202
203extern void memory_object_release(
204 ipc_port_t pager,
205 pager_request_t pager_request);
206
207zone_t vm_object_zone; /* vm backing store zone */
208
209/*
210 * All wired-down kernel memory belongs to a single virtual
211 * memory object (kernel_object) to avoid wasting data structures.
212 */
213struct vm_object kernel_object_store;
214vm_object_t kernel_object = &kernel_object_store;
215
216/*
217 * The submap object is used as a placeholder for vm_map_submap
218 * operations. The object is declared in vm_map.c because it
219 * is exported by the vm_map module. The storage is declared
220 * here because it must be initialized here.
221 */
222struct vm_object vm_submap_object_store;
223
224/*
225 * Virtual memory objects are initialized from
226 * a template (see vm_object_allocate).
227 *
228 * When adding a new field to the virtual memory
229 * object structure, be sure to add initialization
230 * (see vm_object_init).
231 */
232struct vm_object vm_object_template;
233
234/*
235 * Virtual memory objects that are not referenced by
236 * any address maps, but that are allowed to persist
237 * (an attribute specified by the associated memory manager),
238 * are kept in a queue (vm_object_cached_list).
239 *
240 * When an object from this queue is referenced again,
241 * for example to make another address space mapping,
242 * it must be removed from the queue. That is, the
243 * queue contains *only* objects with zero references.
244 *
245 * The kernel may choose to terminate objects from this
246 * queue in order to reclaim storage. The current policy
247 * is to permit a fixed maximum number of unreferenced
248 * objects (vm_object_cached_max).
249 *
250 * A mutex (accessed by routines
251 * vm_object_cache_{lock,lock_try,unlock}) governs the
252 * object cache. It must be held when objects are
253 * added to or removed from the cache (in vm_object_terminate).
254 * The routines that acquire a reference to a virtual
255 * memory object based on one of the memory object ports
256 * must also lock the cache.
257 *
258 * Ideally, the object cache should be more isolated
259 * from the reference mechanism, so that the lock need
260 * not be held to make simple references.
261 */
262queue_head_t vm_object_cached_list;
263int vm_object_cached_count;
264int vm_object_cached_high; /* highest # of cached objects */
265int vm_object_cached_max = 500; /* may be patched*/
266
267decl_mutex_data(,vm_object_cached_lock_data)
268
269#define vm_object_cache_lock() \
270 mutex_lock(&vm_object_cached_lock_data)
271#define vm_object_cache_lock_try() \
272 mutex_try(&vm_object_cached_lock_data)
273#define vm_object_cache_unlock() \
274 mutex_unlock(&vm_object_cached_lock_data)
275
276#define VM_OBJECT_HASH_COUNT 1024
277queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
278struct zone *vm_object_hash_zone;
279
280struct vm_object_hash_entry {
281 queue_chain_t hash_link; /* hash chain link */
282 ipc_port_t pager; /* pager we represent */
283 vm_object_t object; /* corresponding object */
284 boolean_t waiting; /* someone waiting for
285 * termination */
286};
287
288typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
289#define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
290
/*
 *	vm_object_hash maps a pager value to a bucket index in
 *	[0, VM_OBJECT_HASH_COUNT): the value is converted to unsigned,
 *	its low VM_OBJECT_HASH_SHIFT bits are discarded, and the
 *	remainder is reduced modulo the table size.
 *
 *	The argument is parenthesized so that the cast applies to the
 *	whole argument expression rather than to its first operand.
 */
#define VM_OBJECT_HASH_SHIFT	8
#define vm_object_hash(pager) \
	((((unsigned)(pager)) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
294
295/*
296 * vm_object_hash_lookup looks up a pager in the hashtable
297 * and returns the corresponding entry, with optional removal.
298 */
299
300vm_object_hash_entry_t
301vm_object_hash_lookup(
302 ipc_port_t pager,
303 boolean_t remove_entry)
304{
305 register queue_t bucket;
306 register vm_object_hash_entry_t entry;
307
308 bucket = &vm_object_hashtable[vm_object_hash(pager)];
309
310 entry = (vm_object_hash_entry_t)queue_first(bucket);
311 while (!queue_end(bucket, (queue_entry_t)entry)) {
312 if (entry->pager == pager && !remove_entry)
313 return(entry);
314 else if (entry->pager == pager) {
315 queue_remove(bucket, entry,
316 vm_object_hash_entry_t, hash_link);
317 return(entry);
318 }
319
320 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
321 }
322
323 return(VM_OBJECT_HASH_ENTRY_NULL);
324}
325
326/*
327 * vm_object_hash_enter enters the specified
328 * pager / cache object association in the hashtable.
329 */
330
331void
332vm_object_hash_insert(
333 vm_object_hash_entry_t entry)
334{
335 register queue_t bucket;
336
337 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
338
339 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
340}
341
342vm_object_hash_entry_t
343vm_object_hash_entry_alloc(
344 ipc_port_t pager)
345{
346 vm_object_hash_entry_t entry;
347
348 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
349 entry->pager = pager;
350 entry->object = VM_OBJECT_NULL;
351 entry->waiting = FALSE;
352
353 return(entry);
354}
355
356void
357vm_object_hash_entry_free(
358 vm_object_hash_entry_t entry)
359{
360 zfree(vm_object_hash_zone, (vm_offset_t)entry);
361}
362
363/*
364 * vm_object_allocate:
365 *
366 * Returns a new object with the given size.
367 */
368
369void
370_vm_object_allocate(
371 vm_object_size_t size,
372 vm_object_t object)
373{
374 XPR(XPR_VM_OBJECT,
375 "vm_object_allocate, object 0x%X size 0x%X\n",
376 (integer_t)object, size, 0,0,0);
377
378 *object = vm_object_template;
379 queue_init(&object->memq);
380 queue_init(&object->msr_q);
381#ifdef UBC_DEBUG
382 queue_init(&object->uplq);
383#endif /* UBC_DEBUG */
384 vm_object_lock_init(object);
385 object->size = size;
386}
387
388vm_object_t
389vm_object_allocate(
390 vm_object_size_t size)
391{
392 register vm_object_t object;
393 register ipc_port_t port;
394
395 object = (vm_object_t) zalloc(vm_object_zone);
396
397// dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
398
399 _vm_object_allocate(size, object);
400
401 return object;
402}
403
404/*
405 * vm_object_bootstrap:
406 *
407 * Initialize the VM objects module.
408 */
409void
410vm_object_bootstrap(void)
411{
412 register i;
413
414 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
415 round_page(512*1024),
416 round_page(12*1024),
417 "vm objects");
418
419 queue_init(&vm_object_cached_list);
420 mutex_init(&vm_object_cached_lock_data, ETAP_VM_OBJ_CACHE);
421
422 vm_object_hash_zone =
423 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
424 round_page(512*1024),
425 round_page(12*1024),
426 "vm object hash entries");
427
428 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
429 queue_init(&vm_object_hashtable[i]);
430
431 /*
432 * Fill in a template object, for quick initialization
433 */
434
435 /* memq; Lock; init after allocation */
436 vm_object_template.size = 0;
437 vm_object_template.frozen_size = 0;
438 vm_object_template.ref_count = 1;
439#if TASK_SWAPPER
440 vm_object_template.res_count = 1;
441#endif /* TASK_SWAPPER */
442 vm_object_template.resident_page_count = 0;
443 vm_object_template.copy = VM_OBJECT_NULL;
444 vm_object_template.shadow = VM_OBJECT_NULL;
445 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
446 vm_object_template.true_share = FALSE;
447
448 vm_object_template.pager = IP_NULL;
449 vm_object_template.paging_offset = 0;
450 vm_object_template.pager_request = PAGER_REQUEST_NULL;
451 /* msr_q; init after allocation */
452
453 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
454 vm_object_template.absent_count = 0;
455 vm_object_template.paging_in_progress = 0;
456
457 /* Begin bitfields */
458 vm_object_template.all_wanted = 0; /* all bits FALSE */
459 vm_object_template.pager_created = FALSE;
460 vm_object_template.pager_initialized = FALSE;
461 vm_object_template.pager_ready = FALSE;
462 vm_object_template.pager_trusted = FALSE;
463 vm_object_template.can_persist = FALSE;
464 vm_object_template.internal = TRUE;
465 vm_object_template.temporary = TRUE;
466 vm_object_template.private = FALSE;
467 vm_object_template.pageout = FALSE;
468 vm_object_template.alive = TRUE;
469 vm_object_template.lock_in_progress = FALSE;
470 vm_object_template.lock_restart = FALSE;
471 vm_object_template.silent_overwrite = FALSE;
472 vm_object_template.advisory_pageout = FALSE;
473 vm_object_template.shadowed = FALSE;
474 vm_object_template.terminating = FALSE;
475 vm_object_template.shadow_severed = FALSE;
476 vm_object_template.phys_contiguous = FALSE;
477 /* End bitfields */
478
479 /* cached_list; init after allocation */
480 vm_object_template.last_alloc = (vm_object_offset_t) 0;
481 vm_object_template.cluster_size = 0;
482#if MACH_PAGEMAP
483 vm_object_template.existence_map = VM_EXTERNAL_NULL;
484#endif /* MACH_PAGEMAP */
485#if MACH_ASSERT
486 vm_object_template.paging_object = VM_OBJECT_NULL;
487#endif /* MACH_ASSERT */
488
489 /*
490 * Initialize the "kernel object"
491 */
492
493 kernel_object = &kernel_object_store;
494
495/*
496 * Note that in the following size specifications, we need to add 1 because
497 * VM_MAX_KERNEL_ADDRESS is a maximum address, not a size.
498 */
499 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
500 kernel_object);
501
502 /*
503 * Initialize the "submap object". Make it as large as the
504 * kernel object so that no limit is imposed on submap sizes.
505 */
506
507 vm_submap_object = &vm_submap_object_store;
508 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
509 vm_submap_object);
510 /*
511 * Create an "extra" reference to this object so that we never
512 * try to deallocate it; zfree doesn't like to be called with
513 * non-zone memory.
514 */
515 vm_object_reference(vm_submap_object);
516
517#if MACH_PAGEMAP
518 vm_external_module_initialize();
519#endif /* MACH_PAGEMAP */
520}
521
/*
 *	vm_object_init:
 *
 *	Hook for finishing initialization of the kernel object.
 *	It currently has no work to do.
 */
void
vm_object_init(void)
{
	/* Intentionally empty. */
}
529
530#if TASK_SWAPPER
531/*
532 * vm_object_res_deallocate
533 *
534 * (recursively) decrement residence counts on vm objects and their shadows.
535 * Called from vm_object_deallocate and when swapping out an object.
536 *
537 * The object is locked, and remains locked throughout the function,
538 * even as we iterate down the shadow chain. Locks on intermediate objects
539 * will be dropped, but not the original object.
540 *
541 * NOTE: this function used to use recursion, rather than iteration.
542 */
543
544void
545vm_object_res_deallocate(
546 vm_object_t object)
547{
548 vm_object_t orig_object = object;
549 /*
550 * Object is locked so it can be called directly
551 * from vm_object_deallocate. Original object is never
552 * unlocked.
553 */
554 assert(object->res_count > 0);
555 while (--object->res_count == 0) {
556 assert(object->ref_count >= object->res_count);
557 vm_object_deactivate_pages(object);
558 /* iterate on shadow, if present */
559 if (object->shadow != VM_OBJECT_NULL) {
560 vm_object_t tmp_object = object->shadow;
561 vm_object_lock(tmp_object);
562 if (object != orig_object)
563 vm_object_unlock(object);
564 object = tmp_object;
565 assert(object->res_count > 0);
566 } else
567 break;
568 }
569 if (object != orig_object)
570 vm_object_unlock(object);
571}
572
573/*
574 * vm_object_res_reference
575 *
576 * Internal function to increment residence count on a vm object
577 * and its shadows. It is called only from vm_object_reference, and
578 * when swapping in a vm object, via vm_map_swap.
579 *
580 * The object is locked, and remains locked throughout the function,
581 * even as we iterate down the shadow chain. Locks on intermediate objects
582 * will be dropped, but not the original object.
583 *
584 * NOTE: this function used to use recursion, rather than iteration.
585 */
586
587void
588vm_object_res_reference(
589 vm_object_t object)
590{
591 vm_object_t orig_object = object;
592 /*
593 * Object is locked, so this can be called directly
594 * from vm_object_reference. This lock is never released.
595 */
596 while ((++object->res_count == 1) &&
597 (object->shadow != VM_OBJECT_NULL)) {
598 vm_object_t tmp_object = object->shadow;
599
600 assert(object->ref_count >= object->res_count);
601 vm_object_lock(tmp_object);
602 if (object != orig_object)
603 vm_object_unlock(object);
604 object = tmp_object;
605 }
606 if (object != orig_object)
607 vm_object_unlock(object);
608 assert(orig_object->ref_count >= orig_object->res_count);
609}
610#endif /* TASK_SWAPPER */
611
612#if MACH_ASSERT
613/*
614 * vm_object_reference:
615 *
616 * Gets another reference to the given object.
617 */
618void
619vm_object_reference(
620 register vm_object_t object)
621{
622 if (object == VM_OBJECT_NULL)
623 return;
624
625 vm_object_lock(object);
626 assert(object->ref_count > 0);
627 object->ref_count++;
628 vm_object_res_reference(object);
629 vm_object_unlock(object);
630}
631#endif /* MACH_ASSERT */
632
633/* remove the typedef below when emergency work-around is taken out */
634typedef struct vnode_pager {
635 ipc_port_t pager; /* pager */
636 ipc_port_t pager_handle; /* pager handle */
637 ipc_port_t vm_obj_handle; /* memory object's control handle */
638 void *vnode_handle; /* vnode handle */
639} *vnode_pager_t;
640
641#define MIGHT_NOT_CACHE_SHADOWS 1
642#if MIGHT_NOT_CACHE_SHADOWS
643int cache_shadows = TRUE;
644#endif /* MIGHT_NOT_CACHE_SHADOWS */
645
646/*
647 * vm_object_deallocate:
648 *
649 * Release a reference to the specified object,
650 * gained either through a vm_object_allocate
651 * or a vm_object_reference call. When all references
652 * are gone, storage associated with this object
653 * may be relinquished.
654 *
655 * No object may be locked.
656 */
657void
658vm_object_deallocate(
659 register vm_object_t object)
660{
661 boolean_t retry_cache_trim = FALSE;
662 vm_object_t shadow;
663
664// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
665// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
666
667
668 while (object != VM_OBJECT_NULL) {
669
670 /*
671 * The cache holds a reference (uncounted) to
672 * the object; we must lock it before removing
673 * the object.
674 */
675
676 vm_object_cache_lock();
677 vm_object_lock(object);
678 assert(object->alive);
679
680 /*
681 * Lose the reference. If other references
682 * remain, then we are done, unless we need
683 * to retry a cache trim.
684 * If it is the last reference, then keep it
685 * until any pending initialization is completed.
686 */
687
688 assert(object->ref_count > 0);
689 if ((object->ref_count > 1) || (object->terminating)) {
690 /* if the object is terminating, it cannot go into */
691 /* the cache and we obviously should not call */
692 /* terminate again. */
693 object->ref_count--;
694 {
695 /* The following is an emergency work-around for */
696 /* no-mappings left notification to UBC. This fix */
697 /* violates numerous layering boundaries, is not */
698 /* provable with respect to races for new mappings */
699 /* from the UBC layer and is just plain ugly. The */
700 /* proper fix requires a guarantee of state */
701 /* between the vnode and the memory object and a */
702 /* sequenced delivery of empty status. This can */
703 /* be provided by the object_named interface and */
704 /* the effort to convert over should be undertaken */
705 /* at the earliest possible moment. */
706 if(object->ref_count == 1) {
707 vnode_pager_t vnode_pager;
708 if(object->pager) {
709 vnode_pager = (vnode_pager_t)
710 vnode_port_hash_lookup(
711 object->pager);
712 if(vnode_pager) {
713 extern void ubc_unmap(void *);
714 ubc_unmap(vnode_pager->vnode_handle);
715 }
716 }
717 }
718 }
719 vm_object_res_deallocate(object);
720 vm_object_unlock(object);
721 vm_object_cache_unlock();
722 if (retry_cache_trim &&
723 ((object = vm_object_cache_trim(TRUE)) !=
724 VM_OBJECT_NULL)) {
725 continue;
726 }
727 return;
728 }
729
730 /*
731 * We have to wait for initialization
732 * before destroying or caching the object.
733 */
734
735 if (object->pager_created && ! object->pager_initialized) {
736 assert(! object->can_persist);
737 vm_object_assert_wait(object,
738 VM_OBJECT_EVENT_INITIALIZED,
739 THREAD_UNINT);
740 vm_object_unlock(object);
741 vm_object_cache_unlock();
742 thread_block((void (*)(void))0);
743 continue;
744 }
745
746 /*
747 * If this object can persist, then enter it in
748 * the cache. Otherwise, terminate it.
749 *
750 * NOTE: Only permanent objects are cached, and
751 * permanent objects cannot have shadows. This
752 * affects the residence counting logic in a minor
753 * way (can do it in-line, mostly).
754 */
755
756 if (object->can_persist) {
757 /*
758 * Now it is safe to decrement reference count,
759 * and to return if reference count is > 0.
760 */
761 if (--object->ref_count > 0) {
762 vm_object_res_deallocate(object);
763 vm_object_unlock(object);
764 vm_object_cache_unlock();
765 if (retry_cache_trim &&
766 ((object = vm_object_cache_trim(TRUE)) !=
767 VM_OBJECT_NULL)) {
768 continue;
769 }
770 return;
771 }
772
773#if MIGHT_NOT_CACHE_SHADOWS
774 /*
775 * Remove shadow now if we don't
776 * want to cache shadows.
777 */
778 if (! cache_shadows) {
779 shadow = object->shadow;
780 object->shadow = VM_OBJECT_NULL;
781 }
782#endif /* MIGHT_NOT_CACHE_SHADOWS */
783
784 /*
785 * Enter the object onto the queue of
786 * cached objects, and deactivate
787 * all of its pages.
788 */
789 assert(object->shadow == VM_OBJECT_NULL);
790 VM_OBJ_RES_DECR(object);
791 XPR(XPR_VM_OBJECT,
792 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
793 (integer_t)object,
794 (integer_t)vm_object_cached_list.next,
795 (integer_t)vm_object_cached_list.prev,0,0);
796
797 vm_object_cached_count++;
798 if (vm_object_cached_count > vm_object_cached_high)
799 vm_object_cached_high = vm_object_cached_count;
800 queue_enter(&vm_object_cached_list, object,
801 vm_object_t, cached_list);
802 vm_object_cache_unlock();
803 vm_object_deactivate_pages(object);
804 vm_object_unlock(object);
805
806#if MIGHT_NOT_CACHE_SHADOWS
807 /*
808 * If we have a shadow that we need
809 * to deallocate, do so now, remembering
810 * to trim the cache later.
811 */
812 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
813 object = shadow;
814 retry_cache_trim = TRUE;
815 continue;
816 }
817#endif /* MIGHT_NOT_CACHE_SHADOWS */
818
819 /*
820 * Trim the cache. If the cache trim
821 * returns with a shadow for us to deallocate,
822 * then remember to retry the cache trim
823 * when we are done deallocating the shadow.
824 * Otherwise, we are done.
825 */
826
827 object = vm_object_cache_trim(TRUE);
828 if (object == VM_OBJECT_NULL) {
829 return;
830 }
831 retry_cache_trim = TRUE;
832
833 } else {
834 /*
835 * This object is not cachable; terminate it.
836 */
837 XPR(XPR_VM_OBJECT,
838 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%lX ref %d\n",
839 (integer_t)object, object->resident_page_count,
840 object->paging_in_progress,
841 (natural_t)current_thread(),object->ref_count);
842
843 VM_OBJ_RES_DECR(object); /* XXX ? */
844 /*
845 * Terminate this object. If it had a shadow,
846 * then deallocate it; otherwise, if we need
847 * to retry a cache trim, do so now; otherwise,
848 * we are done. "pageout" objects have a shadow,
849 * but maintain a "paging reference" rather than
850 * a normal reference.
851 */
852 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
853 if(vm_object_terminate(object) != KERN_SUCCESS) {
854 return;
855 }
856 if (shadow != VM_OBJECT_NULL) {
857 object = shadow;
858 continue;
859 }
860 if (retry_cache_trim &&
861 ((object = vm_object_cache_trim(TRUE)) !=
862 VM_OBJECT_NULL)) {
863 continue;
864 }
865 return;
866 }
867 }
868 assert(! retry_cache_trim);
869}
870
871/*
872 * Check to see whether we really need to trim
873 * down the cache. If so, remove an object from
874 * the cache, terminate it, and repeat.
875 *
876 * Called with, and returns with, cache lock unlocked.
877 */
878vm_object_t
879vm_object_cache_trim(
880 boolean_t called_from_vm_object_deallocate)
881{
882 register vm_object_t object = VM_OBJECT_NULL;
883 vm_object_t shadow;
884
885 for (;;) {
886
887 /*
888 * If we no longer need to trim the cache,
889 * then we are done.
890 */
891
892 vm_object_cache_lock();
893 if (vm_object_cached_count <= vm_object_cached_max) {
894 vm_object_cache_unlock();
895 return VM_OBJECT_NULL;
896 }
897
898 /*
899 * We must trim down the cache, so remove
900 * the first object in the cache.
901 */
902 XPR(XPR_VM_OBJECT,
903 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
904 (integer_t)vm_object_cached_list.next,
905 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
906
907 object = (vm_object_t) queue_first(&vm_object_cached_list);
908 vm_object_lock(object);
909 queue_remove(&vm_object_cached_list, object, vm_object_t,
910 cached_list);
911 vm_object_cached_count--;
912
913 /*
914 * Since this object is in the cache, we know
915 * that it is initialized and has no references.
916 * Take a reference to avoid recursive deallocations.
917 */
918
919 assert(object->pager_initialized);
920 assert(object->ref_count == 0);
921 object->ref_count++;
922
923 /*
924 * Terminate the object.
925 * If the object had a shadow, we let vm_object_deallocate
926 * deallocate it. "pageout" objects have a shadow, but
927 * maintain a "paging reference" rather than a normal
928 * reference.
929 * (We are careful here to limit recursion.)
930 */
931 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
932 if(vm_object_terminate(object) != KERN_SUCCESS)
933 continue;
934 if (shadow != VM_OBJECT_NULL) {
935 if (called_from_vm_object_deallocate) {
936 return shadow;
937 } else {
938 vm_object_deallocate(shadow);
939 }
940 }
941 }
942}
943
944boolean_t vm_object_terminate_remove_all = FALSE;
945
946/*
947 * Routine: vm_object_terminate
948 * Purpose:
949 * Free all resources associated with a vm_object.
950 * In/out conditions:
951 * Upon entry, the object and the cache must be locked,
952 * and the object must have exactly one reference.
953 *
954 * The shadow object reference is left alone.
955 *
956 * The object must be unlocked if it is found that pages
957 * must be flushed to a backing object. If someone
958 * manages to map the object while it is being flushed
959 * the object is returned unlocked and unchanged. Otherwise,
960 * upon exit, the cache will be unlocked, and the
961 * object will cease to exist.
962 */
963kern_return_t
964vm_object_terminate(
965 register vm_object_t object)
966{
967 register vm_page_t p;
968 vm_object_t shadow_object;
969
970 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
971 (integer_t)object, object->ref_count, 0, 0, 0);
972
973 /*
974 * Make sure the object isn't already being terminated
975 */
976
977 assert(object->alive);
978 if(object->terminating) {
979 vm_object_cache_unlock();
980 object->ref_count -= 1;
981 vm_object_unlock(object);
982 return KERN_FAILURE;
983 }
984 object->terminating = TRUE;
985
986 vm_object_cache_unlock();
987 if (!object->pageout && (!object->temporary || object->can_persist)
988 && (object->pager != NULL || object->shadow_severed)) {
989 while (!queue_empty(&object->memq)) {
990 /*
991 * Clear pager_trusted bit so that the pages get yanked
992 * out of the object instead of cleaned in place. This
993 * prevents a deadlock in XMM and makes more sense anyway.
994 */
995 object->pager_trusted = FALSE;
996
997 p = (vm_page_t) queue_first(&object->memq);
998
999 VM_PAGE_CHECK(p);
1000
1001 if (p->busy || p->cleaning) {
1002 if(p->cleaning || p->absent) {
1003 vm_object_paging_wait(object, THREAD_UNINT);
1004 continue;
1005 } else {
1006 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
1007 }
1008 }
1009
1010 vm_page_lock_queues();
1011 VM_PAGE_QUEUES_REMOVE(p);
1012 vm_page_unlock_queues();
1013
1014 if (p->absent || p->private) {
1015
1016 /*
1017 * For private pages, VM_PAGE_FREE just
1018 * leaves the page structure around for
1019 * its owner to clean up. For absent
1020 * pages, the structure is returned to
1021 * the appropriate pool.
1022 */
1023
1024 goto free_page;
1025 }
1026
1027 if (p->fictitious)
1028 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
1029
1030 if (!p->dirty)
1031 p->dirty = pmap_is_modified(p->phys_addr);
1032
1033 if (p->dirty || p->precious) {
1034 p->busy = TRUE;
1035 vm_object_paging_begin(object);
1036 /* protect the object from re-use/caching while it */
1037 /* is unlocked */
1038 vm_object_unlock(object);
1039 vm_pageout_cluster(p); /* flush page */
1040 vm_object_lock(object);
1041 vm_object_paging_wait(object, THREAD_UNINT);
1042 XPR(XPR_VM_OBJECT,
1043 "vm_object_terminate restart, object 0x%X ref %d\n",
1044 (integer_t)object, object->ref_count, 0, 0, 0);
1045 } else {
1046 free_page:
1047 VM_PAGE_FREE(p);
1048 }
1049 }
1050 }
1051 if(object->ref_count != 1) {
1052 object->ref_count -= 1;
1053 vm_object_res_deallocate(object);
1054 object->terminating = FALSE;
1055 /* kick off anyone waiting on terminating */
1056 vm_object_paging_begin(object);
1057 vm_object_paging_end(object);
1058 vm_object_unlock(object);
1059 return KERN_FAILURE;
1060 }
1061
1062 object->alive = FALSE;
1063
1064 /*
1065 * Make sure no one can look us up now.
1066 */
1067
1068 vm_object_cache_lock();
1069
1070 if(object->pager != IP_NULL) {
1071 vm_object_hash_entry_t entry;
1072
1073 entry = vm_object_hash_lookup(object->pager, FALSE);
1074 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1075 entry->object = VM_OBJECT_NULL;
1076 }
1077
1078 vm_object_cache_unlock();
1079
1080 /*
1081 * Detach the object from its shadow if we are the shadow's
1082 * copy.
1083 */
1084 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1085 !(object->pageout)) {
1086 vm_object_lock(shadow_object);
1087 assert((shadow_object->copy == object) ||
1088 (shadow_object->copy == VM_OBJECT_NULL));
1089 shadow_object->copy = VM_OBJECT_NULL;
1090 vm_object_unlock(shadow_object);
1091 }
1092
1093 /*
1094 * The pageout daemon might be playing with our pages.
1095 * Now that the object is dead, it won't touch any more
1096 * pages, but some pages might already be on their way out.
1097 * Hence, we wait until the active paging activities have ceased.
1098 */
1099 vm_object_paging_wait(object, THREAD_UNINT);
1100 object->ref_count--;
1101#if TASK_SWAPPER
1102 assert(object->res_count == 0);
1103#endif /* TASK_SWAPPER */
1104
1105Restart:
1106 assert (object->ref_count == 0);
1107
1108 /*
1109 * Clean or free the pages, as appropriate.
1110 * It is possible for us to find busy/absent pages,
1111 * if some faults on this object were aborted.
1112 */
1113 if (object->pageout) {
1114 assert(shadow_object != VM_OBJECT_NULL);
1115 assert(shadow_object == object->shadow);
1116
1117 vm_pageout_object_terminate(object);
1118
1119 } else if (object->temporary && ! object->can_persist ||
1120 object->pager == IP_NULL) {
1121 while (!queue_empty(&object->memq)) {
1122 p = (vm_page_t) queue_first(&object->memq);
1123
1124 VM_PAGE_CHECK(p);
1125 VM_PAGE_FREE(p);
1126 }
1127 } else if (!queue_empty(&object->memq)) {
1128 panic("vm_object_terminate: queue just emptied isn't");
1129 }
1130
1131 assert(object->paging_in_progress == 0);
1132 assert(object->ref_count == 0);
1133
1134 vm_object_remove(object);
1135
1136 /*
1137 * Throw away port rights... note that they may
1138 * already have been thrown away (by vm_object_destroy
1139 * or memory_object_destroy).
1140 *
1141 * Instead of destroying the control port,
1142 * we send all rights off to the memory manager,
1143 * using memory_object_terminate.
1144 */
1145
1146 vm_object_unlock(object);
1147 if (object->pager != IP_NULL) {
1148 /* consumes our rights for pager, pager_request */
1149 memory_object_release(object->pager, object->pager_request);
1150 }
1151 /* kick off anyone waiting on terminating */
1152 vm_object_lock(object);
1153 vm_object_paging_begin(object);
1154 vm_object_paging_end(object);
1155 vm_object_unlock(object);
1156
1157#if MACH_PAGEMAP
1158 vm_external_destroy(object->existence_map, object->size);
1159#endif /* MACH_PAGEMAP */
1160
1161 /*
1162 * Free the space for the object.
1163 */
1164
1165 zfree(vm_object_zone, (vm_offset_t) object);
1166 return KERN_SUCCESS;
1167}
1168
1169/*
1170 * Routine: vm_object_pager_wakeup
1171 * Purpose: Wake up anyone waiting for termination of a pager.
1172 */
1173
1174void
1175vm_object_pager_wakeup(
1176 ipc_port_t pager)
1177{
1178 vm_object_hash_entry_t entry;
1179 boolean_t waiting = FALSE;
1180
1181 /*
1182 * If anyone was waiting for the memory_object_terminate
1183 * to be queued, wake them up now.
1184 */
1185 vm_object_cache_lock();
1186 entry = vm_object_hash_lookup(pager, TRUE);
1187 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1188 waiting = entry->waiting;
1189 vm_object_cache_unlock();
1190 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1191 if (waiting)
1192 thread_wakeup((event_t) pager);
1193 vm_object_hash_entry_free(entry);
1194 }
1195}
1196
1197/*
1198 * memory_object_release_name:
1199 * Enforces name semantic on memory_object reference count decrement
1200 * This routine should not be called unless the caller holds a name
1201 * reference gained through the memory_object_named_create or the
1202 * memory_object_rename call.
1203 * If the TERMINATE_IDLE flag is set, the call will return if the
1204 * reference count is not 1. i.e. idle with the only remaining reference
1205 * being the name.
1206 * If the decision is made to proceed the name field flag is set to
1207 * false and the reference count is decremented. If the RESPECT_CACHE
1208 * flag is set and the reference count has gone to zero, the
1209 * memory_object is checked to see if it is cacheable otherwise when
1210 * the reference count is zero, it is simply terminated.
1211 */
1212
1213kern_return_t
1214memory_object_release_name(
1215 vm_object_t object,
1216 int flags)
1217{
1218 vm_object_t shadow;
1219 boolean_t original_object = TRUE;
1220
1221 while (object != VM_OBJECT_NULL) {
1222
1223 /*
1224 * The cache holds a reference (uncounted) to
1225 * the object. We must locke it before removing
1226 * the object.
1227 *
1228 */
1229
1230 vm_object_cache_lock();
1231 vm_object_lock(object);
1232 assert(object->alive);
1233 if(original_object)
1234 assert(object->named);
1235 assert(object->ref_count > 0);
1236
1237 /*
1238 * We have to wait for initialization before
1239 * destroying or caching the object.
1240 */
1241
1242 if (object->pager_created && !object->pager_initialized) {
1243 assert(!object->can_persist);
1244 vm_object_assert_wait(object,
1245 VM_OBJECT_EVENT_INITIALIZED,
1246 THREAD_UNINT);
1247 vm_object_unlock(object);
1248 vm_object_cache_unlock();
1249 thread_block((void (*)(void)) 0);
1250 continue;
1251 }
1252
1253 if (((object->ref_count > 1)
1254 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
1255 || (object->terminating)) {
1256 vm_object_unlock(object);
1257 vm_object_cache_unlock();
1258 return KERN_FAILURE;
1259 } else {
1260 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
1261 vm_object_unlock(object);
1262 vm_object_cache_unlock();
1263 return KERN_SUCCESS;
1264 }
1265 }
1266
1267 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
1268 (object->ref_count == 1)) {
1269 if(original_object)
1270 object->named = FALSE;
1271 vm_object_unlock(object);
1272 vm_object_cache_unlock();
1273 /* let vm_object_deallocate push this thing into */
1274 /* the cache, if that it is where it is bound */
1275 vm_object_deallocate(object);
1276 return KERN_SUCCESS;
1277 }
1278 VM_OBJ_RES_DECR(object);
1279 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
1280 if(object->ref_count == 1) {
1281 if(vm_object_terminate(object) != KERN_SUCCESS) {
1282 if(original_object) {
1283 return KERN_FAILURE;
1284 } else {
1285 return KERN_SUCCESS;
1286 }
1287 }
1288 if (shadow != VM_OBJECT_NULL) {
1289 original_object = FALSE;
1290 object = shadow;
1291 continue;
1292 }
1293 return KERN_SUCCESS;
1294 } else {
1295 object->ref_count--;
1296 if(original_object)
1297 object->named = FALSE;
1298 vm_object_unlock(object);
1299 vm_object_cache_unlock();
1300 return KERN_SUCCESS;
1301 }
1302 }
1303}
1304
1305/*
1306 * Routine: memory_object_release
1307 * Purpose: Terminate the pager and release port rights,
1308 * just like memory_object_terminate, except
1309 * that we wake up anyone blocked in vm_object_enter
1310 * waiting for termination message to be queued
1311 * before calling memory_object_init.
1312 */
1313void
1314memory_object_release(
1315 ipc_port_t pager,
1316 pager_request_t pager_request)
1317{
1318#ifdef MACH_BSD
1319 kern_return_t vnode_pager_terminate(ipc_port_t, ipc_port_t);
1320#endif
1321
1322 /*
1323 * Keep a reference to pager port;
1324 * the terminate might otherwise release all references.
1325 */
1326 ipc_port_copy_send(pager);
1327
1328 /*
1329 * Terminate the pager.
1330 */
1331
1332#ifdef MACH_BSD
1333 if(((rpc_subsystem_t)pager_mux_hash_lookup(pager)) ==
1334 ((rpc_subsystem_t) &vnode_pager_workaround)) {
1335 (void) vnode_pager_terminate(pager, pager_request);
1336 } else {
1337 (void) memory_object_terminate(pager, pager_request);
1338 }
1339#else
1340 (void) memory_object_terminate(pager, pager_request);
1341#endif
1342
1343 /*
1344 * Wakeup anyone waiting for this terminate
1345 */
1346 vm_object_pager_wakeup(pager);
1347
1348 /*
1349 * Release reference to pager port.
1350 */
1351 ipc_port_release_send(pager);
1352}
1353
1354/*
1355 * Routine: vm_object_abort_activity [internal use only]
1356 * Purpose:
1357 * Abort paging requests pending on this object.
1358 * In/out conditions:
1359 * The object is locked on entry and exit.
1360 */
1361void
1362vm_object_abort_activity(
1363 vm_object_t object)
1364{
1365 register
1366 vm_page_t p;
1367 vm_page_t next;
1368
1369 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
1370 (integer_t)object, 0, 0, 0, 0);
1371
1372 /*
1373 * Abort all activity that would be waiting
1374 * for a result on this memory object.
1375 *
1376 * We could also choose to destroy all pages
1377 * that we have in memory for this object, but
1378 * we don't.
1379 */
1380
1381 p = (vm_page_t) queue_first(&object->memq);
1382 while (!queue_end(&object->memq, (queue_entry_t) p)) {
1383 next = (vm_page_t) queue_next(&p->listq);
1384
1385 /*
1386 * If it's being paged in, destroy it.
1387 * If an unlock has been requested, start it again.
1388 */
1389
1390 if (p->busy && p->absent) {
1391 VM_PAGE_FREE(p);
1392 }
1393 else {
1394 if (p->unlock_request != VM_PROT_NONE)
1395 p->unlock_request = VM_PROT_NONE;
1396 PAGE_WAKEUP(p);
1397 }
1398
1399 p = next;
1400 }
1401
1402 /*
1403 * Wake up threads waiting for the memory object to
1404 * become ready.
1405 */
1406
1407 object->pager_ready = TRUE;
1408 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1409}
1410
1411/*
1412 * Routine: memory_object_destroy [user interface]
1413 * Purpose:
1414 * Shut down a memory object, despite the
1415 * presence of address map (or other) references
1416 * to the vm_object.
1417 */
1418kern_return_t
1419memory_object_destroy(
1420 register vm_object_t object,
1421 kern_return_t reason)
1422{
1423 ipc_port_t old_object;
1424 pager_request_t old_pager_request;
1425
1426#ifdef lint
1427 reason++;
1428#endif /* lint */
1429
1430 if (object == VM_OBJECT_NULL)
1431 return(KERN_SUCCESS);
1432
1433 /*
1434 * Remove the port associations immediately.
1435 *
1436 * This will prevent the memory manager from further
1437 * meddling. [If it wanted to flush data or make
1438 * other changes, it should have done so before performing
1439 * the destroy call.]
1440 */
1441
1442 vm_object_cache_lock();
1443 vm_object_lock(object);
1444 vm_object_remove(object);
1445 object->can_persist = FALSE;
1446 object->named = FALSE;
1447 vm_object_cache_unlock();
1448
1449 /*
1450 * Rip out the ports from the vm_object now... this
1451 * will prevent new memory_object calls from succeeding.
1452 */
1453
1454 old_object = object->pager;
1455 old_pager_request = object->pager_request;
1456
1457 object->pager = IP_NULL;
1458 object->pager_request = PAGER_REQUEST_NULL;
1459
1460 /*
1461 * Wait for existing paging activity (that might
1462 * have the old ports) to subside.
1463 */
1464
1465 vm_object_paging_wait(object, THREAD_UNINT);
1466 vm_object_unlock(object);
1467
1468 /*
1469 * Shut down the ports now.
1470 *
1471 * [Paging operations may be proceeding concurrently --
1472 * they'll get the null values established above.]
1473 */
1474
1475 if (old_object != IP_NULL) {
1476 /* consumes our rights for object, control */
1477 memory_object_release(old_object, old_pager_request);
1478 }
1479
1480 /*
1481 * Lose the reference that was donated for this routine
1482 */
1483
1484 vm_object_deallocate(object);
1485
1486 return(KERN_SUCCESS);
1487}
1488
1489/*
1490 * vm_object_deactivate_pages
1491 *
1492 * Deactivate all pages in the specified object. (Keep its pages
1493 * in memory even though it is no longer referenced.)
1494 *
1495 * The object must be locked.
1496 */
1497void
1498vm_object_deactivate_pages(
1499 register vm_object_t object)
1500{
1501 register vm_page_t p;
1502
1503 queue_iterate(&object->memq, p, vm_page_t, listq) {
1504 vm_page_lock_queues();
1505 if (!p->busy)
1506 vm_page_deactivate(p);
1507 vm_page_unlock_queues();
1508 }
1509}
1510
1511
1512/*
1513 * Routine: vm_object_pmap_protect
1514 *
1515 * Purpose:
1516 * Reduces the permission for all physical
1517 * pages in the specified object range.
1518 *
1519 * If removing write permission only, it is
1520 * sufficient to protect only the pages in
1521 * the top-level object; only those pages may
1522 * have write permission.
1523 *
1524 * If removing all access, we must follow the
1525 * shadow chain from the top-level object to
1526 * remove access to all pages in shadowed objects.
1527 *
1528 * The object must *not* be locked. The object must
1529 * be temporary/internal.
1530 *
1531 * If pmap is not NULL, this routine assumes that
1532 * the only mappings for the pages are in that
1533 * pmap.
1534 */
1535
1536void
1537vm_object_pmap_protect(
1538 register vm_object_t object,
1539 register vm_object_offset_t offset,
1540 vm_size_t size,
1541 pmap_t pmap,
1542 vm_offset_t pmap_start,
1543 vm_prot_t prot)
1544{
1545 if (object == VM_OBJECT_NULL)
1546 return;
1547
1548 vm_object_lock(object);
1549
1550 assert(object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
1551
1552 while (TRUE) {
1553 if (object->resident_page_count > atop(size) / 2 &&
1554 pmap != PMAP_NULL) {
1555 vm_object_unlock(object);
1556 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1557 return;
1558 }
1559
1560 {
1561 register vm_page_t p;
1562 register vm_object_offset_t end;
1563
1564 end = offset + size;
1565
1566 if (pmap != PMAP_NULL) {
1567 queue_iterate(&object->memq, p, vm_page_t, listq) {
1568 if (!p->fictitious &&
1569 (offset <= p->offset) && (p->offset < end)) {
1570
1571 vm_offset_t start = pmap_start +
1572 (vm_offset_t)(p->offset - offset);
1573
1574 pmap_protect(pmap, start, start + PAGE_SIZE, prot);
1575 }
1576 }
1577 } else {
1578 queue_iterate(&object->memq, p, vm_page_t, listq) {
1579 if (!p->fictitious &&
1580 (offset <= p->offset) && (p->offset < end)) {
1581
1582 pmap_page_protect(p->phys_addr,
1583 prot & ~p->page_lock);
1584 }
1585 }
1586 }
1587 }
1588
1589 if (prot == VM_PROT_NONE) {
1590 /*
1591 * Must follow shadow chain to remove access
1592 * to pages in shadowed objects.
1593 */
1594 register vm_object_t next_object;
1595
1596 next_object = object->shadow;
1597 if (next_object != VM_OBJECT_NULL) {
1598 offset += object->shadow_offset;
1599 vm_object_lock(next_object);
1600 vm_object_unlock(object);
1601 object = next_object;
1602 }
1603 else {
1604 /*
1605 * End of chain - we are done.
1606 */
1607 break;
1608 }
1609 }
1610 else {
1611 /*
1612 * Pages in shadowed objects may never have
1613 * write permission - we may stop here.
1614 */
1615 break;
1616 }
1617 }
1618
1619 vm_object_unlock(object);
1620}
1621
1622/*
1623 * Routine: vm_object_copy_slowly
1624 *
1625 * Description:
1626 * Copy the specified range of the source
1627 * virtual memory object without using
1628 * protection-based optimizations (such
1629 * as copy-on-write). The pages in the
1630 * region are actually copied.
1631 *
1632 * In/out conditions:
1633 * The caller must hold a reference and a lock
1634 * for the source virtual memory object. The source
1635 * object will be returned *unlocked*.
1636 *
1637 * Results:
1638 * If the copy is completed successfully, KERN_SUCCESS is
1639 * returned. If the caller asserted the interruptible
1640 * argument, and an interruption occurred while waiting
1641 * for a user-generated event, MACH_SEND_INTERRUPTED is
1642 * returned. Other values may be returned to indicate
1643 * hard errors during the copy operation.
1644 *
1645 * A new virtual memory object is returned in a
1646 * parameter (_result_object). The contents of this
1647 * new object, starting at a zero offset, are a copy
1648 * of the source memory region. In the event of
1649 * an error, this parameter will contain the value
1650 * VM_OBJECT_NULL.
1651 */
1652kern_return_t
1653vm_object_copy_slowly(
1654 register vm_object_t src_object,
1655 vm_object_offset_t src_offset,
1656 vm_object_size_t size,
1657 boolean_t interruptible,
1658 vm_object_t *_result_object) /* OUT */
1659{
1660 vm_object_t new_object;
1661 vm_object_offset_t new_offset;
1662
1663 vm_object_offset_t src_lo_offset = src_offset;
1664 vm_object_offset_t src_hi_offset = src_offset + size;
1665
1666 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1667 src_object, src_offset, size, 0, 0);
1668
1669 if (size == 0) {
1670 vm_object_unlock(src_object);
1671 *_result_object = VM_OBJECT_NULL;
1672 return(KERN_INVALID_ARGUMENT);
1673 }
1674
1675 /*
1676 * Prevent destruction of the source object while we copy.
1677 */
1678
1679 assert(src_object->ref_count > 0);
1680 src_object->ref_count++;
1681 VM_OBJ_RES_INCR(src_object);
1682 vm_object_unlock(src_object);
1683
1684 /*
1685 * Create a new object to hold the copied pages.
1686 * A few notes:
1687 * We fill the new object starting at offset 0,
1688 * regardless of the input offset.
1689 * We don't bother to lock the new object within
1690 * this routine, since we have the only reference.
1691 */
1692
1693 new_object = vm_object_allocate(size);
1694 new_offset = 0;
1695
1696 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1697
1698 for ( ;
1699 size != 0 ;
1700 src_offset += PAGE_SIZE_64,
1701 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1702 ) {
1703 vm_page_t new_page;
1704 vm_fault_return_t result;
1705
1706 while ((new_page = vm_page_alloc(new_object, new_offset))
1707 == VM_PAGE_NULL) {
1708 if (!vm_page_wait(interruptible)) {
1709 vm_object_deallocate(new_object);
1710 *_result_object = VM_OBJECT_NULL;
1711 return(MACH_SEND_INTERRUPTED);
1712 }
1713 }
1714
1715 do {
1716 vm_prot_t prot = VM_PROT_READ;
1717 vm_page_t _result_page;
1718 vm_page_t top_page;
1719 register
1720 vm_page_t result_page;
1721 kern_return_t error_code;
1722
1723 vm_object_lock(src_object);
1724 vm_object_paging_begin(src_object);
1725
1726 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1727 result = vm_fault_page(src_object, src_offset,
1728 VM_PROT_READ, FALSE, interruptible,
1729 src_lo_offset, src_hi_offset,
1730 VM_BEHAVIOR_SEQUENTIAL,
1731 &prot, &_result_page, &top_page,
1732 (int *)0,
1733 &error_code, FALSE, FALSE);
1734
1735 switch(result) {
1736 case VM_FAULT_SUCCESS:
1737 result_page = _result_page;
1738
1739 /*
1740 * We don't need to hold the object
1741 * lock -- the busy page will be enough.
1742 * [We don't care about picking up any
1743 * new modifications.]
1744 *
1745 * Copy the page to the new object.
1746 *
1747 * POLICY DECISION:
1748 * If result_page is clean,
1749 * we could steal it instead
1750 * of copying.
1751 */
1752
1753 vm_object_unlock(result_page->object);
1754 vm_page_copy(result_page, new_page);
1755
1756 /*
1757 * Let go of both pages (make them
1758 * not busy, perform wakeup, activate).
1759 */
1760
1761 new_page->busy = FALSE;
1762 new_page->dirty = TRUE;
1763 vm_object_lock(result_page->object);
1764 PAGE_WAKEUP_DONE(result_page);
1765
1766 vm_page_lock_queues();
1767 if (!result_page->active &&
1768 !result_page->inactive)
1769 vm_page_activate(result_page);
1770 vm_page_activate(new_page);
1771 vm_page_unlock_queues();
1772
1773 /*
1774 * Release paging references and
1775 * top-level placeholder page, if any.
1776 */
1777
1778 vm_fault_cleanup(result_page->object,
1779 top_page);
1780
1781 break;
1782
1783 case VM_FAULT_RETRY:
1784 break;
1785
1786 case VM_FAULT_FICTITIOUS_SHORTAGE:
1787 vm_page_more_fictitious();
1788 break;
1789
1790 case VM_FAULT_MEMORY_SHORTAGE:
1791 if (vm_page_wait(interruptible))
1792 break;
1793 /* fall thru */
1794
1795 case VM_FAULT_INTERRUPTED:
1796 vm_page_free(new_page);
1797 vm_object_deallocate(new_object);
1798 vm_object_deallocate(src_object);
1799 *_result_object = VM_OBJECT_NULL;
1800 return(MACH_SEND_INTERRUPTED);
1801
1802 case VM_FAULT_MEMORY_ERROR:
1803 /*
1804 * A policy choice:
1805 * (a) ignore pages that we can't
1806 * copy
1807 * (b) return the null object if
1808 * any page fails [chosen]
1809 */
1810
1811 vm_page_lock_queues();
1812 vm_page_free(new_page);
1813 vm_page_unlock_queues();
1814 vm_object_deallocate(new_object);
1815 vm_object_deallocate(src_object);
1816 *_result_object = VM_OBJECT_NULL;
1817 return(error_code ? error_code:
1818 KERN_MEMORY_ERROR);
1819 }
1820 } while (result != VM_FAULT_SUCCESS);
1821 }
1822
1823 /*
1824 * Lose the extra reference, and return our object.
1825 */
1826
1827 vm_object_deallocate(src_object);
1828 *_result_object = new_object;
1829 return(KERN_SUCCESS);
1830}
1831
1832/*
1833 * Routine: vm_object_copy_quickly
1834 *
1835 * Purpose:
1836 * Copy the specified range of the source virtual
1837 * memory object, if it can be done without waiting
1838 * for user-generated events.
1839 *
1840 * Results:
1841 * If the copy is successful, the copy is returned in
1842 * the arguments; otherwise, the arguments are not
1843 * affected.
1844 *
1845 * In/out conditions:
1846 * The object should be unlocked on entry and exit.
1847 */
1848
1849/*ARGSUSED*/
1850boolean_t
1851vm_object_copy_quickly(
1852 vm_object_t *_object, /* INOUT */
1853 vm_object_offset_t offset, /* IN */
1854 vm_object_size_t size, /* IN */
1855 boolean_t *_src_needs_copy, /* OUT */
1856 boolean_t *_dst_needs_copy) /* OUT */
1857{
1858 vm_object_t object = *_object;
1859 memory_object_copy_strategy_t copy_strategy;
1860
1861 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1862 *_object, offset, size, 0, 0);
1863 if (object == VM_OBJECT_NULL) {
1864 *_src_needs_copy = FALSE;
1865 *_dst_needs_copy = FALSE;
1866 return(TRUE);
1867 }
1868
1869 vm_object_lock(object);
1870
1871 copy_strategy = object->copy_strategy;
1872
1873 switch (copy_strategy) {
1874 case MEMORY_OBJECT_COPY_SYMMETRIC:
1875
1876 /*
1877 * Symmetric copy strategy.
1878 * Make another reference to the object.
1879 * Leave object/offset unchanged.
1880 */
1881
1882 assert(object->ref_count > 0);
1883 object->ref_count++;
1884 vm_object_res_reference(object);
1885 object->shadowed = TRUE;
1886 vm_object_unlock(object);
1887
1888 /*
1889 * Both source and destination must make
1890 * shadows, and the source must be made
1891 * read-only if not already.
1892 */
1893
1894 *_src_needs_copy = TRUE;
1895 *_dst_needs_copy = TRUE;
1896
1897 break;
1898
1899 case MEMORY_OBJECT_COPY_DELAY:
1900 vm_object_unlock(object);
1901 return(FALSE);
1902
1903 default:
1904 vm_object_unlock(object);
1905 return(FALSE);
1906 }
1907 return(TRUE);
1908}
1909
/*
 *	Event counters updated by vm_object_copy_call.
 */
int	copy_call_count = 0;		/* calls made */
int	copy_call_sleep_count = 0;	/* waits for the copy to arrive */
int	copy_call_restart_count = 0;	/* waits behind a copy already in progress */
1913
1914/*
1915 * Routine: vm_object_copy_call [internal]
1916 *
1917 * Description:
1918 * Copy the source object (src_object), using the
1919 * user-managed copy algorithm.
1920 *
1921 * In/out conditions:
1922 * The source object must be locked on entry. It
1923 * will be *unlocked* on exit.
1924 *
1925 * Results:
1926 * If the copy is successful, KERN_SUCCESS is returned.
1927 * A new object that represents the copied virtual
1928 * memory is returned in a parameter (*_result_object).
1929 * If the return value indicates an error, this parameter
1930 * is not valid.
1931 */
1932kern_return_t
1933vm_object_copy_call(
1934 vm_object_t src_object,
1935 vm_object_offset_t src_offset,
1936 vm_object_size_t size,
1937 vm_object_t *_result_object) /* OUT */
1938{
1939 kern_return_t kr;
1940 vm_object_t copy;
1941 boolean_t check_ready = FALSE;
1942
1943 /*
1944 * If a copy is already in progress, wait and retry.
1945 *
1946 * XXX
1947 * Consider making this call interruptable, as Mike
1948 * intended it to be.
1949 *
1950 * XXXO
1951 * Need a counter or version or something to allow
1952 * us to use the copy that the currently requesting
1953 * thread is obtaining -- is it worth adding to the
1954 * vm object structure? Depends how common this case it.
1955 */
1956 copy_call_count++;
1957 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1958 vm_object_wait(src_object, VM_OBJECT_EVENT_COPY_CALL,
1959 THREAD_UNINT);
1960 vm_object_lock(src_object);
1961 copy_call_restart_count++;
1962 }
1963
1964 /*
1965 * Indicate (for the benefit of memory_object_create_copy)
1966 * that we want a copy for src_object. (Note that we cannot
1967 * do a real assert_wait before calling memory_object_copy,
1968 * so we simply set the flag.)
1969 */
1970
1971 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1972 vm_object_unlock(src_object);
1973
1974 /*
1975 * Ask the memory manager to give us a memory object
1976 * which represents a copy of the src object.
1977 * The memory manager may give us a memory object
1978 * which we already have, or it may give us a
1979 * new memory object. This memory object will arrive
1980 * via memory_object_create_copy.
1981 */
1982
1983 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1984 if (kr != KERN_SUCCESS) {
1985 return kr;
1986 }
1987
1988 /*
1989 * Wait for the copy to arrive.
1990 */
1991 vm_object_lock(src_object);
1992 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1993 vm_object_wait(src_object, VM_OBJECT_EVENT_COPY_CALL,
1994 THREAD_UNINT);
1995 vm_object_lock(src_object);
1996 copy_call_sleep_count++;
1997 }
1998Retry:
1999 assert(src_object->copy != VM_OBJECT_NULL);
2000 copy = src_object->copy;
2001 if (!vm_object_lock_try(copy)) {
2002 vm_object_unlock(src_object);
2003 mutex_pause(); /* wait a bit */
2004 vm_object_lock(src_object);
2005 goto Retry;
2006 }
2007 if (copy->size < src_offset+size)
2008 copy->size = src_offset+size;
2009
2010 if (!copy->pager_ready)
2011 check_ready = TRUE;
2012
2013 /*
2014 * Return the copy.
2015 */
2016 *_result_object = copy;
2017 vm_object_unlock(copy);
2018 vm_object_unlock(src_object);
2019
2020 /* Wait for the copy to be ready. */
2021 if (check_ready == TRUE) {
2022 vm_object_lock(copy);
2023 while (!copy->pager_ready) {
2024 vm_object_wait(copy, VM_OBJECT_EVENT_PAGER_READY,
2025 FALSE);
2026 vm_object_lock(copy);
2027 }
2028 vm_object_unlock(copy);
2029 }
2030
2031 return KERN_SUCCESS;
2032}
2033
/*
 *	Event counters for vm_object_copy_delayed.
 */
int	copy_delayed_lock_collisions = 0;	/* failed out-of-order lock attempts */
int	copy_delayed_max_collisions = 0;	/* most collisions seen in one call */
int	copy_delayed_lock_contention = 0;	/* calls that collided at least once */
int	copy_delayed_protect_iterate = 0;
int	copy_delayed_protect_lookup = 0;
int	copy_delayed_protect_lookup_wait = 0;
2040
2041/*
2042 * Routine: vm_object_copy_delayed [internal]
2043 *
2044 * Description:
2045 * Copy the specified virtual memory object, using
2046 * the asymmetric copy-on-write algorithm.
2047 *
2048 * In/out conditions:
2049 * The object must be unlocked on entry.
2050 *
2051 * This routine will not block waiting for user-generated
2052 * events. It is not interruptible.
2053 */
2054vm_object_t
2055vm_object_copy_delayed(
2056 vm_object_t src_object,
2057 vm_object_offset_t src_offset,
2058 vm_object_size_t size)
2059{
2060 vm_object_t new_copy = VM_OBJECT_NULL;
2061 vm_object_t old_copy;
2062 vm_page_t p;
2063 vm_object_size_t copy_size;
2064
2065 int collisions = 0;
2066 /*
2067 * The user-level memory manager wants to see all of the changes
2068 * to this object, but it has promised not to make any changes on
2069 * its own.
2070 *
2071 * Perform an asymmetric copy-on-write, as follows:
2072 * Create a new object, called a "copy object" to hold
2073 * pages modified by the new mapping (i.e., the copy,
2074 * not the original mapping).
2075 * Record the original object as the backing object for
2076 * the copy object. If the original mapping does not
2077 * change a page, it may be used read-only by the copy.
2078 * Record the copy object in the original object.
2079 * When the original mapping causes a page to be modified,
2080 * it must be copied to a new page that is "pushed" to
2081 * the copy object.
2082 * Mark the new mapping (the copy object) copy-on-write.
2083 * This makes the copy object itself read-only, allowing
2084 * it to be reused if the original mapping makes no
2085 * changes, and simplifying the synchronization required
2086 * in the "push" operation described above.
2087 *
2088 * The copy-on-write is said to be assymetric because the original
2089 * object is *not* marked copy-on-write. A copied page is pushed
2090 * to the copy object, regardless which party attempted to modify
2091 * the page.
2092 *
2093 * Repeated asymmetric copy operations may be done. If the
2094 * original object has not been changed since the last copy, its
2095 * copy object can be reused. Otherwise, a new copy object can be
2096 * inserted between the original object and its previous copy
2097 * object. Since any copy object is read-only, this cannot affect
2098 * affect the contents of the previous copy object.
2099 *
2100 * Note that a copy object is higher in the object tree than the
2101 * original object; therefore, use of the copy object recorded in
2102 * the original object must be done carefully, to avoid deadlock.
2103 */
2104
2105 Retry:
2106 vm_object_lock(src_object);
2107
2108 /*
2109 * See whether we can reuse the result of a previous
2110 * copy operation.
2111 */
2112
2113 old_copy = src_object->copy;
2114 if (old_copy != VM_OBJECT_NULL) {
2115 /*
2116 * Try to get the locks (out of order)
2117 */
2118 if (!vm_object_lock_try(old_copy)) {
2119 vm_object_unlock(src_object);
2120 mutex_pause();
2121
2122 /* Heisenberg Rules */
2123 copy_delayed_lock_collisions++;
2124 if (collisions++ == 0)
2125 copy_delayed_lock_contention++;
2126
2127 if (collisions > copy_delayed_max_collisions)
2128 copy_delayed_max_collisions = collisions;
2129
2130 goto Retry;
2131 }
2132
2133 /*
2134 * Determine whether the old copy object has
2135 * been modified.
2136 */
2137
2138 if (old_copy->resident_page_count == 0 &&
2139 !old_copy->pager_created) {
2140 /*
2141 * It has not been modified.
2142 *
2143 * Return another reference to
2144 * the existing copy-object.
2145 */
2146 assert(old_copy->ref_count > 0);
2147 old_copy->ref_count++;
2148
2149 if (old_copy->size < src_offset+size)
2150 old_copy->size = src_offset+size;
2151
2152#if TASK_SWAPPER
2153 /*
2154 * We have to reproduce some of the code from
2155 * vm_object_res_reference because we've taken
2156 * the locks out of order here, and deadlock
2157 * would result if we simply called that function.
2158 */
2159 if (++old_copy->res_count == 1) {
2160 assert(old_copy->shadow == src_object);
2161 vm_object_res_reference(src_object);
2162 }
2163#endif /* TASK_SWAPPER */
2164
2165 vm_object_unlock(old_copy);
2166 vm_object_unlock(src_object);
2167
2168 if (new_copy != VM_OBJECT_NULL) {
2169 vm_object_unlock(new_copy);
2170 vm_object_deallocate(new_copy);
2171 }
2172
2173 return(old_copy);
2174 }
2175 if (new_copy == VM_OBJECT_NULL) {
2176 vm_object_unlock(old_copy);
2177 vm_object_unlock(src_object);
2178 new_copy = vm_object_allocate(src_offset + size);
2179 vm_object_lock(new_copy);
2180 goto Retry;
2181 }
2182
2183 /*
2184 * Adjust the size argument so that the newly-created
2185 * copy object will be large enough to back either the
2186 * new old copy object or the new mapping.
2187 */
2188 if (old_copy->size > src_offset+size)
2189 size = old_copy->size - src_offset;
2190
2191 /*
2192 * The copy-object is always made large enough to
2193 * completely shadow the original object, since
2194 * it may have several users who want to shadow
2195 * the original object at different points.
2196 */
2197
2198 assert((old_copy->shadow == src_object) &&
2199 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2200
2201 /*
2202 * Make the old copy-object shadow the new one.
2203 * It will receive no more pages from the original
2204 * object.
2205 */
2206
2207 src_object->ref_count--; /* remove ref. from old_copy */
2208 assert(src_object->ref_count > 0);
2209 old_copy->shadow = new_copy;
2210 assert(new_copy->ref_count > 0);
2211 new_copy->ref_count++; /* for old_copy->shadow ref. */
2212
2213#if TASK_SWAPPER
2214 if (old_copy->res_count) {
2215 VM_OBJ_RES_INCR(new_copy);
2216 VM_OBJ_RES_DECR(src_object);
2217 }
2218#endif
2219
2220 vm_object_unlock(old_copy); /* done with old_copy */
2221 } else if (new_copy == VM_OBJECT_NULL) {
2222 vm_object_unlock(src_object);
2223 new_copy = vm_object_allocate(src_offset + size);
2224 vm_object_lock(new_copy);
2225 goto Retry;
2226 }
2227
2228 /*
2229 * Readjust the copy-object size if necessary.
2230 */
2231 copy_size = new_copy->size;
2232 if (copy_size < src_offset+size) {
2233 copy_size = src_offset+size;
2234 new_copy->size = copy_size;
2235 }
2236
2237 /*
2238 * Point the new copy at the existing object.
2239 */
2240
2241 new_copy->shadow = src_object;
2242 new_copy->shadow_offset = 0;
2243 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2244 assert(src_object->ref_count > 0);
2245 src_object->ref_count++;
2246 VM_OBJ_RES_INCR(src_object);
2247 src_object->copy = new_copy;
2248 vm_object_unlock(new_copy);
2249
2250 /*
2251 * Mark all (current) pages of the existing object copy-on-write.
2252 * This object may have a shadow chain below it, but
2253 * those pages will already be marked copy-on-write.
2254 */
2255
2256 vm_object_paging_wait(src_object, THREAD_UNINT);
2257 copy_delayed_protect_iterate++;
2258 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2259 if (!p->fictitious)
2260 pmap_page_protect(p->phys_addr,
2261 (VM_PROT_ALL & ~VM_PROT_WRITE &
2262 ~p->page_lock));
2263 }
2264 vm_object_unlock(src_object);
2265 XPR(XPR_VM_OBJECT,
2266 "vm_object_copy_delayed: used copy object %X for source %X\n",
2267 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2268
2269 return(new_copy);
2270}
2271
2272/*
2273 * Routine: vm_object_copy_strategically
2274 *
2275 * Purpose:
2276 * Perform a copy according to the source object's
2277 * declared strategy. This operation may block,
2278 * and may be interrupted.
2279 */
2280kern_return_t
2281vm_object_copy_strategically(
2282 register vm_object_t src_object,
2283 vm_object_offset_t src_offset,
2284 vm_object_size_t size,
2285 vm_object_t *dst_object, /* OUT */
2286 vm_object_offset_t *dst_offset, /* OUT */
2287 boolean_t *dst_needs_copy) /* OUT */
2288{
2289 boolean_t result;
2290 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2291 memory_object_copy_strategy_t copy_strategy;
2292
2293 assert(src_object != VM_OBJECT_NULL);
2294
2295 vm_object_lock(src_object);
2296
2297 /*
2298 * The copy strategy is only valid if the memory manager
2299 * is "ready". Internal objects are always ready.
2300 */
2301
2302 while (!src_object->internal && !src_object->pager_ready) {
2303
2304 vm_object_wait( src_object,
2305 VM_OBJECT_EVENT_PAGER_READY,
2306 interruptible);
2307 if (interruptible &&
2308 (current_thread()->wait_result != THREAD_AWAKENED)) {
2309 *dst_object = VM_OBJECT_NULL;
2310 *dst_offset = 0;
2311 *dst_needs_copy = FALSE;
2312 return(MACH_SEND_INTERRUPTED);
2313 }
2314 vm_object_lock(src_object);
2315 }
2316
2317 copy_strategy = src_object->copy_strategy;
2318
2319 /*
2320 * Use the appropriate copy strategy.
2321 */
2322
2323 switch (copy_strategy) {
2324 case MEMORY_OBJECT_COPY_NONE:
2325 result = vm_object_copy_slowly(src_object, src_offset, size,
2326 interruptible, dst_object);
2327 if (result == KERN_SUCCESS) {
2328 *dst_offset = 0;
2329 *dst_needs_copy = FALSE;
2330 }
2331 break;
2332
2333 case MEMORY_OBJECT_COPY_CALL:
2334 result = vm_object_copy_call(src_object, src_offset, size,
2335 dst_object);
2336 if (result == KERN_SUCCESS) {
2337 *dst_offset = src_offset;
2338 *dst_needs_copy = TRUE;
2339 }
2340 break;
2341
2342 case MEMORY_OBJECT_COPY_DELAY:
2343 vm_object_unlock(src_object);
2344 *dst_object = vm_object_copy_delayed(src_object,
2345 src_offset, size);
2346 *dst_offset = src_offset;
2347 *dst_needs_copy = TRUE;
2348 result = KERN_SUCCESS;
2349 break;
2350
2351 case MEMORY_OBJECT_COPY_SYMMETRIC:
2352 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2353 vm_object_unlock(src_object);
2354 result = KERN_MEMORY_RESTART_COPY;
2355 break;
2356
2357 default:
2358 panic("copy_strategically: bad strategy");
2359 result = KERN_INVALID_ARGUMENT;
2360 }
2361 return(result);
2362}
2363
2364/*
2365 * vm_object_shadow:
2366 *
2367 * Create a new object which is backed by the
2368 * specified existing object range. The source
2369 * object reference is deallocated.
2370 *
2371 * The new object and offset into that object
2372 * are returned in the source parameters.
2373 */
2374boolean_t vm_object_shadow_check = FALSE;
2375
2376boolean_t
2377vm_object_shadow(
2378 vm_object_t *object, /* IN/OUT */
2379 vm_object_offset_t *offset, /* IN/OUT */
2380 vm_object_size_t length)
2381{
2382 register vm_object_t source;
2383 register vm_object_t result;
2384
2385 source = *object;
2386 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2387
2388 /*
2389 * Determine if we really need a shadow.
2390 */
2391
2392 if (vm_object_shadow_check && source->ref_count == 1 &&
2393 (source->shadow == VM_OBJECT_NULL ||
2394 source->shadow->copy == VM_OBJECT_NULL))
2395 {
2396 source->shadowed = FALSE;
2397 return FALSE;
2398 }
2399
2400 /*
2401 * Allocate a new object with the given length
2402 */
2403
2404 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2405 panic("vm_object_shadow: no object for shadowing");
2406
2407 /*
2408 * The new object shadows the source object, adding
2409 * a reference to it. Our caller changes his reference
2410 * to point to the new object, removing a reference to
2411 * the source object. Net result: no change of reference
2412 * count.
2413 */
2414 result->shadow = source;
2415
2416 /*
2417 * Store the offset into the source object,
2418 * and fix up the offset into the new object.
2419 */
2420
2421 result->shadow_offset = *offset;
2422
2423 /*
2424 * Return the new things
2425 */
2426
2427 *offset = 0;
2428 *object = result;
2429 return TRUE;
2430}
2431
2432/*
2433 * The relationship between vm_object structures and
2434 * the memory_object ports requires careful synchronization.
2435 *
2436 * All associations are created by vm_object_enter. Both
2437 * port fields are filled in, as follows:
2438 * pager: the memory_object port itself, supplied by
2439 * the user requesting a mapping (or the kernel,
2440 * when initializing internal objects); the
2441 * kernel simulates holding send rights by keeping
2442 * a port reference;
2443 * pager_request:
2444 * the memory object control port,
2445 * created by the kernel; the kernel holds
2446 * receive (and ownership) rights to this
2447 * port, but no other references.
2448 * All of the ports are referenced by their global names.
2449 *
2450 * When initialization is complete, the "initialized" field
2451 * is asserted. Other mappings using a particular memory object,
2452 * and any references to the vm_object gained through the
2453 * port association must wait for this initialization to occur.
2454 *
2455 * In order to allow the memory manager to set attributes before
2456 * requests (notably virtual copy operations, but also data or
2457 * unlock requests) are made, a "ready" attribute is made available.
2458 * Only the memory manager may affect the value of this attribute.
2459 * Its value does not affect critical kernel functions, such as
2460 * internal object initialization or destruction. [Furthermore,
2461 * memory objects created by the kernel are assumed to be ready
2462 * immediately; the default memory manager need not explicitly
2463 * set the "ready" attribute.]
2464 *
2465 * [Both the "initialized" and "ready" attribute wait conditions
2466 * use the "pager" field as the wait event.]
2467 *
2468 * The port associations can be broken down by any of the
2469 * following routines:
2470 * vm_object_terminate:
2471 * No references to the vm_object remain, and
2472 * the object cannot (or will not) be cached.
2473 * This is the normal case, and is done even
2474 * though one of the other cases has already been
2475 * done.
2476 * vm_object_destroy:
2477 * The memory_object port has been destroyed,
2478 * meaning that the kernel cannot flush dirty
2479 * pages or request new data or unlock existing
2480 * data.
2481 * memory_object_destroy:
2482 * The memory manager has requested that the
2483 * kernel relinquish rights to the memory object
2484 * port. [The memory manager may not want to
2485 * destroy the port, but may wish to refuse or
2486 * tear down existing memory mappings.]
2487 * Each routine that breaks an association must break all of
2488 * them at once. At some later time, that routine must clear
2489 * the vm_object port fields and release the port rights.
2490 * [Furthermore, each routine must cope with the simultaneous
2491 * or previous operations of the others.]
2492 *
2493 * In addition to the lock on the object, the vm_object_cache_lock
2494 * governs the port associations. References gained through the
2495 * port association require use of the cache lock.
2496 *
2497 * Because the port fields may be cleared spontaneously, they
2498 * cannot be used to determine whether a memory object has
2499 * ever been associated with a particular vm_object. [This
2500 * knowledge is important to the shadow object mechanism.]
2501 * For this reason, an additional "created" attribute is
2502 * provided.
2503 *
2504 * During various paging operations, the port values found in the
2505 * vm_object must be valid. To prevent these port rights from being
2506 * released, and to prevent the port associations from changing
2507 * (other than being removed, i.e., made null), routines may use
2508 * the vm_object_paging_begin/end routines [actually, macros].
2509 * The implementation uses the "paging_in_progress" and "wanted" fields.
2510 * [Operations that alter the validity of the port values include the
2511 * termination routines and vm_object_collapse.]
2512 */
2513
2514#define IKOT_PAGER_LOOKUP_TYPE IKOT_PAGING_REQUEST
2515
2516vm_object_t
2517vm_object_lookup(
2518 ipc_port_t port)
2519{
2520 vm_object_t object;
2521
2522start_over:
2523 object = VM_OBJECT_NULL;
2524
2525 if (IP_VALID(port)) {
2526 vm_object_cache_lock();
2527 ip_lock(port);
2528 if (ip_active(port) &&
2529 (ip_kotype(port) == IKOT_PAGER_LOOKUP_TYPE)) {
2530 object = (vm_object_t) port->ip_kobject;
2531 if (!vm_object_lock_try(object)) {
2532 /*
2533 * failed to acquire object lock. Drop the
2534 * other two locks and wait for it, then go
2535 * back and start over in case the port
2536 * associations changed in the interim.
2537 */
2538 ip_unlock(port);
2539 vm_object_cache_unlock();
2540 vm_object_lock(object);
2541 vm_object_unlock(object);
2542 goto start_over;
2543 }
2544
2545 assert(object->alive);
2546
2547 if((object->ref_count == 0) && (!object->terminating)){
2548 queue_remove(&vm_object_cached_list, object,
2549 vm_object_t, cached_list);
2550 vm_object_cached_count--;
2551 XPR(XPR_VM_OBJECT_CACHE,
2552 "vm_object_lookup: removing %X, head (%X, %X)\n",
2553 (integer_t)object,
2554 (integer_t)vm_object_cached_list.next,
2555 (integer_t)vm_object_cached_list.prev, 0,0);
2556 }
2557
2558 object->ref_count++;
2559 vm_object_res_reference(object);
2560 vm_object_unlock(object);
2561 }
2562 ip_unlock(port);
2563 vm_object_cache_unlock();
2564 }
2565
2566 return object;
2567}
2568
2569
2570
2571void
2572vm_object_destroy(
2573 ipc_port_t pager)
2574{
2575 vm_object_t object;
2576 vm_object_hash_entry_t entry;
2577 pager_request_t old_pager_request;
2578
2579 /*
2580 * Perform essentially the same operations as in vm_object_lookup,
2581 * except that this time we look up based on the memory_object
2582 * port, not the control port.
2583 */
2584 vm_object_cache_lock();
2585 entry = vm_object_hash_lookup(pager, FALSE);
2586 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2587 entry->object == VM_OBJECT_NULL) {
2588 vm_object_cache_unlock();
2589 return;
2590 }
2591
2592 object = entry->object;
2593 entry->object = VM_OBJECT_NULL;
2594
2595 vm_object_lock(object);
2596 if (object->ref_count == 0) {
2597 XPR(XPR_VM_OBJECT_CACHE,
2598 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2599 (integer_t)object,
2600 (integer_t)vm_object_cached_list.next,
2601 (integer_t)vm_object_cached_list.prev, 0,0);
2602
2603 queue_remove(&vm_object_cached_list, object,
2604 vm_object_t, cached_list);
2605 vm_object_cached_count--;
2606 }
2607 object->ref_count++;
2608 vm_object_res_reference(object);
2609
2610 object->can_persist = FALSE;
2611
2612 assert(object->pager == pager);
2613
2614 /*
2615 * Remove the port associations.
2616 *
2617 * Note that the memory_object itself is dead, so
2618 * we don't bother with it.
2619 */
2620
2621 object->pager = IP_NULL;
2622 vm_object_remove(object);
2623
2624 old_pager_request = object->pager_request;
2625
2626 object->pager_request = PAGER_REQUEST_NULL;
2627
2628 vm_object_unlock(object);
2629 vm_object_cache_unlock();
2630
2631 vm_object_pager_wakeup(pager);
2632
2633 /*
2634 * Clean up the port references. Note that there's no
2635 * point in trying the memory_object_terminate call
2636 * because the memory_object itself is dead.
2637 */
2638
2639 ipc_port_release_send(pager);
2640
2641 if ((ipc_port_t)old_pager_request != IP_NULL)
2642 ipc_port_dealloc_kernel((ipc_port_t)old_pager_request);
2643
2644 /*
2645 * Restart pending page requests
2646 */
2647 vm_object_lock(object);
2648
2649 vm_object_abort_activity(object);
2650
2651 vm_object_unlock(object);
2652
2653 /*
2654 * Lose the object reference.
2655 */
2656
2657 vm_object_deallocate(object);
2658}
2659
2660/*
2661 * Routine: vm_object_enter
2662 * Purpose:
2663 * Find a VM object corresponding to the given
2664 * pager; if no such object exists, create one,
2665 * and initialize the pager.
2666 */
2667vm_object_t
2668vm_object_enter(
2669 ipc_port_t pager,
2670 vm_object_size_t size,
2671 boolean_t internal,
2672 boolean_t init,
2673 boolean_t check_named)
2674{
2675 register vm_object_t object;
2676 vm_object_t new_object;
2677 boolean_t must_init;
2678 ipc_port_t pager_request;
2679 vm_object_hash_entry_t entry, new_entry;
2680#ifdef MACH_BSD
2681kern_return_t vnode_pager_init( ipc_port_t, ipc_port_t, vm_object_size_t);
2682#endif
2683
2684 if (!IP_VALID(pager))
2685 return(vm_object_allocate(size));
2686
2687 new_object = VM_OBJECT_NULL;
2688 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2689 must_init = init;
2690
2691 /*
2692 * Look for an object associated with this port.
2693 */
2694
2695restart:
2696 vm_object_cache_lock();
2697 for (;;) {
2698 entry = vm_object_hash_lookup(pager, FALSE);
2699
2700 /*
2701 * If a previous object is being terminated,
2702 * we must wait for the termination message
2703 * to be queued.
2704 *
2705 * We set kobject to a non-null value to let the
2706 * terminator know that someone is waiting.
2707 * Among the possibilities is that the port
2708 * could die while we're waiting. Must restart
2709 * instead of continuing the loop.
2710 */
2711
2712 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
2713 if (entry->object != VM_OBJECT_NULL) {
2714 if(check_named) {
2715 if(entry->object->named) {
2716 vm_object_cache_unlock();
2717 return(entry->object);
2718 }
2719 }
2720 break;
2721 }
2722
2723 entry->waiting = TRUE;
2724 assert_wait((event_t) pager, THREAD_UNINT);
2725 vm_object_cache_unlock();
2726 thread_block((void (*)(void))0);
2727 goto restart;
2728 }
2729
2730 /*
2731 * We must unlock to create a new object;
2732 * if we do so, we must try the lookup again.
2733 */
2734
2735 if (new_object == VM_OBJECT_NULL) {
2736 vm_object_cache_unlock();
2737 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2738 new_entry = vm_object_hash_entry_alloc(pager);
2739 new_object = vm_object_allocate(size);
2740 vm_object_cache_lock();
2741 } else {
2742 /*
2743 * Lookup failed twice, and we have something
2744 * to insert; set the object.
2745 */
2746
2747 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2748 vm_object_hash_insert(new_entry);
2749 entry = new_entry;
2750 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2751 }
2752
2753 entry->object = new_object;
2754 new_object = VM_OBJECT_NULL;
2755 must_init = TRUE;
2756 }
2757 }
2758
2759 object = entry->object;
2760 assert(object != VM_OBJECT_NULL);
2761
2762 if (!must_init) {
2763 vm_object_lock(object);
2764 assert(object->pager_created);
2765 assert(!internal || object->internal);
2766 if (check_named)
2767 object->named = TRUE;
2768 if (object->ref_count == 0) {
2769 XPR(XPR_VM_OBJECT_CACHE,
2770 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2771 (integer_t)object,
2772 (integer_t)vm_object_cached_list.next,
2773 (integer_t)vm_object_cached_list.prev, 0,0);
2774 queue_remove(&vm_object_cached_list, object,
2775 vm_object_t, cached_list);
2776 vm_object_cached_count--;
2777 }
2778 object->ref_count++;
2779 vm_object_res_reference(object);
2780 vm_object_unlock(object);
2781
2782 VM_STAT(hits++);
2783 }
2784 assert(object->ref_count > 0);
2785
2786 VM_STAT(lookups++);
2787
2788 vm_object_cache_unlock();
2789
2790 XPR(XPR_VM_OBJECT,
2791 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2792 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2793
2794 /*
2795 * If we raced to create a vm_object but lost, let's
2796 * throw away ours.
2797 */
2798
2799 if (new_object != VM_OBJECT_NULL)
2800 vm_object_deallocate(new_object);
2801
2802 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2803 vm_object_hash_entry_free(new_entry);
2804
2805 if (must_init) {
2806
2807 /*
2808 * Allocate request port.
2809 */
2810
2811 pager_request = ipc_port_alloc_kernel();
2812 assert (pager_request != IP_NULL);
2813 ipc_kobject_set(pager_request, (ipc_kobject_t) object,
2814 IKOT_PAGING_REQUEST);
2815
2816 vm_object_lock(object);
2817
2818 /*
2819 * Copy the naked send right we were given.
2820 */
2821
2822 pager = ipc_port_copy_send(pager);
2823 if (!IP_VALID(pager))
2824 panic("vm_object_enter: port died"); /* XXX */
2825
2826 object->pager_created = TRUE;
2827 object->pager = pager;
2828 object->internal = internal;
2829 object->pager_trusted = internal;
2830 if (!internal) {
2831 /* copy strategy invalid until set by memory manager */
2832 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2833 }
2834 object->pager_request = pager_request;
2835 object->pager_ready = FALSE;
2836
2837 if (check_named)
2838 object->named = TRUE;
2839 vm_object_unlock(object);
2840
2841 /*
2842 * Let the pager know we're using it.
2843 */
2844
2845#ifdef MACH_BSD
2846 if(((rpc_subsystem_t)pager_mux_hash_lookup(pager)) ==
2847 ((rpc_subsystem_t) &vnode_pager_workaround)) {
2848 (void) vnode_pager_init(pager,
2849 object->pager_request,
2850 PAGE_SIZE);
2851 } else {
2852 (void) memory_object_init(pager,
2853 object->pager_request,
2854 PAGE_SIZE);
2855 }
2856#else
2857 (void) memory_object_init(pager,
2858 object->pager_request,
2859 PAGE_SIZE);
2860#endif
2861
2862 vm_object_lock(object);
2863 if (internal) {
2864 object->pager_ready = TRUE;
2865 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2866 }
2867
2868 object->pager_initialized = TRUE;
2869 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2870 } else {
2871 vm_object_lock(object);
2872 }
2873
2874 /*
2875 * [At this point, the object must be locked]
2876 */
2877
2878 /*
2879 * Wait for the work above to be done by the first
2880 * thread to map this object.
2881 */
2882
2883 while (!object->pager_initialized) {
2884 vm_object_wait( object,
2885 VM_OBJECT_EVENT_INITIALIZED,
2886 THREAD_UNINT);
2887 vm_object_lock(object);
2888 }
2889 vm_object_unlock(object);
2890
2891 XPR(XPR_VM_OBJECT,
2892 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2893 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2894 return(object);
2895}
2896
2897/*
2898 * Routine: vm_object_pager_create
2899 * Purpose:
2900 * Create a memory object for an internal object.
2901 * In/out conditions:
2902 * The object is locked on entry and exit;
2903 * it may be unlocked within this call.
2904 * Limitations:
2905 * Only one thread may be performing a
2906 * vm_object_pager_create on an object at
2907 * a time. Presumably, only the pageout
2908 * daemon will be using this routine.
2909 */
2910
2911void
2912vm_object_pager_create(
2913 register vm_object_t object)
2914{
2915 ipc_port_t pager;
2916 vm_object_hash_entry_t entry;
2917#if MACH_PAGEMAP
2918 vm_object_size_t size;
2919 vm_external_map_t map;
2920#endif /* MACH_PAGEMAP */
2921
2922 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2923 (integer_t)object, 0,0,0,0);
2924
2925 if (memory_manager_default_check() != KERN_SUCCESS)
2926 return;
2927
2928 /*
2929 * Prevent collapse or termination by holding a paging reference
2930 */
2931
2932 vm_object_paging_begin(object);
2933 if (object->pager_created) {
2934 /*
2935 * Someone else got to it first...
2936 * wait for them to finish initializing the ports
2937 */
2938 while (!object->pager_initialized) {
2939 vm_object_wait( object,
2940 VM_OBJECT_EVENT_INITIALIZED,
2941 THREAD_UNINT);
2942 vm_object_lock(object);
2943 }
2944 vm_object_paging_end(object);
2945 return;
2946 }
2947
2948 /*
2949 * Indicate that a memory object has been assigned
2950 * before dropping the lock, to prevent a race.
2951 */
2952
2953 object->pager_created = TRUE;
2954 object->paging_offset = 0;
2955
2956#if MACH_PAGEMAP
2957 size = object->size;
2958#endif /* MACH_PAGEMAP */
2959 vm_object_unlock(object);
2960
2961#if MACH_PAGEMAP
2962 map = vm_external_create(size);
2963 vm_object_lock(object);
2964 assert(object->size == size);
2965 object->existence_map = map;
2966 vm_object_unlock(object);
2967#endif /* MACH_PAGEMAP */
2968
2969 /*
2970 * Create the pager ports, and associate them with this object.
2971 *
2972 * We make the port association here so that vm_object_enter()
2973 * can look up the object to complete initializing it. No
2974 * user will ever map this object.
2975 */
2976 {
2977 ipc_port_t DMM;
2978 vm_size_t cluster_size;
2979
2980 /* acquire a naked send right for the DMM */
2981 DMM = memory_manager_default_reference(&cluster_size);
2982 assert(cluster_size >= PAGE_SIZE);
2983
2984 object->cluster_size = cluster_size; /* XXX ??? */
2985 assert(object->temporary);
2986
2987 /* consumes the naked send right for DMM */
2988 (void) memory_object_create(DMM, &pager, object->size);
2989 assert(IP_VALID(pager));
2990 }
2991
2992 entry = vm_object_hash_entry_alloc(pager);
2993
2994 vm_object_cache_lock();
2995 vm_object_hash_insert(entry);
2996
2997 entry->object = object;
2998 vm_object_cache_unlock();
2999
3000 /*
3001 * A naked send right was returned by
3002 * memory_object_create(), and it is
3003 * copied by vm_object_enter().
3004 */
3005
3006 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
3007 panic("vm_object_pager_create: mismatch");
3008
3009 /*
3010 * Drop the naked send right.
3011 */
3012 ipc_port_release_send(pager);
3013
3014 vm_object_lock(object);
3015
3016 /*
3017 * Release the paging reference
3018 */
3019 vm_object_paging_end(object);
3020}
3021
3022/*
3023 * Routine: vm_object_remove
3024 * Purpose:
3025 * Eliminate the pager/object association
3026 * for this pager.
3027 * Conditions:
3028 * The object cache must be locked.
3029 */
3030void
3031vm_object_remove(
3032 vm_object_t object)
3033{
3034 ipc_port_t port;
3035
3036 if ((port = object->pager) != IP_NULL) {
3037 vm_object_hash_entry_t entry;
3038
3039 entry = vm_object_hash_lookup(port, FALSE);
3040 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
3041 entry->object = VM_OBJECT_NULL;
3042 }
3043
3044 if ((port = object->pager_request) != IP_NULL) {
3045 if (ip_kotype(port) == IKOT_PAGING_REQUEST)
3046 ipc_kobject_set(port, IKO_NULL, IKOT_NONE);
3047 else if (ip_kotype(port) != IKOT_NONE)
3048 panic("vm_object_remove: bad request port");
3049 }
3050}
3051
3052/*
3053 * Global variables for vm_object_collapse():
3054 *
3055 * Counts for normal collapses and bypasses.
3056 * Debugging variables, to watch or disable collapse.
3057 */
3058long object_collapses = 0;
3059long object_bypasses = 0;
3060
3061boolean_t vm_object_collapse_allowed = TRUE;
3062boolean_t vm_object_bypass_allowed = TRUE;
3063
3064int vm_external_discarded;
3065int vm_external_collapsed;
3066/*
3067 * vm_object_do_collapse:
3068 *
3069 * Collapse an object with the object backing it.
3070 * Pages in the backing object are moved into the
3071 * parent, and the backing object is deallocated.
3072 *
3073 * Both objects and the cache are locked; the page
3074 * queues are unlocked.
3075 *
3076 */
3077void
3078vm_object_do_collapse(
3079 vm_object_t object,
3080 vm_object_t backing_object)
3081{
3082 vm_page_t p, pp;
3083 vm_object_offset_t new_offset, backing_offset;
3084 vm_object_size_t size;
3085
3086 backing_offset = object->shadow_offset;
3087 size = object->size;
3088
3089
3090 /*
3091 * Move all in-memory pages from backing_object
3092 * to the parent. Pages that have been paged out
3093 * will be overwritten by any of the parent's
3094 * pages that shadow them.
3095 */
3096
3097 while (!queue_empty(&backing_object->memq)) {
3098
3099 p = (vm_page_t) queue_first(&backing_object->memq);
3100
3101 new_offset = (p->offset - backing_offset);
3102
3103 assert(!p->busy || p->absent);
3104
3105 /*
3106 * If the parent has a page here, or if
3107 * this page falls outside the parent,
3108 * dispose of it.
3109 *
3110 * Otherwise, move it as planned.
3111 */
3112
3113 if (p->offset < backing_offset || new_offset >= size) {
3114 VM_PAGE_FREE(p);
3115 } else {
3116 pp = vm_page_lookup(object, new_offset);
3117 if (pp == VM_PAGE_NULL) {
3118
3119 /*
3120 * Parent now has no page.
3121 * Move the backing object's page up.
3122 */
3123
3124 vm_page_rename(p, object, new_offset);
3125#if MACH_PAGEMAP
3126 } else if (pp->absent) {
3127
3128 /*
3129 * Parent has an absent page...
3130 * it's not being paged in, so
3131 * it must really be missing from
3132 * the parent.
3133 *
3134 * Throw out the absent page...
3135 * any faults looking for that
3136 * page will restart with the new
3137 * one.
3138 */
3139
3140 VM_PAGE_FREE(pp);
3141 vm_page_rename(p, object, new_offset);
3142#endif /* MACH_PAGEMAP */
3143 } else {
3144 assert(! pp->absent);
3145
3146 /*
3147 * Parent object has a real page.
3148 * Throw away the backing object's
3149 * page.
3150 */
3151 VM_PAGE_FREE(p);
3152 }
3153 }
3154 }
3155
3156 assert(object->pager == IP_NULL || backing_object->pager == IP_NULL);
3157
3158 if (backing_object->pager != IP_NULL) {
3159 vm_object_hash_entry_t entry;
3160
3161 /*
3162 * Move the pager from backing_object to object.
3163 *
3164 * XXX We're only using part of the paging space
3165 * for keeps now... we ought to discard the
3166 * unused portion.
3167 */
3168
3169 object->pager = backing_object->pager;
3170 entry = vm_object_hash_lookup(object->pager, FALSE);
3171 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3172 entry->object = object;
3173 object->pager_created = backing_object->pager_created;
3174 object->pager_request = backing_object->pager_request;
3175 object->pager_ready = backing_object->pager_ready;
3176 object->pager_initialized = backing_object->pager_initialized;
3177 object->cluster_size = backing_object->cluster_size;
3178 object->paging_offset =
3179 backing_object->paging_offset + backing_offset;
3180 if (object->pager_request != IP_NULL) {
3181 ipc_kobject_set(object->pager_request,
3182 (ipc_kobject_t) object,
3183 IKOT_PAGING_REQUEST);
3184 }
3185 }
3186
3187 vm_object_cache_unlock();
3188
3189 object->paging_offset = backing_object->paging_offset + backing_offset;
3190
3191#if MACH_PAGEMAP
3192 /*
3193 * If the shadow offset is 0, the use the existence map from
3194 * the backing object if there is one. If the shadow offset is
3195 * not zero, toss it.
3196 *
3197 * XXX - If the shadow offset is not 0 then a bit copy is needed
3198 * if the map is to be salvaged. For now, we just just toss the
3199 * old map, giving the collapsed object no map. This means that
3200 * the pager is invoked for zero fill pages. If analysis shows
3201 * that this happens frequently and is a performance hit, then
3202 * this code should be fixed to salvage the map.
3203 */
3204 assert(object->existence_map == VM_EXTERNAL_NULL);
3205 if (backing_offset || (size != backing_object->size)) {
3206 vm_external_discarded++;
3207 vm_external_destroy(backing_object->existence_map,
3208 backing_object->size);
3209 }
3210 else {
3211 vm_external_collapsed++;
3212 object->existence_map = backing_object->existence_map;
3213 }
3214 backing_object->existence_map = VM_EXTERNAL_NULL;
3215#endif /* MACH_PAGEMAP */
3216
3217 /*
3218 * Object now shadows whatever backing_object did.
3219 * Note that the reference to backing_object->shadow
3220 * moves from within backing_object to within object.
3221 */
3222
3223 object->shadow = backing_object->shadow;
3224 object->shadow_offset += backing_object->shadow_offset;
3225 assert((object->shadow == VM_OBJECT_NULL) ||
3226 (object->shadow->copy == VM_OBJECT_NULL));
3227
3228 /*
3229 * Discard backing_object.
3230 *
3231 * Since the backing object has no pages, no
3232 * pager left, and no object references within it,
3233 * all that is necessary is to dispose of it.
3234 */
3235
3236 assert((backing_object->ref_count == 1) &&
3237 (backing_object->resident_page_count == 0) &&
3238 (backing_object->paging_in_progress == 0));
3239
3240 assert(backing_object->alive);
3241 backing_object->alive = FALSE;
3242 vm_object_unlock(backing_object);
3243
3244 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3245 (integer_t)backing_object, 0,0,0,0);
3246
3247 zfree(vm_object_zone, (vm_offset_t) backing_object);
3248
3249 object_collapses++;
3250}
3251
3252void
3253vm_object_do_bypass(
3254 vm_object_t object,
3255 vm_object_t backing_object)
3256{
3257 /*
3258 * Make the parent shadow the next object
3259 * in the chain.
3260 */
3261
3262#if TASK_SWAPPER
3263 /*
3264 * Do object reference in-line to
3265 * conditionally increment shadow's
3266 * residence count. If object is not
3267 * resident, leave residence count
3268 * on shadow alone.
3269 */
3270 if (backing_object->shadow != VM_OBJECT_NULL) {
3271 vm_object_lock(backing_object->shadow);
3272 backing_object->shadow->ref_count++;
3273 if (object->res_count != 0)
3274 vm_object_res_reference(backing_object->shadow);
3275 vm_object_unlock(backing_object->shadow);
3276 }
3277#else /* TASK_SWAPPER */
3278 vm_object_reference(backing_object->shadow);
3279#endif /* TASK_SWAPPER */
3280
3281 object->shadow = backing_object->shadow;
3282 object->shadow_offset += backing_object->shadow_offset;
3283
3284 /*
3285 * Backing object might have had a copy pointer
3286 * to us. If it did, clear it.
3287 */
3288 if (backing_object->copy == object) {
3289 backing_object->copy = VM_OBJECT_NULL;
3290 }
3291
3292 /*
3293 * Drop the reference count on backing_object.
3294#if TASK_SWAPPER
3295 * Since its ref_count was at least 2, it
3296 * will not vanish; so we don't need to call
3297 * vm_object_deallocate.
3298 * [FBDP: that doesn't seem to be true any more]
3299 *
3300 * The res_count on the backing object is
3301 * conditionally decremented. It's possible
3302 * (via vm_pageout_scan) to get here with
3303 * a "swapped" object, which has a 0 res_count,
3304 * in which case, the backing object res_count
3305 * is already down by one.
3306#else
3307 * Don't call vm_object_deallocate unless
3308 * ref_count drops to zero.
3309 *
3310 * The ref_count can drop to zero here if the
3311 * backing object could be bypassed but not
3312 * collapsed, such as when the backing object
3313 * is temporary and cachable.
3314#endif
3315 */
3316 if (backing_object->ref_count > 1) {
3317 backing_object->ref_count--;
3318#if TASK_SWAPPER
3319 if (object->res_count != 0)
3320 vm_object_res_deallocate(backing_object);
3321 assert(backing_object->ref_count > 0);
3322#endif /* TASK_SWAPPER */
3323 vm_object_unlock(backing_object);
3324 } else {
3325
3326 /*
3327 * Drop locks so that we can deallocate
3328 * the backing object.
3329 */
3330
3331#if TASK_SWAPPER
3332 if (object->res_count == 0) {
3333 /* XXX get a reference for the deallocate below */
3334 vm_object_res_reference(backing_object);
3335 }
3336#endif /* TASK_SWAPPER */
3337 vm_object_unlock(object);
3338 vm_object_unlock(backing_object);
3339 vm_object_deallocate(backing_object);
3340
3341 /*
3342 * Relock object. We don't have to reverify
3343 * its state since vm_object_collapse will
3344 * do that for us as it starts at the
3345 * top of its loop.
3346 */
3347
3348 vm_object_lock(object);
3349 }
3350
3351 object_bypasses++;
3352}
3353
3354/*
3355 * vm_object_collapse:
3356 *
3357 * Perform an object collapse or an object bypass if appropriate.
3358 * The real work of collapsing and bypassing is performed in
3359 * the routines vm_object_do_collapse and vm_object_do_bypass.
3360 *
3361 * Requires that the object be locked and the page queues be unlocked.
3362 *
3363 */
3364void
3365vm_object_collapse(
3366 register vm_object_t object)
3367{
3368 register vm_object_t backing_object;
3369 register vm_object_offset_t backing_offset;
3370 register vm_object_size_t size;
3371 register vm_object_offset_t new_offset;
3372 register vm_page_t p;
3373
3374 if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {
3375 return;
3376 }
3377
3378 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3379 (integer_t)object, 0,0,0,0);
3380
3381 while (TRUE) {
3382 /*
3383 * Verify that the conditions are right for either
3384 * collapse or bypass:
3385 *
3386 * The object exists and no pages in it are currently
3387 * being paged out, and
3388 */
3389 if (object == VM_OBJECT_NULL ||
3390 object->paging_in_progress != 0 ||
3391 object->absent_count != 0)
3392 return;
3393
3394 /*
3395 * There is a backing object, and
3396 */
3397
3398 if ((backing_object = object->shadow) == VM_OBJECT_NULL)
3399 return;
3400
3401 vm_object_lock(backing_object);
3402
3403 /*
3404 * ...
3405 * The backing object is not read_only,
3406 * and no pages in the backing object are
3407 * currently being paged out.
3408 * The backing object is internal.
3409 *
3410 */
3411
3412 if (!backing_object->internal ||
3413 backing_object->paging_in_progress != 0) {
3414 vm_object_unlock(backing_object);
3415 return;
3416 }
3417
3418 /*
3419 * The backing object can't be a copy-object:
3420 * the shadow_offset for the copy-object must stay
3421 * as 0. Furthermore (for the 'we have all the
3422 * pages' case), if we bypass backing_object and
3423 * just shadow the next object in the chain, old
3424 * pages from that object would then have to be copied
3425 * BOTH into the (former) backing_object and into the
3426 * parent object.
3427 */
3428 if (backing_object->shadow != VM_OBJECT_NULL &&
3429 backing_object->shadow->copy != VM_OBJECT_NULL) {
3430 vm_object_unlock(backing_object);
3431 return;
3432 }
3433
3434 /*
3435 * We can now try to either collapse the backing
3436 * object (if the parent is the only reference to
3437 * it) or (perhaps) remove the parent's reference
3438 * to it.
3439 */
3440
3441 /*
3442 * If there is exactly one reference to the backing
3443 * object, we may be able to collapse it into the parent.
3444 *
3445 * XXXO (norma vm):
3446 *
3447 * The backing object must not have a pager
3448 * created for it, since collapsing an object
3449 * into a backing_object dumps new pages into
3450 * the backing_object that its pager doesn't
3451 * know about, and we've already declared pages.
3452 * This page dumping is deadly if other kernels
3453 * are shadowing this object; this is the
3454 * distributed equivalent of the ref_count == 1
3455 * condition.
3456 *
3457 * With some work, we could downgrade this
3458 * restriction to the backing object must not
3459 * be cachable, since when a temporary object
3460 * is uncachable we are allowed to do anything
3461 * to it. We would have to do something like
3462 * call declare_pages again, and we would have
3463 * to be prepared for the memory manager
3464 * disabling temporary termination, which right
3465 * now is a difficult race to deal with, since
3466 * the memory manager currently assumes that
3467 * termination is the only possible failure
3468 * for disabling temporary termination.
3469 */
3470
3471 if (backing_object->ref_count == 1 &&
3472 ! object->pager_created &&
3473 vm_object_collapse_allowed) {
3474
3475 XPR(XPR_VM_OBJECT,
3476 "vm_object_collapse: %x to %x, pager %x, pager_request %x\n",
3477 (integer_t)backing_object, (integer_t)object,
3478 (integer_t)backing_object->pager,
3479 (integer_t)backing_object->pager_request, 0);
3480
3481 /*
3482 * We need the cache lock for collapsing,
3483 * but we must not deadlock.
3484 */
3485
3486 if (! vm_object_cache_lock_try()) {
3487 vm_object_unlock(backing_object);
3488 return;
3489 }
3490
3491 /*
3492 * Collapse the object with its backing
3493 * object, and try again with the object's
3494 * new backing object.
3495 */
3496
3497 vm_object_do_collapse(object, backing_object);
3498 continue;
3499 }
3500
3501
3502 /*
3503 * Collapsing the backing object was not possible
3504 * or permitted, so let's try bypassing it.
3505 */
3506
3507 if (! vm_object_bypass_allowed) {
3508 vm_object_unlock(backing_object);
3509 return;
3510 }
3511
3512 /*
3513 * If the backing object has a pager but no pagemap,
3514 * then we cannot bypass it, because we don't know
3515 * what pages it has.
3516 */
3517 if (backing_object->pager_created
3518#if MACH_PAGEMAP
3519 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3520#endif /* MACH_PAGEMAP */
3521 ) {
3522 vm_object_unlock(backing_object);
3523 return;
3524 }
3525
3526 backing_offset = object->shadow_offset;
3527 size = object->size;
3528
3529 /*
3530 * If all of the pages in the backing object are
3531 * shadowed by the parent object, the parent
3532 * object no longer has to shadow the backing
3533 * object; it can shadow the next one in the
3534 * chain.
3535 *
3536 * If the backing object has existence info,
3537 * we must check examine its existence info
3538 * as well.
3539 *
3540 * XXX
3541 * Should have a check for a 'small' number
3542 * of pages here.
3543 */
3544
3545 /*
3546 * First, check pages resident in the backing object.
3547 */
3548
3549 queue_iterate(&backing_object->memq, p, vm_page_t, listq) {
3550
3551 /*
3552 * If the parent has a page here, or if
3553 * this page falls outside the parent,
3554 * keep going.
3555 *
3556 * Otherwise, the backing_object must be
3557 * left in the chain.
3558 */
3559
3560 new_offset = (p->offset - backing_offset);
3561 if (p->offset < backing_offset || new_offset >= size) {
3562
3563 /*
3564 * Page falls outside of parent.
3565 * Keep going.
3566 */
3567
3568 continue;
3569 }
3570
3571 if ((vm_page_lookup(object, new_offset) == VM_PAGE_NULL)
3572#if MACH_PAGEMAP
3573 &&
3574 (vm_external_state_get(object->existence_map,
3575 new_offset)
3576 != VM_EXTERNAL_STATE_EXISTS)
3577#endif /* MACH_PAGEMAP */
3578 ) {
3579
3580 /*
3581 * Page still needed.
3582 * Can't go any further.
3583 */
3584
3585 vm_object_unlock(backing_object);
3586 return;
3587 }
3588 }
3589
3590#if MACH_PAGEMAP
3591 /*
3592 * Next, if backing object has been paged out,
3593 * we must check its existence info for pages
3594 * that the parent doesn't have.
3595 */
3596
3597 if (backing_object->pager_created) {
3598 assert(backing_object->existence_map
3599 != VM_EXTERNAL_NULL);
3600 for (new_offset = 0; new_offset < object->size;
3601 new_offset += PAGE_SIZE_64) {
3602 vm_object_offset_t
3603 offset = new_offset + backing_offset;
3604
3605 /*
3606 * If this page doesn't exist in
3607 * the backing object's existence
3608 * info, then continue.
3609 */
3610
3611 if (vm_external_state_get(
3612 backing_object->existence_map,
3613 offset) == VM_EXTERNAL_STATE_ABSENT) {
3614 continue;
3615 }
3616
3617 /*
3618 * If this page is neither resident
3619 * in the parent nor paged out to
3620 * the parent's pager, then we cannot
3621 * bypass the backing object.
3622 */
3623
3624 if ((vm_page_lookup(object, new_offset) ==
3625 VM_PAGE_NULL) &&
3626 ((object->existence_map == VM_EXTERNAL_NULL)
3627 || (vm_external_state_get(
3628 object->existence_map, new_offset)
3629 == VM_EXTERNAL_STATE_ABSENT))) {
3630 vm_object_unlock(backing_object);
3631 return;
3632 }
3633 }
3634 }
3635#else /* MACH_PAGEMAP */
3636 assert(! backing_object->pager_created);
3637#endif /* MACH_PAGEMAP */
3638
3639 /*
3640 * All interesting pages in the backing object
3641 * already live in the parent or its pager.
3642 * Thus we can bypass the backing object.
3643 */
3644
3645 vm_object_do_bypass(object, backing_object);
3646
3647 /*
3648 * Try again with this object's new backing object.
3649 */
3650
3651 continue;
3652 }
3653}
3654
3655/*
3656 * Routine: vm_object_page_remove: [internal]
3657 * Purpose:
3658 * Removes all physical pages in the specified
3659 * object range from the object's list of pages.
3660 *
3661 * In/out conditions:
3662 * The object must be locked.
3663 * The object must not have paging_in_progress, usually
3664 * guaranteed by not having a pager.
3665 */
3666unsigned int vm_object_page_remove_lookup = 0;
3667unsigned int vm_object_page_remove_iterate = 0;
3668
3669void
3670vm_object_page_remove(
3671 register vm_object_t object,
3672 register vm_object_offset_t start,
3673 register vm_object_offset_t end)
3674{
3675 register vm_page_t p, next;
3676
3677 /*
3678 * One and two page removals are most popular.
3679 * The factor of 16 here is somewhat arbitrary.
3680 * It balances vm_object_lookup vs iteration.
3681 */
3682
3683 if (atop(end - start) < (unsigned)object->resident_page_count/16) {
3684 vm_object_page_remove_lookup++;
3685
3686 for (; start < end; start += PAGE_SIZE_64) {
3687 p = vm_page_lookup(object, start);
3688 if (p != VM_PAGE_NULL) {
3689 assert(!p->cleaning && !p->pageout);
3690 if (!p->fictitious)
3691 pmap_page_protect(p->phys_addr,
3692 VM_PROT_NONE);
3693 VM_PAGE_FREE(p);
3694 }
3695 }
3696 } else {
3697 vm_object_page_remove_iterate++;
3698
3699 p = (vm_page_t) queue_first(&object->memq);
3700 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3701 next = (vm_page_t) queue_next(&p->listq);
3702 if ((start <= p->offset) && (p->offset < end)) {
3703 assert(!p->cleaning && !p->pageout);
3704 if (!p->fictitious)
3705 pmap_page_protect(p->phys_addr,
3706 VM_PROT_NONE);
3707 VM_PAGE_FREE(p);
3708 }
3709 p = next;
3710 }
3711 }
3712}
3713
3714/*
3715 * Routine: vm_object_coalesce
3716 * Function: Coalesces two objects backing up adjoining
3717 * regions of memory into a single object.
3718 *
3719 * returns TRUE if objects were combined.
3720 *
3721 * NOTE: Only works at the moment if the second object is NULL -
3722 * if it's not, which object do we lock first?
3723 *
3724 * Parameters:
3725 * prev_object First object to coalesce
3726 * prev_offset Offset into prev_object
3727 * next_object Second object into coalesce
3728 * next_offset Offset into next_object
3729 *
3730 * prev_size Size of reference to prev_object
3731 * next_size Size of reference to next_object
3732 *
3733 * Conditions:
3734 * The object(s) must *not* be locked. The map must be locked
3735 * to preserve the reference to the object(s).
3736 */
3737int vm_object_coalesce_count = 0;
3738
3739boolean_t
3740vm_object_coalesce(
3741 register vm_object_t prev_object,
3742 vm_object_t next_object,
3743 vm_object_offset_t prev_offset,
3744 vm_object_offset_t next_offset,
3745 vm_object_size_t prev_size,
3746 vm_object_size_t next_size)
3747{
3748 vm_object_size_t newsize;
3749
3750#ifdef lint
3751 next_offset++;
3752#endif /* lint */
3753
3754 if (next_object != VM_OBJECT_NULL) {
3755 return(FALSE);
3756 }
3757
3758 if (prev_object == VM_OBJECT_NULL) {
3759 return(TRUE);
3760 }
3761
3762 XPR(XPR_VM_OBJECT,
3763 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3764 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3765
3766 vm_object_lock(prev_object);
3767
3768 /*
3769 * Try to collapse the object first
3770 */
3771 vm_object_collapse(prev_object);
3772
3773 /*
3774 * Can't coalesce if pages not mapped to
3775 * prev_entry may be in use any way:
3776 * . more than one reference
3777 * . paged out
3778 * . shadows another object
3779 * . has a copy elsewhere
3780 * . paging references (pages might be in page-list)
3781 */
3782
3783 if ((prev_object->ref_count > 1) ||
3784 prev_object->pager_created ||
3785 (prev_object->shadow != VM_OBJECT_NULL) ||
3786 (prev_object->copy != VM_OBJECT_NULL) ||
3787 (prev_object->true_share != FALSE) ||
3788 (prev_object->paging_in_progress != 0)) {
3789 vm_object_unlock(prev_object);
3790 return(FALSE);
3791 }
3792
3793 vm_object_coalesce_count++;
3794
3795 /*
3796 * Remove any pages that may still be in the object from
3797 * a previous deallocation.
3798 */
3799 vm_object_page_remove(prev_object,
3800 prev_offset + prev_size,
3801 prev_offset + prev_size + next_size);
3802
3803 /*
3804 * Extend the object if necessary.
3805 */
3806 newsize = prev_offset + prev_size + next_size;
3807 if (newsize > prev_object->size) {
3808#if MACH_PAGEMAP
3809 /*
3810 * We cannot extend an object that has existence info,
3811 * since the existence info might then fail to cover
3812 * the entire object.
3813 *
3814 * This assertion must be true because the object
3815 * has no pager, and we only create existence info
3816 * for objects with pagers.
3817 */
3818 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3819#endif /* MACH_PAGEMAP */
3820 prev_object->size = newsize;
3821 }
3822
3823 vm_object_unlock(prev_object);
3824 return(TRUE);
3825}
3826
3827/*
3828 * Attach a set of physical pages to an object, so that they can
3829 * be mapped by mapping the object. Typically used to map IO memory.
3830 *
3831 * The mapping function and its private data are used to obtain the
3832 * physical addresses for each page to be mapped.
3833 */
3834void
3835vm_object_page_map(
3836 vm_object_t object,
3837 vm_object_offset_t offset,
3838 vm_object_size_t size,
3839 vm_object_offset_t (*map_fn)(void *map_fn_data,
3840 vm_object_offset_t offset),
3841 void *map_fn_data) /* private to map_fn */
3842{
3843 int num_pages;
3844 int i;
3845 vm_page_t m;
3846 vm_page_t old_page;
3847 vm_object_offset_t addr;
3848
3849 num_pages = atop(size);
3850
3851 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3852
3853 addr = (*map_fn)(map_fn_data, offset);
3854
3855 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3856 vm_page_more_fictitious();
3857
3858 vm_object_lock(object);
3859 if ((old_page = vm_page_lookup(object, offset))
3860 != VM_PAGE_NULL)
3861 {
3862 vm_page_lock_queues();
3863 vm_page_free(old_page);
3864 vm_page_unlock_queues();
3865 }
3866
3867 vm_page_init(m, addr);
3868 m->private = TRUE; /* don`t free page */
3869 m->wire_count = 1;
3870 vm_page_insert(m, object, offset);
3871
3872 PAGE_WAKEUP_DONE(m);
3873 vm_object_unlock(object);
3874 }
3875}
3876
3877#include <mach_kdb.h>
3878
3879#if MACH_KDB
3880#include <ddb/db_output.h>
3881#include <vm/vm_print.h>
3882
3883#define printf kdbprintf
3884
3885extern boolean_t vm_object_cached(
3886 vm_object_t object);
3887
3888extern void print_bitstring(
3889 char byte);
3890
3891boolean_t vm_object_print_pages = FALSE;
3892
/*
 *	print_bitstring:	[ debug ]
 *
 *	Print the eight bits of "byte" as '0'/'1' characters,
 *	starting with bit 0 (the least significant bit) and ending
 *	with bit 7.  No separator or newline is printed.
 */
void
print_bitstring(
	char	byte)
{
	char	bits[8];
	int	bit;

	for (bit = 0; bit < 8; bit++) {
		bits[bit] = (byte & (1 << bit)) ? '1' : '0';
	}

	printf("%c%c%c%c%c%c%c%c",
	       bits[0], bits[1], bits[2], bits[3],
	       bits[4], bits[5], bits[6], bits[7]);
}
3907
3908boolean_t
3909vm_object_cached(
3910 register vm_object_t object)
3911{
3912 register vm_object_t o;
3913
3914 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3915 if (object == o) {
3916 return TRUE;
3917 }
3918 }
3919 return FALSE;
3920}
3921
#if	MACH_PAGEMAP
/*
 *	vm_external_print:	[ debug ]
 *
 *	Print a short summary of an existence map: "0 " for a null
 *	map, otherwise the map size as computed by stob(size),
 *	followed by the first two map bytes and, when the map is
 *	longer than two bytes, "..." and the last byte, each as a
 *	bit string.
 */
void
vm_external_print(
	vm_external_map_t	map,
	vm_size_t		size)
{
	vm_size_t	nbytes;

	if (map == VM_EXTERNAL_NULL) {
		printf("0 ");
		return;
	}

	nbytes = stob(size);
	printf("{ size=%d, map=[", nbytes);

	if (nbytes > 0) {
		print_bitstring(map[0]);
	}
	if (nbytes > 1) {
		print_bitstring(map[1]);
	}
	if (nbytes > 2) {
		printf("...");
		print_bitstring(map[nbytes - 1]);
	}

	printf("] }\n");
}
#endif	/* MACH_PAGEMAP */
3951
3952int
3953vm_follow_object(
3954 vm_object_t object)
3955{
3956 extern db_indent;
3957
3958 int count = 1;
3959
3960 if (object == VM_OBJECT_NULL)
3961 return 0;
3962
3963 iprintf("object 0x%x", object);
3964 printf(", shadow=0x%x", object->shadow);
3965 printf(", copy=0x%x", object->copy);
3966 printf(", pager=0x%x", object->pager);
3967 printf(", ref=%d\n", object->ref_count);
3968
3969 db_indent += 2;
3970 if (object->shadow)
3971 count += vm_follow_object(object->shadow);
3972
3973 db_indent -= 2;
3974 return count;
3975}
3976
3977/*
3978 * vm_object_print: [ debug ]
3979 */
3980void
3981vm_object_print(
3982 vm_object_t object,
3983 boolean_t have_addr,
3984 int arg_count,
3985 char *modif)
3986{
3987 register vm_page_t p;
3988 extern db_indent;
3989 char *s;
3990
3991 register int count;
3992
3993 if (object == VM_OBJECT_NULL)
3994 return;
3995
3996 iprintf("object 0x%x\n", object);
3997
3998 db_indent += 2;
3999
4000 iprintf("size=0x%x", object->size);
4001 printf(", cluster=0x%x", object->cluster_size);
4002 printf(", frozen=0x%x", object->frozen_size);
4003 printf(", ref_count=%d\n", object->ref_count);
4004 iprintf("");
4005#if TASK_SWAPPER
4006 printf("res_count=%d, ", object->res_count);
4007#endif /* TASK_SWAPPER */
4008 printf("resident_page_count=%d\n", object->resident_page_count);
4009
4010 iprintf("shadow=0x%x", object->shadow);
4011 if (object->shadow) {
4012 register int i = 0;
4013 vm_object_t shadow = object;
4014 while(shadow = shadow->shadow)
4015 i++;
4016 printf(" (depth %d)", i);
4017 }
4018 printf(", copy=0x%x", object->copy);
4019 printf(", shadow_offset=0x%x", object->shadow_offset);
4020 printf(", last_alloc=0x%x\n", object->last_alloc);
4021
4022 iprintf("pager=0x%x", object->pager);
4023 printf(", paging_offset=0x%x", object->paging_offset);
4024 printf(", pager_request=0x%x\n", object->pager_request);
4025
4026 iprintf("copy_strategy=%d[", object->copy_strategy);
4027 switch (object->copy_strategy) {
4028 case MEMORY_OBJECT_COPY_NONE:
4029 printf("copy_none");
4030 break;
4031
4032 case MEMORY_OBJECT_COPY_CALL:
4033 printf("copy_call");
4034 break;
4035
4036 case MEMORY_OBJECT_COPY_DELAY:
4037 printf("copy_delay");
4038 break;
4039
4040 case MEMORY_OBJECT_COPY_SYMMETRIC:
4041 printf("copy_symmetric");
4042 break;
4043
4044 case MEMORY_OBJECT_COPY_INVALID:
4045 printf("copy_invalid");
4046 break;
4047
4048 default:
4049 printf("?");
4050 }
4051 printf("]");
4052 printf(", absent_count=%d\n", object->absent_count);
4053
4054 iprintf("all_wanted=0x%x<", object->all_wanted);
4055 s = "";
4056 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
4057 printf("%sinit", s);
4058 s = ",";
4059 }
4060 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
4061 printf("%sready", s);
4062 s = ",";
4063 }
4064 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
4065 printf("%spaging", s);
4066 s = ",";
4067 }
4068 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
4069 printf("%sabsent", s);
4070 s = ",";
4071 }
4072 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
4073 printf("%slock", s);
4074 s = ",";
4075 }
4076 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
4077 printf("%suncaching", s);
4078 s = ",";
4079 }
4080 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
4081 printf("%scopy_call", s);
4082 s = ",";
4083 }
4084 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
4085 printf("%scaching", s);
4086 s = ",";
4087 }
4088 printf(">");
4089 printf(", paging_in_progress=%d\n", object->paging_in_progress);
4090
4091 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
4092 (object->pager_created ? "" : "!"),
4093 (object->pager_initialized ? "" : "!"),
4094 (object->pager_ready ? "" : "!"),
4095 (object->can_persist ? "" : "!"),
4096 (object->pager_trusted ? "" : "!"),
4097 (object->pageout ? "" : "!"),
4098 (object->internal ? "internal" : "external"),
4099 (object->temporary ? "temporary" : "permanent"));
4100 iprintf("%salive, %slock_in_progress, %slock_restart, %sshadowed, %scached, %sprivate\n",
4101 (object->alive ? "" : "!"),
4102 (object->lock_in_progress ? "" : "!"),
4103 (object->lock_restart ? "" : "!"),
4104 (object->shadowed ? "" : "!"),
4105 (vm_object_cached(object) ? "" : "!"),
4106 (object->private ? "" : "!"));
4107 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
4108 (object->advisory_pageout ? "" : "!"),
4109 (object->silent_overwrite ? "" : "!"));
4110
4111#if MACH_PAGEMAP
4112 iprintf("existence_map=");
4113 vm_external_print(object->existence_map, object->size);
4114#endif /* MACH_PAGEMAP */
4115#if MACH_ASSERT
4116 iprintf("paging_object=0x%x\n", object->paging_object);
4117#endif /* MACH_ASSERT */
4118
4119 if (vm_object_print_pages) {
4120 count = 0;
4121 p = (vm_page_t) queue_first(&object->memq);
4122 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4123 if (count == 0) {
4124 iprintf("memory:=");
4125 } else if (count == 2) {
4126 printf("\n");
4127 iprintf(" ...");
4128 count = 0;
4129 } else {
4130 printf(",");
4131 }
4132 count++;
4133
4134 printf("(off=0x%X,page=0x%X)", p->offset, (integer_t) p);
4135 p = (vm_page_t) queue_next(&p->listq);
4136 }
4137 if (count != 0) {
4138 printf("\n");
4139 }
4140 }
4141 db_indent -= 2;
4142}
4143
4144
4145/*
4146 * vm_object_find [ debug ]
4147 *
4148 * Find all tasks which reference the given vm_object.
4149 */
4150
4151boolean_t vm_object_find(vm_object_t object);
4152boolean_t vm_object_print_verbose = FALSE;
4153
4154boolean_t
4155vm_object_find(
4156 vm_object_t object)
4157{
4158 task_t task;
4159 vm_map_t map;
4160 vm_map_entry_t entry;
4161 processor_set_t pset = &default_pset;
4162 boolean_t found = FALSE;
4163
4164 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
4165 map = task->map;
4166 for (entry = vm_map_first_entry(map);
4167 entry && entry != vm_map_to_entry(map);
4168 entry = entry->vme_next) {
4169
4170 vm_object_t obj;
4171
4172 /*
4173 * For the time being skip submaps,
4174 * only the kernel can have submaps,
4175 * and unless we are interested in
4176 * kernel objects, we can simply skip
4177 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
4178 * for a full solution.
4179 */
4180 if (entry->is_sub_map)
4181 continue;
4182 if (entry)
4183 obj = entry->object.vm_object;
4184 else
4185 continue;
4186
4187 while (obj != VM_OBJECT_NULL) {
4188 if (obj == object) {
4189 if (!found) {
4190 printf("TASK\t\tMAP\t\tENTRY\n");
4191 found = TRUE;
4192 }
4193 printf("0x%x\t0x%x\t0x%x\n",
4194 task, map, entry);
4195 }
4196 obj = obj->shadow;
4197 }
4198 }
4199 }
4200
4201 return(found);
4202}
4203
4204#endif /* MACH_KDB */
4205
4206/*
4207 * memory_object_free_from_cache:
4208 *
4209 * Walk the vm_object cache list, removing and freeing vm_objects
4210 * which are backed by the pager identified by the caller, (pager_id).
4211 * Remove up to "count" objects, if there are that may available
4212 * in the cache.
4213 * Walk the list at most once, return the number of vm_objects
4214 * actually freed.
4215 *
4216 */
4217
4218kern_return_t
4219memory_object_free_from_cache(
4220 host_t host,
4221 int pager_id,
4222 int *count)
4223{
4224
4225 int object_released = 0;
4226 int i;
4227
4228 register vm_object_t object = VM_OBJECT_NULL;
4229 vm_object_t shadow;
4230
4231/*
4232 if(host == HOST_NULL)
4233 return(KERN_INVALID_ARGUMENT);
4234*/
4235
4236 try_again:
4237 vm_object_cache_lock();
4238
4239 queue_iterate(&vm_object_cached_list, object,
4240 vm_object_t, cached_list) {
4241 if (pager_id == (int) pager_mux_hash_lookup(
4242 (ipc_port_t)object->pager)) {
4243 vm_object_lock(object);
4244 queue_remove(&vm_object_cached_list, object,
4245 vm_object_t, cached_list);
4246 vm_object_cached_count--;
4247
4248 /*
4249 * Since this object is in the cache, we know
4250 * that it is initialized and has no references.
4251 * Take a reference to avoid recursive
4252 * deallocations.
4253 */
4254
4255 assert(object->pager_initialized);
4256 assert(object->ref_count == 0);
4257 object->ref_count++;
4258
4259 /*
4260 * Terminate the object.
4261 * If the object had a shadow, we let
4262 * vm_object_deallocate deallocate it.
4263 * "pageout" objects have a shadow, but
4264 * maintain a "paging reference" rather
4265 * than a normal reference.
4266 * (We are careful here to limit recursion.)
4267 */
4268 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4269 if ((vm_object_terminate(object) == KERN_SUCCESS)
4270 && (shadow != VM_OBJECT_NULL)) {
4271 vm_object_deallocate(shadow);
4272 }
4273
4274 if(object_released++ == *count)
4275 return KERN_SUCCESS;
4276 goto try_again;
4277 }
4278 }
4279 vm_object_cache_unlock();
4280 *count = object_released;
4281 return KERN_SUCCESS;
4282}
4283
4284/*
4285 * memory_object_remove_cached_object:
4286 *
4287 * Check for the existance of a memory object represented by the
4288 * supplied port. If one exists and it is not in use, remove the
4289 * memory object from the vm_object cache.
4290 * If the memory object is in use, turn off the the "can_persist"
4291 * property so that it will not go in the cache when the last user
4292 * gives it up.
4293 *
4294 */
4295
4296kern_return_t
4297memory_object_remove_cached_object(
4298 ipc_port_t port)
4299{
4300 vm_object_t object;
4301 vm_object_t shadow;
4302
4303repeat_lock_acquire:
4304 object = VM_OBJECT_NULL;
4305
4306 if (IP_VALID(port)) {
4307 vm_object_cache_lock();
4308 ip_lock(port);
4309 if (ip_active(port) &&
4310 (ip_kotype(port) == IKOT_PAGER_LOOKUP_TYPE)) {
4311 object = (vm_object_t) port->ip_kobject;
4312 if (!vm_object_lock_try(object)) {
4313 /*
4314 * failed to acquire object lock. Drop the
4315 * other two locks and wait for it, then go
4316 * back and start over in case the port
4317 * associations changed in the interim.
4318 */
4319 ip_unlock(port);
4320 vm_object_cache_unlock();
4321 vm_object_lock(object);
4322 vm_object_unlock(object);
4323 goto repeat_lock_acquire;
4324 }
4325
4326 if(object->terminating) {
4327 ip_unlock(port);
4328 vm_object_unlock(object);
4329 vm_object_cache_unlock();
4330 return KERN_RIGHT_EXISTS;
4331 }
4332
4333 assert(object->alive);
4334 ip_unlock(port);
4335
4336 if (object->ref_count == 0) {
4337 queue_remove(&vm_object_cached_list, object,
4338 vm_object_t, cached_list);
4339 vm_object_cached_count--;
4340 object->ref_count++;
4341 /*
4342 * Terminate the object.
4343 * If the object had a shadow, we let
4344 * vm_object_deallocate deallocate it.
4345 * "pageout" objects have a shadow, but
4346 * maintain a "paging reference" rather
4347 * than a normal reference.
4348 * (We are careful here to limit
4349 * recursion.)
4350 */
4351 shadow = object->pageout?
4352 VM_OBJECT_NULL:object->shadow;
4353 /* will do the vm_object_cache_unlock */
4354 if((vm_object_terminate(object)
4355 == KERN_SUCCESS)
4356 && (shadow != VM_OBJECT_NULL)) {
4357 /* will lock and unlock cache_lock */
4358 vm_object_deallocate(shadow);
4359 }
4360 }
4361 else {
4362 /*
4363 * We cannot free object but we can
4364 * make sure it doesn't go into the
4365 * cache when it is no longer in
4366 * use.
4367 */
4368 object->can_persist = FALSE;
4369
4370 vm_object_unlock(object);
4371 vm_object_cache_unlock();
4372 return KERN_RIGHT_EXISTS;
4373 }
4374
4375
4376 }
4377 else {
4378 ip_unlock(port);
4379 vm_object_cache_unlock();
4380 }
4381 } else {
4382 return KERN_INVALID_ARGUMENT;
4383 }
4384
4385
4386 return KERN_SUCCESS;
4387}
4388
4389kern_return_t
4390memory_object_create_named(
4391 ipc_port_t port,
4392 vm_object_size_t size,
4393 vm_object_t *object_ptr)
4394{
4395 vm_object_t object;
4396 vm_object_hash_entry_t entry;
4397
4398 *object_ptr = (vm_object_t)NULL;
4399 if (IP_VALID(port)) {
4400
4401 vm_object_cache_lock();
4402 entry = vm_object_hash_lookup(port, FALSE);
4403 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4404 (entry->object != VM_OBJECT_NULL)) {
4405 if (entry->object->named == TRUE)
4406 panic("memory_object_create_named: caller already holds the right");
4407 }
4408
4409 vm_object_cache_unlock();
4410 if ((object = vm_object_enter(port, size, FALSE, FALSE, TRUE))
4411 == VM_OBJECT_NULL)
4412 return(KERN_INVALID_OBJECT);
4413
4414 /* wait for object (if any) to be ready */
4415 if (object != VM_OBJECT_NULL) {
4416 vm_object_lock(object);
4417 object->named = TRUE;
4418 while (!object->pager_ready) {
4419 vm_object_wait(object,
4420 VM_OBJECT_EVENT_PAGER_READY,
4421 FALSE);
4422 vm_object_lock(object);
4423 }
4424 vm_object_unlock(object);
4425 }
4426 *object_ptr = object;
4427 return (KERN_SUCCESS);
4428 } else {
4429 return (KERN_INVALID_ARGUMENT);
4430 }
4431}
4432
4433kern_return_t
4434memory_object_recover_named(
4435 ipc_port_t pager,
4436 boolean_t wait_on_terminating,
4437 vm_object_t *object_ptr)
4438{
4439 vm_object_t object;
4440 vm_object_hash_entry_t entry;
4441
4442 *object_ptr = (vm_object_t)NULL;
4443lookup_entry:
4444 if (IP_VALID(pager)) {
4445
4446 vm_object_cache_lock();
4447 entry = vm_object_hash_lookup(pager, FALSE);
4448 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4449 (entry->object != VM_OBJECT_NULL)) {
4450 if (entry->object->named == TRUE)
4451 panic("memory_object_recover_named: caller already holds the right");
4452 object = entry->object;
4453 vm_object_lock(object);
4454 vm_object_cache_unlock();
4455 if (object->terminating && wait_on_terminating) {
4456 vm_object_wait(object,
4457 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4458 THREAD_UNINT);
4459 vm_object_unlock(object);
4460 goto lookup_entry;
4461 }
4462 } else {
4463 vm_object_cache_unlock();
4464 return KERN_FAILURE;
4465 }
4466
4467 if((object->ref_count == 0) && (!object->terminating)){
4468 queue_remove(&vm_object_cached_list, object,
4469 vm_object_t, cached_list);
4470 vm_object_cached_count--;
4471 XPR(XPR_VM_OBJECT_CACHE,
4472 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4473 (integer_t)object,
4474 (integer_t)vm_object_cached_list.next,
4475 (integer_t)vm_object_cached_list.prev, 0,0);
4476 }
4477
4478 object->named = TRUE;
4479 object->ref_count++;
4480 vm_object_res_reference(object);
4481 while (!object->pager_ready) {
4482 vm_object_wait(object,
4483 VM_OBJECT_EVENT_PAGER_READY,
4484 FALSE);
4485 vm_object_lock(object);
4486 }
4487 vm_object_unlock(object);
4488 *object_ptr = object;
4489 return (KERN_SUCCESS);
4490 } else {
4491 return (KERN_INVALID_ARGUMENT);
4492 }
4493}
#ifdef	MACH_BSD
/*
 *	adjust_vm_object_cache:
 *
 *	Scale the vm_object cache by setting its limit to "nval"
 *	and then trimming it.
 *
 *	The cache has to be big enough to cache mapped files
 *	effectively.  This matters a great deal with UBC, where all
 *	regular file vnodes have a memory object associated with
 *	them: a cache that is too small leads to rapid reclaiming of
 *	vnodes and hurts performance a lot.  It is also needed
 *	because the number of vnodes can be scaled dynamically.
 *
 *	"oval" is accepted but not consulted.  Always returns
 *	KERN_SUCCESS.
 */
kern_return_t
adjust_vm_object_cache(
	vm_size_t	oval,
	vm_size_t	nval)
{
	vm_object_cached_max = nval;
	vm_object_cache_trim(FALSE);

	return (KERN_SUCCESS);
}
#endif	/* MACH_BSD */
4513