1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50/*
51 */
52/*
53 * File: vm/vm_object.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Virtual memory object module.
57 */
58
59#ifdef MACH_BSD
60/* remove as part of component support merge */
61extern int vnode_pager_workaround;
62#endif
63
64#include <mach_pagemap.h>
65#include <task_swapper.h>
66
67#include <mach/mach_types.h>
68#include <mach/memory_object.h>
69#include <mach/memory_object_default.h>
70#include <mach/memory_object_control_server.h>
71#include <mach/vm_param.h>
72#include <ipc/ipc_port.h>
73#include <kern/assert.h>
74#include <kern/lock.h>
75#include <kern/queue.h>
76#include <kern/xpr.h>
77#include <kern/zalloc.h>
78#include <kern/host.h>
79#include <kern/host_statistics.h>
80#include <kern/processor.h>
81#include <vm/memory_object.h>
82#include <vm/vm_fault.h>
83#include <vm/vm_map.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pageout.h>
87#include <kern/misc_protos.h>
88
89/*
90 * Virtual memory objects maintain the actual data
91 * associated with allocated virtual memory. A given
92 * page of memory exists within exactly one object.
93 *
94 * An object is only deallocated when all "references"
95 * are given up.
96 *
97 * Associated with each object is a list of all resident
98 * memory pages belonging to that object; this list is
99 * maintained by the "vm_page" module, but locked by the object's
100 * lock.
101 *
102 * Each object also records the memory object reference
103 * that is used by the kernel to request and write
104 * back data (the memory object, field "pager"), etc...
105 *
106 * Virtual memory objects are allocated to provide
107 * zero-filled memory (vm_allocate) or map a user-defined
108 * memory object into a virtual address space (vm_map).
109 *
110 * Virtual memory objects that refer to a user-defined
111 * memory object are called "permanent", because all changes
112 * made in virtual memory are reflected back to the
113 * memory manager, which may then store it permanently.
114 * Other virtual memory objects are called "temporary",
115 * meaning that changes need be written back only when
116 * necessary to reclaim pages, and that storage associated
117 * with the object can be discarded once it is no longer
118 * mapped.
119 *
120 * A permanent memory object may be mapped into more
121 * than one virtual address space. Moreover, two threads
122 * may attempt to make the first mapping of a memory
123 * object concurrently. Only one thread is allowed to
124 * complete this mapping; all others wait until the
125 * "pager_initialized" field is asserted, indicating
126 * that the first thread has initialized all of the
127 * necessary fields in the virtual memory object structure.
128 *
129 * The kernel relies on a *default memory manager* to
130 * provide backing storage for the zero-filled virtual
131 * memory objects. The pager memory objects associated
132 * with these temporary virtual memory objects are only
133 * requested from the default memory manager when it
134 * becomes necessary. Virtual memory objects
135 * that depend on the default memory manager are called
136 * "internal". The "pager_created" field is provided to
137 * indicate whether these ports have ever been allocated.
138 *
139 * The kernel may also create virtual memory objects to
140 * hold changed pages after a copy-on-write operation.
141 * In this case, the virtual memory object (and its
142 * backing storage -- its memory object) only contain
143 * those pages that have been changed. The "shadow"
144 * field refers to the virtual memory object that contains
145 * the remainder of the contents. The "shadow_offset"
146 * field indicates where in the "shadow" these contents begin.
147 * The "copy" field refers to a virtual memory object
148 * to which changed pages must be copied before changing
149 * this object, in order to implement another form
150 * of copy-on-write optimization.
151 *
152 * The virtual memory object structure also records
153 * the attributes associated with its memory object.
154 * The "pager_ready", "can_persist" and "copy_strategy"
155 * fields represent those attributes. The "cached_list"
156 * field is used in the implementation of the persistence
157 * attribute.
158 *
159 * ZZZ Continue this comment.
160 */
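/*
 * Editor's note: the block below is an illustrative sketch, not part of the
 * original source.  It shows the object lifecycle described above using only
 * routines defined in this file, and is compiled out.
 */
#if 0	/* illustrative example only */
static void
vm_object_lifecycle_sketch(void)
{
	vm_object_t	object;

	/* Create a temporary, internal, zero-filled object of one page. */
	object = vm_object_allocate((vm_object_size_t) PAGE_SIZE);
	if (object == VM_OBJECT_NULL)
		return;

	/* Fields may only be examined or changed with the object locked. */
	vm_object_lock(object);
	assert(object->ref_count == 1);
	assert(object->internal && object->temporary);
	vm_object_unlock(object);

	/*
	 * Dropping the last reference lets vm_object_deallocate either
	 * cache the object (if it can persist) or terminate it.
	 */
	vm_object_deallocate(object);
}
#endif	/* illustrative example only */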
161
162/* Forward declarations for internal functions. */
163static void _vm_object_allocate(
164 vm_object_size_t size,
165 vm_object_t object);
166
167static kern_return_t vm_object_terminate(
168 vm_object_t object);
169
170extern void vm_object_remove(
171 vm_object_t object);
172
173static vm_object_t vm_object_cache_trim(
174 boolean_t called_from_vm_object_deallocate);
175
176static void vm_object_deactivate_all_pages(
177 vm_object_t object);
178
179static void vm_object_abort_activity(
180 vm_object_t object);
181
182static kern_return_t vm_object_copy_call(
183 vm_object_t src_object,
184 vm_object_offset_t src_offset,
185 vm_object_size_t size,
186 vm_object_t *_result_object);
187
188static void vm_object_do_collapse(
189 vm_object_t object,
190 vm_object_t backing_object);
191
192static void vm_object_do_bypass(
193 vm_object_t object,
194 vm_object_t backing_object);
195
196static void vm_object_release_pager(
197 memory_object_t pager);
198
199static zone_t vm_object_zone; /* vm backing store zone */
200
201/*
202 * All wired-down kernel memory belongs to a single virtual
203 * memory object (kernel_object) to avoid wasting data structures.
204 */
205static struct vm_object kernel_object_store;
206__private_extern__ vm_object_t kernel_object = &kernel_object_store;
207
208/*
209 * The submap object is used as a placeholder for vm_map_submap
210 * operations. The object is declared in vm_map.c because it
211 * is exported by the vm_map module. The storage is declared
212 * here because it must be initialized here.
213 */
214static struct vm_object vm_submap_object_store;
215
216/*
217 * Virtual memory objects are initialized from
218 * a template (see vm_object_allocate).
219 *
220 * When adding a new field to the virtual memory
221 * object structure, be sure to add initialization
222 * (see _vm_object_allocate()).
223 */
224static struct vm_object vm_object_template;
225
226/*
227 * Virtual memory objects that are not referenced by
228 * any address maps, but that are allowed to persist
229 * (an attribute specified by the associated memory manager),
230 * are kept in a queue (vm_object_cached_list).
231 *
232 * When an object from this queue is referenced again,
233 * for example to make another address space mapping,
234 * it must be removed from the queue. That is, the
235 * queue contains *only* objects with zero references.
236 *
237 * The kernel may choose to terminate objects from this
238 * queue in order to reclaim storage. The current policy
239 * is to permit a fixed maximum number of unreferenced
240 * objects (vm_object_cached_max).
241 *
242 * A spin lock (accessed by routines
243 * vm_object_cache_{lock,lock_try,unlock}) governs the
244 * object cache. It must be held when objects are
245 * added to or removed from the cache (in vm_object_terminate).
246 * The routines that acquire a reference to a virtual
247 * memory object based on one of the memory object ports
248 * must also lock the cache.
249 *
250 * Ideally, the object cache should be more isolated
251 * from the reference mechanism, so that the lock need
252 * not be held to make simple references.
253 */
254static queue_head_t vm_object_cached_list;
255static int vm_object_cached_count=0;
256static int vm_object_cached_high; /* highest # cached objects */
257static int vm_object_cached_max = 512; /* may be patched*/
258
259static decl_mutex_data(,vm_object_cached_lock_data)
260
261#define vm_object_cache_lock() \
262 mutex_lock(&vm_object_cached_lock_data)
263#define vm_object_cache_lock_try() \
264 mutex_try(&vm_object_cached_lock_data)
265#define vm_object_cache_unlock() \
266 mutex_unlock(&vm_object_cached_lock_data)
267
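/*
 * Editor's note: an illustrative sketch, not part of the original source, of
 * the locking rule stated above -- the cache lock must be held whenever the
 * cached-object queue or its count is examined or modified.  Compiled out.
 */
#if 0	/* illustrative example only */
static int
vm_object_cached_count_sketch(void)
{
	int	count;

	vm_object_cache_lock();
	count = vm_object_cached_count;		/* protected by the cache lock */
	vm_object_cache_unlock();

	return count;
}
#endif	/* illustrative example only */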
268#define VM_OBJECT_HASH_COUNT 1024
269static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
270static struct zone *vm_object_hash_zone;
271
272struct vm_object_hash_entry {
273 queue_chain_t hash_link; /* hash chain link */
274 memory_object_t pager; /* pager we represent */
275 vm_object_t object; /* corresponding object */
276 boolean_t waiting; /* someone waiting for
277 * termination */
278};
279
280typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
281#define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
282
283#define VM_OBJECT_HASH_SHIFT 8
284#define vm_object_hash(pager) \
285 ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
286
287/*
288 * vm_object_hash_lookup looks up a pager in the hashtable
289 * and returns the corresponding entry, with optional removal.
290 */
291
292static vm_object_hash_entry_t
293vm_object_hash_lookup(
294 memory_object_t pager,
295 boolean_t remove_entry)
296{
297 register queue_t bucket;
298 register vm_object_hash_entry_t entry;
299
300 bucket = &vm_object_hashtable[vm_object_hash(pager)];
301
302 entry = (vm_object_hash_entry_t)queue_first(bucket);
303 while (!queue_end(bucket, (queue_entry_t)entry)) {
304 if (entry->pager == pager && !remove_entry)
305 return(entry);
306 else if (entry->pager == pager) {
307 queue_remove(bucket, entry,
308 vm_object_hash_entry_t, hash_link);
309 return(entry);
310 }
311
312 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
313 }
314
315 return(VM_OBJECT_HASH_ENTRY_NULL);
316}
317
318/*
319 * vm_object_hash_insert enters the specified
320 * pager / cache object association in the hashtable.
321 */
322
323static void
324vm_object_hash_insert(
325 vm_object_hash_entry_t entry)
326{
327 register queue_t bucket;
328
329 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
330
331 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
332}
333
334static vm_object_hash_entry_t
335vm_object_hash_entry_alloc(
336 memory_object_t pager)
337{
338 vm_object_hash_entry_t entry;
339
340 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
341 entry->pager = pager;
342 entry->object = VM_OBJECT_NULL;
343 entry->waiting = FALSE;
344
345 return(entry);
346}
347
348void
349vm_object_hash_entry_free(
350 vm_object_hash_entry_t entry)
351{
352 zfree(vm_object_hash_zone, (vm_offset_t)entry);
353}
354
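/*
 * Editor's note: an illustrative sketch, not part of the original source,
 * showing how the hash helpers above combine when a pager is first
 * associated with an object (compare vm_object_enter elsewhere in this
 * file).  Compiled out.
 */
#if 0	/* illustrative example only */
static void
vm_object_hash_usage_sketch(
	memory_object_t	pager,
	vm_object_t	object)
{
	vm_object_hash_entry_t	entry;

	/* zalloc may block, so allocate the entry before taking the lock. */
	entry = vm_object_hash_entry_alloc(pager);

	vm_object_cache_lock();		/* the hash table is cache-locked */
	if (vm_object_hash_lookup(pager, FALSE) == VM_OBJECT_HASH_ENTRY_NULL) {
		entry->object = object;
		vm_object_hash_insert(entry);
		entry = VM_OBJECT_HASH_ENTRY_NULL;
	}
	vm_object_cache_unlock();

	if (entry != VM_OBJECT_HASH_ENTRY_NULL)	/* lost the race; discard */
		vm_object_hash_entry_free(entry);
}
#endif	/* illustrative example only */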
355/*
356 * vm_object_allocate:
357 *
358 * Returns a new object with the given size.
359 */
360
361static void
362_vm_object_allocate(
363 vm_object_size_t size,
364 vm_object_t object)
365{
366 XPR(XPR_VM_OBJECT,
367 "vm_object_allocate, object 0x%X size 0x%X\n",
368 (integer_t)object, size, 0,0,0);
369
370 *object = vm_object_template;
371 queue_init(&object->memq);
372 queue_init(&object->msr_q);
373#ifdef UBC_DEBUG
374 queue_init(&object->uplq);
375#endif /* UBC_DEBUG */
376 vm_object_lock_init(object);
377 object->size = size;
378}
379
380__private_extern__ vm_object_t
381vm_object_allocate(
382 vm_object_size_t size)
383{
384 register vm_object_t object;
385
386 object = (vm_object_t) zalloc(vm_object_zone);
387
388// dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
389
390 if (object != VM_OBJECT_NULL)
391 _vm_object_allocate(size, object);
392
393 return object;
394}
395
396/*
397 * vm_object_bootstrap:
398 *
399 * Initialize the VM objects module.
400 */
401__private_extern__ void
402vm_object_bootstrap(void)
403{
404 register int i;
405
406 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
407 round_page_32(512*1024),
408 round_page_32(12*1024),
409 "vm objects");
410
411 queue_init(&vm_object_cached_list);
412 mutex_init(&vm_object_cached_lock_data, ETAP_VM_OBJ_CACHE);
413
414 vm_object_hash_zone =
415 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
416 round_page_32(512*1024),
417 round_page_32(12*1024),
418 "vm object hash entries");
419
420 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
421 queue_init(&vm_object_hashtable[i]);
422
423 /*
424 * Fill in a template object, for quick initialization
425 */
426
427 /* memq; Lock; init after allocation */
428 vm_object_template.size = 0;
429 vm_object_template.frozen_size = 0;
430 vm_object_template.ref_count = 1;
431#if TASK_SWAPPER
432 vm_object_template.res_count = 1;
433#endif /* TASK_SWAPPER */
434 vm_object_template.resident_page_count = 0;
435 vm_object_template.copy = VM_OBJECT_NULL;
436 vm_object_template.shadow = VM_OBJECT_NULL;
437 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
438 vm_object_template.cow_hint = ~(vm_offset_t)0;
439 vm_object_template.true_share = FALSE;
440
441 vm_object_template.pager = MEMORY_OBJECT_NULL;
442 vm_object_template.paging_offset = 0;
443 vm_object_template.pager_request = PAGER_REQUEST_NULL;
444 /* msr_q; init after allocation */
445
446 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
447 vm_object_template.absent_count = 0;
448 vm_object_template.paging_in_progress = 0;
449
450 /* Begin bitfields */
451 vm_object_template.all_wanted = 0; /* all bits FALSE */
452 vm_object_template.pager_created = FALSE;
453 vm_object_template.pager_initialized = FALSE;
454 vm_object_template.pager_ready = FALSE;
455 vm_object_template.pager_trusted = FALSE;
456 vm_object_template.can_persist = FALSE;
457 vm_object_template.internal = TRUE;
458 vm_object_template.temporary = TRUE;
459 vm_object_template.private = FALSE;
460 vm_object_template.pageout = FALSE;
461 vm_object_template.alive = TRUE;
462 vm_object_template.lock_in_progress = FALSE;
463 vm_object_template.lock_restart = FALSE;
464 vm_object_template.silent_overwrite = FALSE;
465 vm_object_template.advisory_pageout = FALSE;
466 vm_object_template.shadowed = FALSE;
467 vm_object_template.terminating = FALSE;
468 vm_object_template.shadow_severed = FALSE;
469 vm_object_template.phys_contiguous = FALSE;
470 vm_object_template.nophyscache = FALSE;
471 /* End bitfields */
472
473 /* cache bitfields */
474 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
475
476 /* cached_list; init after allocation */
477 vm_object_template.last_alloc = (vm_object_offset_t) 0;
478 vm_object_template.cluster_size = 0;
479#if MACH_PAGEMAP
480 vm_object_template.existence_map = VM_EXTERNAL_NULL;
481#endif /* MACH_PAGEMAP */
482#if MACH_ASSERT
483 vm_object_template.paging_object = VM_OBJECT_NULL;
484#endif /* MACH_ASSERT */
485
486 /*
487 * Initialize the "kernel object"
488 */
489
490 kernel_object = &kernel_object_store;
491
492/*
493 * Note that in the following size specifications, we need to add 1 because
494 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
495 */
496
497#ifdef ppc
498 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
499 kernel_object);
500#else
501 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
502 kernel_object);
503#endif
504 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
505
506 /*
507 * Initialize the "submap object". Make it as large as the
508 * kernel object so that no limit is imposed on submap sizes.
509 */
510
511 vm_submap_object = &vm_submap_object_store;
512#ifdef ppc
513 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
514 vm_submap_object);
515#else
516 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
517 vm_submap_object);
518#endif
519 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
520
521 /*
522 * Create an "extra" reference to this object so that we never
523 * try to deallocate it; zfree doesn't like to be called with
524 * non-zone memory.
525 */
526 vm_object_reference(vm_submap_object);
527
528#if MACH_PAGEMAP
529 vm_external_module_initialize();
530#endif /* MACH_PAGEMAP */
531}
532
533__private_extern__ void
534vm_object_init(void)
535{
536 /*
537 * Finish initializing the kernel object.
538 */
539}
540
541/* remove the typedef below when emergency work-around is taken out */
542typedef struct vnode_pager {
543 memory_object_t pager;
544 memory_object_t pager_handle; /* pager */
545 memory_object_control_t control_handle; /* memory object's control handle */
546 void *vnode_handle; /* vnode handle */
547} *vnode_pager_t;
548
549#define MIGHT_NOT_CACHE_SHADOWS 1
550#if MIGHT_NOT_CACHE_SHADOWS
551static int cache_shadows = TRUE;
552#endif /* MIGHT_NOT_CACHE_SHADOWS */
553
554/*
555 * vm_object_deallocate:
556 *
557 * Release a reference to the specified object,
558 * gained either through a vm_object_allocate
559 * or a vm_object_reference call. When all references
560 * are gone, storage associated with this object
561 * may be relinquished.
562 *
563 * No object may be locked.
564 */
565__private_extern__ void
566vm_object_deallocate(
567 register vm_object_t object)
568{
569 boolean_t retry_cache_trim = FALSE;
570 vm_object_t shadow;
571
572// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
573// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
574
575
576 while (object != VM_OBJECT_NULL) {
577
578 /*
579 * The cache holds a reference (uncounted) to
580 * the object; we must lock it before removing
581 * the object.
582 */
583 for (;;) {
584 vm_object_cache_lock();
585
586 /*
587 * if we try to take a regular lock here
588 * we risk deadlocking against someone
589 * holding a lock on this object while
590 * trying to vm_object_deallocate a different
591 * object
592 */
593 if (vm_object_lock_try(object))
594 break;
595 vm_object_cache_unlock();
596 mutex_pause(); /* wait a bit */
597 }
598 assert(object->ref_count > 0);
599
600 /*
601 * If the object has a named reference, and only
602 * that reference would remain, inform the pager
603 * about the last "mapping" reference going away.
604 */
605 if ((object->ref_count == 2) && (object->named)) {
606 memory_object_t pager = object->pager;
607
608 /* Notify the Pager that there are no */
609 /* more mappers for this object */
610
611 if (pager != MEMORY_OBJECT_NULL) {
612 vm_object_unlock(object);
613 vm_object_cache_unlock();
614
615 memory_object_unmap(pager);
616
617 for (;;) {
618 vm_object_cache_lock();
619
620 /*
621 * if we try to take a regular lock here
622 * we risk deadlocking against someone
623 * holding a lock on this object while
624 * trying to vm_object_deallocate a different
625 * object
626 */
627 if (vm_object_lock_try(object))
628 break;
629 vm_object_cache_unlock();
630 mutex_pause(); /* wait a bit */
631 }
632 assert(object->ref_count > 0);
633 }
634 }
635
636 /*
637 * Lose the reference. If other references
638 * remain, then we are done, unless we need
639 * to retry a cache trim.
640 * If it is the last reference, then keep it
641 * until any pending initialization is completed.
642 */
643
644 /* if the object is terminating, it cannot go into */
645 /* the cache and we obviously should not call */
646 /* terminate again. */
647
648 if ((object->ref_count > 1) || object->terminating) {
649 object->ref_count--;
650 vm_object_res_deallocate(object);
651 vm_object_unlock(object);
652 vm_object_cache_unlock();
653 if (retry_cache_trim &&
654 ((object = vm_object_cache_trim(TRUE)) !=
655 VM_OBJECT_NULL)) {
656 continue;
657 }
658 return;
659 }
660
661 /*
662 * We have to wait for initialization
663 * before destroying or caching the object.
664 */
665
666 if (object->pager_created && ! object->pager_initialized) {
667 assert(! object->can_persist);
668 vm_object_assert_wait(object,
669 VM_OBJECT_EVENT_INITIALIZED,
670 THREAD_UNINT);
671 vm_object_unlock(object);
672 vm_object_cache_unlock();
673 thread_block(THREAD_CONTINUE_NULL);
674 continue;
675 }
676
677 /*
678 * If this object can persist, then enter it in
679 * the cache. Otherwise, terminate it.
680 *
681 * NOTE: Only permanent objects are cached, and
682 * permanent objects cannot have shadows. This
683 * affects the residence counting logic in a minor
684 * way (can do it in-line, mostly).
685 */
686
687 if ((object->can_persist) && (object->alive)) {
688 /*
689 * Now it is safe to decrement reference count,
690 * and to return if reference count is > 0.
691 */
692 if (--object->ref_count > 0) {
693 vm_object_res_deallocate(object);
694 vm_object_unlock(object);
695 vm_object_cache_unlock();
696 if (retry_cache_trim &&
697 ((object = vm_object_cache_trim(TRUE)) !=
698 VM_OBJECT_NULL)) {
699 continue;
700 }
701 return;
702 }
703
704#if MIGHT_NOT_CACHE_SHADOWS
705 /*
706 * Remove shadow now if we don't
707 * want to cache shadows.
708 */
709 if (! cache_shadows) {
710 shadow = object->shadow;
711 object->shadow = VM_OBJECT_NULL;
712 }
713#endif /* MIGHT_NOT_CACHE_SHADOWS */
714
715 /*
716 * Enter the object onto the queue of
717 * cached objects, and deactivate
718 * all of its pages.
719 */
720 assert(object->shadow == VM_OBJECT_NULL);
721 VM_OBJ_RES_DECR(object);
722 XPR(XPR_VM_OBJECT,
723 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
724 (integer_t)object,
725 (integer_t)vm_object_cached_list.next,
726 (integer_t)vm_object_cached_list.prev,0,0);
727
728 vm_object_cached_count++;
729 if (vm_object_cached_count > vm_object_cached_high)
730 vm_object_cached_high = vm_object_cached_count;
731 queue_enter(&vm_object_cached_list, object,
732 vm_object_t, cached_list);
733 vm_object_cache_unlock();
734 vm_object_deactivate_all_pages(object);
735 vm_object_unlock(object);
736
737#if MIGHT_NOT_CACHE_SHADOWS
738 /*
739 * If we have a shadow that we need
740 * to deallocate, do so now, remembering
741 * to trim the cache later.
742 */
743 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
744 object = shadow;
745 retry_cache_trim = TRUE;
746 continue;
747 }
748#endif /* MIGHT_NOT_CACHE_SHADOWS */
749
750 /*
751 * Trim the cache. If the cache trim
752 * returns with a shadow for us to deallocate,
753 * then remember to retry the cache trim
754 * when we are done deallocating the shadow.
755 * Otherwise, we are done.
756 */
757
758 object = vm_object_cache_trim(TRUE);
759 if (object == VM_OBJECT_NULL) {
760 return;
761 }
762 retry_cache_trim = TRUE;
763
764 } else {
765 /*
766 * This object is not cachable; terminate it.
767 */
768 XPR(XPR_VM_OBJECT,
769 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%lX ref %d\n",
770 (integer_t)object, object->resident_page_count,
771 object->paging_in_progress,
772 (natural_t)current_thread(),object->ref_count);
773
774 VM_OBJ_RES_DECR(object); /* XXX ? */
775 /*
776 * Terminate this object. If it had a shadow,
777 * then deallocate it; otherwise, if we need
778 * to retry a cache trim, do so now; otherwise,
779 * we are done. "pageout" objects have a shadow,
780 * but maintain a "paging reference" rather than
781 * a normal reference.
782 */
783 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
784 if(vm_object_terminate(object) != KERN_SUCCESS) {
785 return;
786 }
787 if (shadow != VM_OBJECT_NULL) {
788 object = shadow;
789 continue;
790 }
791 if (retry_cache_trim &&
792 ((object = vm_object_cache_trim(TRUE)) !=
793 VM_OBJECT_NULL)) {
794 continue;
795 }
796 return;
797 }
798 }
799 assert(! retry_cache_trim);
800}
801
802/*
803 * Check to see whether we really need to trim
804 * down the cache. If so, remove an object from
805 * the cache, terminate it, and repeat.
806 *
807 * Called with, and returns with, cache lock unlocked.
808 */
809vm_object_t
810vm_object_cache_trim(
811 boolean_t called_from_vm_object_deallocate)
812{
813 register vm_object_t object = VM_OBJECT_NULL;
814 vm_object_t shadow;
815
816 for (;;) {
817
818 /*
819 * If we no longer need to trim the cache,
820 * then we are done.
821 */
822
823 vm_object_cache_lock();
824 if (vm_object_cached_count <= vm_object_cached_max) {
825 vm_object_cache_unlock();
826 return VM_OBJECT_NULL;
827 }
828
829 /*
830 * We must trim down the cache, so remove
831 * the first object in the cache.
832 */
833 XPR(XPR_VM_OBJECT,
834 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
835 (integer_t)vm_object_cached_list.next,
836 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
837
838 object = (vm_object_t) queue_first(&vm_object_cached_list);
839 if(object == (vm_object_t) &vm_object_cached_list) {
840 /* something's wrong with the calling parameter or */
841 /* the value of vm_object_cached_count, just fix */
842 /* and return */
843 if(vm_object_cached_max < 0)
844 vm_object_cached_max = 0;
845 vm_object_cached_count = 0;
846 vm_object_cache_unlock();
847 return VM_OBJECT_NULL;
848 }
849 vm_object_lock(object);
850 queue_remove(&vm_object_cached_list, object, vm_object_t,
851 cached_list);
852 vm_object_cached_count--;
853
854 /*
855 * Since this object is in the cache, we know
856 * that it is initialized and has no references.
857 * Take a reference to avoid recursive deallocations.
858 */
859
860 assert(object->pager_initialized);
861 assert(object->ref_count == 0);
862 object->ref_count++;
863
864 /*
865 * Terminate the object.
866 * If the object had a shadow, we let vm_object_deallocate
867 * deallocate it. "pageout" objects have a shadow, but
868 * maintain a "paging reference" rather than a normal
869 * reference.
870 * (We are careful here to limit recursion.)
871 */
872 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
873 if(vm_object_terminate(object) != KERN_SUCCESS)
874 continue;
875 if (shadow != VM_OBJECT_NULL) {
876 if (called_from_vm_object_deallocate) {
877 return shadow;
878 } else {
879 vm_object_deallocate(shadow);
880 }
881 }
882 }
883}
884
885boolean_t vm_object_terminate_remove_all = FALSE;
886
887/*
888 * Routine: vm_object_terminate
889 * Purpose:
890 * Free all resources associated with a vm_object.
891 * In/out conditions:
892 * Upon entry, the object must be locked,
893 * and the object must have exactly one reference.
894 *
895 * The shadow object reference is left alone.
896 *
897 * The object must be unlocked if it's found that pages
898 * must be flushed to a backing object. If someone
899 * manages to map the object while it is being flushed
900 * the object is returned unlocked and unchanged. Otherwise,
901 * upon exit, the cache will be unlocked, and the
902 * object will cease to exist.
903 */
904static kern_return_t
905vm_object_terminate(
906 register vm_object_t object)
907{
908 memory_object_t pager;
909 register vm_page_t p;
910 vm_object_t shadow_object;
911
912 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
913 (integer_t)object, object->ref_count, 0, 0, 0);
914
915 if (!object->pageout && (!object->temporary || object->can_persist)
916 && (object->pager != NULL || object->shadow_severed)) {
917 vm_object_cache_unlock();
918 while (!queue_empty(&object->memq)) {
919 /*
920 * Clear pager_trusted bit so that the pages get yanked
921 * out of the object instead of cleaned in place. This
922 * prevents a deadlock in XMM and makes more sense anyway.
923 */
924 object->pager_trusted = FALSE;
925
926 p = (vm_page_t) queue_first(&object->memq);
927
928 VM_PAGE_CHECK(p);
929
930 if (p->busy || p->cleaning) {
931 if(p->cleaning || p->absent) {
932 vm_object_paging_wait(object, THREAD_UNINT);
933 continue;
934 } else {
935 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
936 }
937 }
938
939 vm_page_lock_queues();
940 p->busy = TRUE;
941 VM_PAGE_QUEUES_REMOVE(p);
942 vm_page_unlock_queues();
943
944 if (p->absent || p->private) {
945
946 /*
947 * For private pages, VM_PAGE_FREE just
948 * leaves the page structure around for
949 * its owner to clean up. For absent
950 * pages, the structure is returned to
951 * the appropriate pool.
952 */
953
954 goto free_page;
955 }
956
957 if (p->fictitious)
958 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
959
960 if (!p->dirty)
961 p->dirty = pmap_is_modified(p->phys_page);
962
963 if ((p->dirty || p->precious) && !p->error && object->alive) {
964 vm_pageout_cluster(p); /* flush page */
965 vm_object_paging_wait(object, THREAD_UNINT);
966 XPR(XPR_VM_OBJECT,
967 "vm_object_terminate restart, object 0x%X ref %d\n",
968 (integer_t)object, object->ref_count, 0, 0, 0);
969 } else {
970 free_page:
971 VM_PAGE_FREE(p);
972 }
973 }
974 vm_object_unlock(object);
975 vm_object_cache_lock();
976 vm_object_lock(object);
977 }
978
979 /*
980 * Make sure the object isn't already being terminated
981 */
982 if(object->terminating) {
983 object->ref_count -= 1;
984 assert(object->ref_count > 0);
985 vm_object_cache_unlock();
986 vm_object_unlock(object);
987 return KERN_FAILURE;
988 }
989
990 /*
991 * Did somebody get a reference to the object while we were
992 * cleaning it?
993 */
994 if(object->ref_count != 1) {
995 object->ref_count -= 1;
996 assert(object->ref_count > 0);
997 vm_object_res_deallocate(object);
998 vm_object_cache_unlock();
999 vm_object_unlock(object);
1000 return KERN_FAILURE;
1001 }
1002
1003 /*
1004 * Make sure no one can look us up now.
1005 */
1006
1007 object->terminating = TRUE;
1008 object->alive = FALSE;
1009 vm_object_remove(object);
1010
1011 /*
1012 * Detach the object from its shadow if we are the shadow's
1013 * copy. The reference we hold on the shadow must be dropped
1014 * by our caller.
1015 */
1016 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1017 !(object->pageout)) {
1018 vm_object_lock(shadow_object);
1019 if (shadow_object->copy == object)
1020 shadow_object->copy = VM_OBJECT_NULL;
1021 vm_object_unlock(shadow_object);
1022 }
1023
1024 /*
1025 * The pageout daemon might be playing with our pages.
1026 * Now that the object is dead, it won't touch any more
1027 * pages, but some pages might already be on their way out.
1028 * Hence, we wait until the active paging activities have ceased
1029 * before we break the association with the pager itself.
1030 */
1031 while (object->paging_in_progress != 0) {
1032 vm_object_cache_unlock();
1033 vm_object_wait(object,
1034 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1035 THREAD_UNINT);
1036 vm_object_cache_lock();
1037 vm_object_lock(object);
1038 }
1039
1040 pager = object->pager;
1041 object->pager = MEMORY_OBJECT_NULL;
1042
1043 if (pager != MEMORY_OBJECT_NULL)
1044 memory_object_control_disable(object->pager_request);
1045 vm_object_cache_unlock();
1046
1047 object->ref_count--;
1048#if TASK_SWAPPER
1049 assert(object->res_count == 0);
1050#endif /* TASK_SWAPPER */
1051
1052 assert (object->ref_count == 0);
1053
1054 /*
1055 * Clean or free the pages, as appropriate.
1056 * It is possible for us to find busy/absent pages,
1057 * if some faults on this object were aborted.
1058 */
1059 if (object->pageout) {
1060 assert(shadow_object != VM_OBJECT_NULL);
1061 assert(shadow_object == object->shadow);
1062
1063 vm_pageout_object_terminate(object);
1064
1065 } else if ((object->temporary && !object->can_persist) ||
1066 (pager == MEMORY_OBJECT_NULL)) {
1067 while (!queue_empty(&object->memq)) {
1068 p = (vm_page_t) queue_first(&object->memq);
1069
1070 VM_PAGE_CHECK(p);
1071 VM_PAGE_FREE(p);
1072 }
1073 } else if (!queue_empty(&object->memq)) {
1074 panic("vm_object_terminate: queue just emptied isn't");
1075 }
1076
1077 assert(object->paging_in_progress == 0);
1078 assert(object->ref_count == 0);
1079
1080 /*
1081 * If the pager has not already been released by
1082 * vm_object_destroy, we need to terminate it and
1083 * release our reference to it here.
1084 */
1085 if (pager != MEMORY_OBJECT_NULL) {
1086 vm_object_unlock(object);
1087 vm_object_release_pager(pager);
1088 vm_object_lock(object);
1089 }
1090
1091 /* kick off anyone waiting on terminating */
1092 object->terminating = FALSE;
1093 vm_object_paging_begin(object);
1094 vm_object_paging_end(object);
1095 vm_object_unlock(object);
1096
1097#if MACH_PAGEMAP
1098 vm_external_destroy(object->existence_map, object->size);
1099#endif /* MACH_PAGEMAP */
1100
1101 /*
1102 * Free the space for the object.
1103 */
1104 zfree(vm_object_zone, (vm_offset_t) object);
1105 return KERN_SUCCESS;
1106}
1107
1108/*
1109 * Routine: vm_object_pager_wakeup
1110 * Purpose: Wake up anyone waiting for termination of a pager.
1111 */
1112
1113static void
1114vm_object_pager_wakeup(
1115 memory_object_t pager)
1116{
1117 vm_object_hash_entry_t entry;
1118 boolean_t waiting = FALSE;
1119
1120 /*
1121 * If anyone was waiting for the memory_object_terminate
1122 * to be queued, wake them up now.
1123 */
1124 vm_object_cache_lock();
1125 entry = vm_object_hash_lookup(pager, TRUE);
1126 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1127 waiting = entry->waiting;
1128 vm_object_cache_unlock();
1129 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1130 if (waiting)
1131 thread_wakeup((event_t) pager);
1132 vm_object_hash_entry_free(entry);
1133 }
1134}
1135
1136/*
1137 * Routine: vm_object_release_pager
1138 * Purpose: Terminate the pager and, upon completion,
1139 * release our last reference to it.
1140 * just like memory_object_terminate, except
1141 * that we wake up anyone blocked in vm_object_enter
1142 * waiting for termination message to be queued
1143 * before calling memory_object_init.
1144 */
1145static void
1146vm_object_release_pager(
1147 memory_object_t pager)
1148{
1149
1150 /*
1151 * Terminate the pager.
1152 */
1153
1154 (void) memory_object_terminate(pager);
1155
1156 /*
1157 * Wakeup anyone waiting for this terminate
1158 */
1159 vm_object_pager_wakeup(pager);
1160
1161 /*
1162 * Release reference to pager.
1163 */
1164 memory_object_deallocate(pager);
1165}
1166
1167/*
1168 * Routine: vm_object_abort_activity [internal use only]
1169 * Purpose:
1170 * Abort paging requests pending on this object.
1171 * In/out conditions:
1172 * The object is locked on entry and exit.
1173 */
1174static void
1175vm_object_abort_activity(
1176 vm_object_t object)
1177{
1178 register
1179 vm_page_t p;
1180 vm_page_t next;
1181
1182 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
1183 (integer_t)object, 0, 0, 0, 0);
1184
1185 /*
1186 * Abort all activity that would be waiting
1187 * for a result on this memory object.
1188 *
1189 * We could also choose to destroy all pages
1190 * that we have in memory for this object, but
1191 * we don't.
1192 */
1193
1194 p = (vm_page_t) queue_first(&object->memq);
1195 while (!queue_end(&object->memq, (queue_entry_t) p)) {
1196 next = (vm_page_t) queue_next(&p->listq);
1197
1198 /*
1199 * If it's being paged in, destroy it.
1200 * If an unlock has been requested, start it again.
1201 */
1202
1203 if (p->busy && p->absent) {
1204 VM_PAGE_FREE(p);
1205 }
1206 else {
1207 if (p->unlock_request != VM_PROT_NONE)
1208 p->unlock_request = VM_PROT_NONE;
1209 PAGE_WAKEUP(p);
1210 }
1211
1212 p = next;
1213 }
1214
1215 /*
1216 * Wake up threads waiting for the memory object to
1217 * become ready.
1218 */
1219
1220 object->pager_ready = TRUE;
1221 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1222}
1223
1224/*
1225 * Routine: vm_object_destroy
1226 * Purpose:
1227 * Shut down a VM object, despite the
1228 * presence of address map (or other) references
1229 * to the vm_object.
1230 */
1231kern_return_t
1232vm_object_destroy(
1233 vm_object_t object,
1234 kern_return_t reason)
1235{
1236 memory_object_t old_pager;
1237
1238 if (object == VM_OBJECT_NULL)
1239 return(KERN_SUCCESS);
1240
1241 /*
1242 * Remove the pager association immediately.
1243 *
1244 * This will prevent the memory manager from further
1245 * meddling. [If it wanted to flush data or make
1246 * other changes, it should have done so before performing
1247 * the destroy call.]
1248 */
1249
1250 vm_object_cache_lock();
1251 vm_object_lock(object);
1252 object->can_persist = FALSE;
1253 object->named = FALSE;
1254 object->alive = FALSE;
1255
1256 /*
1257 * Rip out the pager from the vm_object now...
1258 */
1259
1260 vm_object_remove(object);
1261 old_pager = object->pager;
1262 object->pager = MEMORY_OBJECT_NULL;
1263 if (old_pager != MEMORY_OBJECT_NULL)
1264 memory_object_control_disable(object->pager_request);
1265 vm_object_cache_unlock();
1266
1267 /*
1268 * Wait for the existing paging activity (that got
1269 * through before we nulled out the pager) to subside.
1270 */
1271
1272 vm_object_paging_wait(object, THREAD_UNINT);
1273 vm_object_unlock(object);
1274
1275 /*
1276 * Terminate the object now.
1277 */
1278 if (old_pager != MEMORY_OBJECT_NULL) {
1279 vm_object_release_pager(old_pager);
1280
1281 /*
1282 * JMM - Release the caller's reference. This assumes the
1283 * caller had a reference to release, which is a big (but
1284 * currently valid) assumption if this is driven from the
1285 * vnode pager (it is holding a named reference when making
1286 * this call)..
1287 */
1288 vm_object_deallocate(object);
1289
1290 }
1291 return(KERN_SUCCESS);
1292}
1293
1294/*
1295 * vm_object_deactivate_pages
1296 *
1297 * Deactivate all pages in the specified object. (Keep its pages
1298 * in memory even though it is no longer referenced.)
1299 *
1300 * The object must be locked.
1301 */
1302static void
1303vm_object_deactivate_all_pages(
1304 register vm_object_t object)
1305{
1306 register vm_page_t p;
1307
1308 queue_iterate(&object->memq, p, vm_page_t, listq) {
1309 vm_page_lock_queues();
1310 if (!p->busy)
1311 vm_page_deactivate(p);
1312 vm_page_unlock_queues();
1313 }
1314}
1315
1316__private_extern__ void
1317vm_object_deactivate_pages(
1318 vm_object_t object,
1319 vm_object_offset_t offset,
1320 vm_object_size_t size,
1321 boolean_t kill_page)
1322{
1323 vm_object_t orig_object;
1324 int pages_moved = 0;
1325 int pages_found = 0;
1326
1327 /*
1328 * entered with object lock held, acquire a paging reference to
1329 * prevent the memory_object and control ports from
1330 * being destroyed.
1331 */
1332 orig_object = object;
1333
1334 for (;;) {
1335 register vm_page_t m;
1336 vm_object_offset_t toffset;
1337 vm_object_size_t tsize;
1338
1339 vm_object_paging_begin(object);
1340 vm_page_lock_queues();
1341
1342 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1343
1344 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1345
1346 pages_found++;
1347
1348 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1349
1350 m->reference = FALSE;
1351 pmap_clear_reference(m->phys_page);
1352
1353 if ((kill_page) && (object->internal)) {
1354 m->precious = FALSE;
1355 m->dirty = FALSE;
1356 pmap_clear_modify(m->phys_page);
1357 vm_external_state_clr(object->existence_map, offset);
1358 }
1359 VM_PAGE_QUEUES_REMOVE(m);
1360
1361 if(m->zero_fill) {
1362 queue_enter_first(
1363 &vm_page_queue_zf,
1364 m, vm_page_t, pageq);
1365 } else {
1366 queue_enter_first(
1367 &vm_page_queue_inactive,
1368 m, vm_page_t, pageq);
1369 }
1370
1371 m->inactive = TRUE;
1372 if (!m->fictitious)
1373 vm_page_inactive_count++;
1374
1375 pages_moved++;
1376 }
1377 }
1378 }
1379 vm_page_unlock_queues();
1380 vm_object_paging_end(object);
1381
1382 if (object->shadow) {
1383 vm_object_t tmp_object;
1384
1385 kill_page = 0;
1386
1387 offset += object->shadow_offset;
1388
1389 tmp_object = object->shadow;
1390 vm_object_lock(tmp_object);
1391
1392 if (object != orig_object)
1393 vm_object_unlock(object);
1394 object = tmp_object;
1395 } else
1396 break;
1397 }
1398 if (object != orig_object)
1399 vm_object_unlock(object);
1400}
1401
1402/*
1403 * Routine: vm_object_pmap_protect
1404 *
1405 * Purpose:
1406 * Reduces the permission for all physical
1407 * pages in the specified object range.
1408 *
1409 * If removing write permission only, it is
1410 * sufficient to protect only the pages in
1411 * the top-level object; only those pages may
1412 * have write permission.
1413 *
1414 * If removing all access, we must follow the
1415 * shadow chain from the top-level object to
1416 * remove access to all pages in shadowed objects.
1417 *
1418 * The object must *not* be locked. The object must
1419 * be temporary/internal.
1420 *
1421 * If pmap is not NULL, this routine assumes that
1422 * the only mappings for the pages are in that
1423 * pmap.
1424 */
1425
1426__private_extern__ void
1427vm_object_pmap_protect(
1428 register vm_object_t object,
1429 register vm_object_offset_t offset,
1430 vm_size_t size,
1431 pmap_t pmap,
1432 vm_offset_t pmap_start,
1433 vm_prot_t prot)
1434{
1435 if (object == VM_OBJECT_NULL)
1436 return;
1437 size = round_page_64(size);
1438 offset = trunc_page_64(offset);
1439
1440 vm_object_lock(object);
1441
1442 assert(object->internal);
1443
1444 while (TRUE) {
1445 if (object->resident_page_count > atop_32(size) / 2 &&
1446 pmap != PMAP_NULL) {
1447 vm_object_unlock(object);
1448 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1449 return;
1450 }
1451
1452 /* if we are doing large ranges with respect to resident */
1453 /* page count then we should iterate over pages otherwise */
1454 /* inverse page look-up will be faster */
1455 if ((object->resident_page_count / 4) < atop_32(size)) {
1456 vm_page_t p;
1457 vm_object_offset_t end;
1458
1459 end = offset + size;
1460
1461 if (pmap != PMAP_NULL) {
1462 queue_iterate(&object->memq, p, vm_page_t, listq) {
1463 if (!p->fictitious &&
1464 (offset <= p->offset) && (p->offset < end)) {
1465
1466 vm_offset_t start = pmap_start +
1467 (vm_offset_t)(p->offset - offset);
1468
1469 pmap_protect(pmap, start, start + PAGE_SIZE, prot);
1470 }
1471 }
1472 } else {
1473 queue_iterate(&object->memq, p, vm_page_t, listq) {
1474 if (!p->fictitious &&
1475 (offset <= p->offset) && (p->offset < end)) {
1476
1477 pmap_page_protect(p->phys_page,
1478 prot & ~p->page_lock);
1479 }
1480 }
1481 }
1482 } else {
1483 vm_page_t p;
1484 vm_object_offset_t end;
1485 vm_object_offset_t target_off;
1486
1487 end = offset + size;
1488
1489 if (pmap != PMAP_NULL) {
1490 for(target_off = offset;
1491 target_off < end; target_off += PAGE_SIZE) {
1492 if(p = vm_page_lookup(object, target_off)) {
1493 vm_offset_t start = pmap_start +
1494 (vm_offset_t)(p->offset - offset);
1495 pmap_protect(pmap, start,
1496 start + PAGE_SIZE, prot);
1497 }
1498 }
1499 } else {
1500 for(target_off = offset;
1501 target_off < end; target_off += PAGE_SIZE) {
1502 if(p = vm_page_lookup(object, target_off)) {
1503 pmap_page_protect(p->phys_page,
1504 prot & ~p->page_lock);
1505 }
1506 }
1507 }
1508 }
1509
1510 if (prot == VM_PROT_NONE) {
1511 /*
1512 * Must follow shadow chain to remove access
1513 * to pages in shadowed objects.
1514 */
1515 register vm_object_t next_object;
1516
1517 next_object = object->shadow;
1518 if (next_object != VM_OBJECT_NULL) {
1519 offset += object->shadow_offset;
1520 vm_object_lock(next_object);
1521 vm_object_unlock(object);
1522 object = next_object;
1523 }
1524 else {
1525 /*
1526 * End of chain - we are done.
1527 */
1528 break;
1529 }
1530 }
1531 else {
1532 /*
1533 * Pages in shadowed objects may never have
1534 * write permission - we may stop here.
1535 */
1536 break;
1537 }
1538 }
1539
1540 vm_object_unlock(object);
1541}
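/*
 * Editor's note: an illustrative sketch, not part of the original source, of
 * the two usage modes described in the comment above vm_object_pmap_protect:
 * removing write access only touches the top-level object, while removing
 * all access makes the routine walk the shadow chain.  The object and pmap
 * arguments here are hypothetical.  Compiled out.
 */
#if 0	/* illustrative example only */
static void
vm_object_pmap_protect_sketch(
	vm_object_t	object,		/* hypothetical temporary/internal object */
	pmap_t		pmap,		/* hypothetical pmap holding the mappings */
	vm_offset_t	pmap_start)
{
	/* Remove write access: only top-level pages can be writable. */
	vm_object_pmap_protect(object, 0, PAGE_SIZE, pmap, pmap_start,
			       VM_PROT_READ);

	/* Remove all access: the shadow chain is followed internally. */
	vm_object_pmap_protect(object, 0, PAGE_SIZE, pmap, pmap_start,
			       VM_PROT_NONE);
}
#endif	/* illustrative example only */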
1542
1543/*
1544 * Routine: vm_object_copy_slowly
1545 *
1546 * Description:
1547 * Copy the specified range of the source
1548 * virtual memory object without using
1549 * protection-based optimizations (such
1550 * as copy-on-write). The pages in the
1551 * region are actually copied.
1552 *
1553 * In/out conditions:
1554 * The caller must hold a reference and a lock
1555 * for the source virtual memory object. The source
1556 * object will be returned *unlocked*.
1557 *
1558 * Results:
1559 * If the copy is completed successfully, KERN_SUCCESS is
1560 * returned. If the caller asserted the interruptible
1561 * argument, and an interruption occurred while waiting
1562 * for a user-generated event, MACH_SEND_INTERRUPTED is
1563 * returned. Other values may be returned to indicate
1564 * hard errors during the copy operation.
1565 *
1566 * A new virtual memory object is returned in a
1567 * parameter (_result_object). The contents of this
1568 * new object, starting at a zero offset, are a copy
1569 * of the source memory region. In the event of
1570 * an error, this parameter will contain the value
1571 * VM_OBJECT_NULL.
1572 */
1573__private_extern__ kern_return_t
1574vm_object_copy_slowly(
1575 register vm_object_t src_object,
1576 vm_object_offset_t src_offset,
1577 vm_object_size_t size,
1578 boolean_t interruptible,
1579 vm_object_t *_result_object) /* OUT */
1580{
1581 vm_object_t new_object;
1582 vm_object_offset_t new_offset;
1583
1584 vm_object_offset_t src_lo_offset = src_offset;
1585 vm_object_offset_t src_hi_offset = src_offset + size;
1586
1587 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1588 src_object, src_offset, size, 0, 0);
1589
1590 if (size == 0) {
1591 vm_object_unlock(src_object);
1592 *_result_object = VM_OBJECT_NULL;
1593 return(KERN_INVALID_ARGUMENT);
1594 }
1595
1596 /*
1597 * Prevent destruction of the source object while we copy.
1598 */
1599
1600 assert(src_object->ref_count > 0);
1601 src_object->ref_count++;
1602 VM_OBJ_RES_INCR(src_object);
1603 vm_object_unlock(src_object);
1604
1605 /*
1606 * Create a new object to hold the copied pages.
1607 * A few notes:
1608 * We fill the new object starting at offset 0,
1609 * regardless of the input offset.
1610 * We don't bother to lock the new object within
1611 * this routine, since we have the only reference.
1612 */
1613
1614 new_object = vm_object_allocate(size);
1615 new_offset = 0;
1616
1617 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1618
1619 for ( ;
1620 size != 0 ;
1621 src_offset += PAGE_SIZE_64,
1622 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1623 ) {
1624 vm_page_t new_page;
1625 vm_fault_return_t result;
1626
1627 while ((new_page = vm_page_alloc(new_object, new_offset))
1628 == VM_PAGE_NULL) {
1629 if (!vm_page_wait(interruptible)) {
1630 vm_object_deallocate(new_object);
1631 *_result_object = VM_OBJECT_NULL;
1632 return(MACH_SEND_INTERRUPTED);
1633 }
1634 }
1635
1636 do {
1637 vm_prot_t prot = VM_PROT_READ;
1638 vm_page_t _result_page;
1639 vm_page_t top_page;
1640 register
1641 vm_page_t result_page;
1642 kern_return_t error_code;
1643
1644 vm_object_lock(src_object);
1645 vm_object_paging_begin(src_object);
1646
1647 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1648 result = vm_fault_page(src_object, src_offset,
1649 VM_PROT_READ, FALSE, interruptible,
1650 src_lo_offset, src_hi_offset,
1651 VM_BEHAVIOR_SEQUENTIAL,
1652 &prot, &_result_page, &top_page,
1653 (int *)0,
1654 &error_code, FALSE, FALSE, NULL, 0);
1655
1656 switch(result) {
1657 case VM_FAULT_SUCCESS:
1658 result_page = _result_page;
1659
1660 /*
1661 * We don't need to hold the object
1662 * lock -- the busy page will be enough.
1663 * [We don't care about picking up any
1664 * new modifications.]
1665 *
1666 * Copy the page to the new object.
1667 *
1668 * POLICY DECISION:
1669 * If result_page is clean,
1670 * we could steal it instead
1671 * of copying.
1672 */
1673
1674 vm_object_unlock(result_page->object);
1675 vm_page_copy(result_page, new_page);
1676
1677 /*
1678 * Let go of both pages (make them
1679 * not busy, perform wakeup, activate).
1680 */
1681
1682 new_page->busy = FALSE;
1683 new_page->dirty = TRUE;
1684 vm_object_lock(result_page->object);
1685 PAGE_WAKEUP_DONE(result_page);
1686
1687 vm_page_lock_queues();
1688 if (!result_page->active &&
1689 !result_page->inactive)
1690 vm_page_activate(result_page);
1691 vm_page_activate(new_page);
1692 vm_page_unlock_queues();
1693
1694 /*
1695 * Release paging references and
1696 * top-level placeholder page, if any.
1697 */
1698
1699 vm_fault_cleanup(result_page->object,
1700 top_page);
1701
1702 break;
1703
1704 case VM_FAULT_RETRY:
1705 break;
1706
1707 case VM_FAULT_FICTITIOUS_SHORTAGE:
1708 vm_page_more_fictitious();
1709 break;
1710
1711 case VM_FAULT_MEMORY_SHORTAGE:
1712 if (vm_page_wait(interruptible))
1713 break;
1714 /* fall thru */
1715
1716 case VM_FAULT_INTERRUPTED:
1717 vm_page_free(new_page);
1718 vm_object_deallocate(new_object);
1719 vm_object_deallocate(src_object);
1720 *_result_object = VM_OBJECT_NULL;
1721 return(MACH_SEND_INTERRUPTED);
1722
1723 case VM_FAULT_MEMORY_ERROR:
1724 /*
1725 * A policy choice:
1726 * (a) ignore pages that we can't
1727 * copy
1728 * (b) return the null object if
1729 * any page fails [chosen]
1730 */
1731
1732 vm_page_lock_queues();
1733 vm_page_free(new_page);
1734 vm_page_unlock_queues();
1735 vm_object_deallocate(new_object);
1736 vm_object_deallocate(src_object);
1737 *_result_object = VM_OBJECT_NULL;
1738 return(error_code ? error_code:
1739 KERN_MEMORY_ERROR);
1740 }
1741 } while (result != VM_FAULT_SUCCESS);
1742 }
1743
1744 /*
1745 * Lose the extra reference, and return our object.
1746 */
1747
1748 vm_object_deallocate(src_object);
1749 *_result_object = new_object;
1750 return(KERN_SUCCESS);
1751}
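/*
 * Editor's note: an illustrative sketch, not part of the original source, of
 * a call to vm_object_copy_slowly honoring the in/out conditions stated
 * above: the caller holds a reference and the lock on the source object,
 * and the source comes back unlocked.  Compiled out.
 */
#if 0	/* illustrative example only */
static kern_return_t
vm_object_copy_slowly_sketch(
	vm_object_t		src_object,	/* caller already holds a reference */
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_object_t		*copy)		/* OUT */
{
	kern_return_t	kr;

	vm_object_lock(src_object);
	kr = vm_object_copy_slowly(src_object, offset, size,
				   FALSE,		/* not interruptible */
				   copy);
	/* src_object is unlocked on return; *copy is VM_OBJECT_NULL on error. */
	return kr;
}
#endif	/* illustrative example only */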
1752
1753/*
1754 * Routine: vm_object_copy_quickly
1755 *
1756 * Purpose:
1757 * Copy the specified range of the source virtual
1758 * memory object, if it can be done without waiting
1759 * for user-generated events.
1760 *
1761 * Results:
1762 * If the copy is successful, the copy is returned in
1763 * the arguments; otherwise, the arguments are not
1764 * affected.
1765 *
1766 * In/out conditions:
1767 * The object should be unlocked on entry and exit.
1768 */
1769
1770/*ARGSUSED*/
1771__private_extern__ boolean_t
1772vm_object_copy_quickly(
1773 vm_object_t *_object, /* INOUT */
1774 vm_object_offset_t offset, /* IN */
1775 vm_object_size_t size, /* IN */
1776 boolean_t *_src_needs_copy, /* OUT */
1777 boolean_t *_dst_needs_copy) /* OUT */
1778{
1779 vm_object_t object = *_object;
1780 memory_object_copy_strategy_t copy_strategy;
1781
1782 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1783 *_object, offset, size, 0, 0);
1784 if (object == VM_OBJECT_NULL) {
1785 *_src_needs_copy = FALSE;
1786 *_dst_needs_copy = FALSE;
1787 return(TRUE);
1788 }
1789
1790 vm_object_lock(object);
1791
1792 copy_strategy = object->copy_strategy;
1793
1794 switch (copy_strategy) {
1795 case MEMORY_OBJECT_COPY_SYMMETRIC:
1796
1797 /*
1798 * Symmetric copy strategy.
1799 * Make another reference to the object.
1800 * Leave object/offset unchanged.
1801 */
1802
1803 assert(object->ref_count > 0);
1804 object->ref_count++;
1805 vm_object_res_reference(object);
1806 object->shadowed = TRUE;
1807 vm_object_unlock(object);
1808
1809 /*
1810 * Both source and destination must make
1811 * shadows, and the source must be made
1812 * read-only if not already.
1813 */
1814
1815 *_src_needs_copy = TRUE;
1816 *_dst_needs_copy = TRUE;
1817
1818 break;
1819
1820 case MEMORY_OBJECT_COPY_DELAY:
1821 vm_object_unlock(object);
1822 return(FALSE);
1823
1824 default:
1825 vm_object_unlock(object);
1826 return(FALSE);
1827 }
1828 return(TRUE);
1829}
1830
1831static int copy_call_count = 0;
1832static int copy_call_sleep_count = 0;
1833static int copy_call_restart_count = 0;
1834
1835/*
1836 * Routine: vm_object_copy_call [internal]
1837 *
1838 * Description:
1839 * Copy the source object (src_object), using the
1840 * user-managed copy algorithm.
1841 *
1842 * In/out conditions:
1843 * The source object must be locked on entry. It
1844 * will be *unlocked* on exit.
1845 *
1846 * Results:
1847 * If the copy is successful, KERN_SUCCESS is returned.
1848 * A new object that represents the copied virtual
1849 * memory is returned in a parameter (*_result_object).
1850 * If the return value indicates an error, this parameter
1851 * is not valid.
1852 */
1853static kern_return_t
1854vm_object_copy_call(
1855 vm_object_t src_object,
1856 vm_object_offset_t src_offset,
1857 vm_object_size_t size,
1858 vm_object_t *_result_object) /* OUT */
1859{
1860 kern_return_t kr;
1861 vm_object_t copy;
1862 boolean_t check_ready = FALSE;
1863
1864 /*
1865 * If a copy is already in progress, wait and retry.
1866 *
1867 * XXX
1868 * Consider making this call interruptable, as Mike
1869 * intended it to be.
1870 *
1871 * XXXO
1872 * Need a counter or version or something to allow
1873 * us to use the copy that the currently requesting
1874 * thread is obtaining -- is it worth adding to the
1875 * vm object structure? Depends on how common this case is.
1876 */
1877 copy_call_count++;
1878 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1879 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1880 THREAD_UNINT);
1881 copy_call_restart_count++;
1882 }
1883
1884 /*
1885 * Indicate (for the benefit of memory_object_create_copy)
1886 * that we want a copy for src_object. (Note that we cannot
1887 * do a real assert_wait before calling memory_object_copy,
1888 * so we simply set the flag.)
1889 */
1890
1891 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1892 vm_object_unlock(src_object);
1893
1894 /*
1895 * Ask the memory manager to give us a memory object
1896 * which represents a copy of the src object.
1897 * The memory manager may give us a memory object
1898 * which we already have, or it may give us a
1899 * new memory object. This memory object will arrive
1900 * via memory_object_create_copy.
1901 */
1902
1903 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1904 if (kr != KERN_SUCCESS) {
1905 return kr;
1906 }
1907
1908 /*
1909 * Wait for the copy to arrive.
1910 */
1911 vm_object_lock(src_object);
1912 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
9bccf70c 1913 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1c79356b 1914 THREAD_UNINT);
1c79356b
A
1915 copy_call_sleep_count++;
1916 }
1917Retry:
1918 assert(src_object->copy != VM_OBJECT_NULL);
1919 copy = src_object->copy;
1920 if (!vm_object_lock_try(copy)) {
1921 vm_object_unlock(src_object);
1922 mutex_pause(); /* wait a bit */
1923 vm_object_lock(src_object);
1924 goto Retry;
1925 }
1926 if (copy->size < src_offset+size)
1927 copy->size = src_offset+size;
1928
1929 if (!copy->pager_ready)
1930 check_ready = TRUE;
1931
1932 /*
1933 * Return the copy.
1934 */
1935 *_result_object = copy;
1936 vm_object_unlock(copy);
1937 vm_object_unlock(src_object);
1938
1939 /* Wait for the copy to be ready. */
1940 if (check_ready == TRUE) {
1941 vm_object_lock(copy);
1942 while (!copy->pager_ready) {
9bccf70c 1943 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1c79356b
A
1944 }
1945 vm_object_unlock(copy);
1946 }
1947
1948 return KERN_SUCCESS;
1949}
1950
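/*
 * Illustrative sketch: the "wanted flag + sleep" idiom that
 * vm_object_copy_call() uses to serialize against a copy-call
 * already in progress, pulled out as a stand-alone helper.  The
 * helper name is hypothetical; the object is locked by the caller
 * and, as with the uses above, the lock is held again when
 * vm_object_sleep() returns.
 */
static void
example_wait_for_copy_call(
	vm_object_t	object)
{
	while (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(object, VM_OBJECT_EVENT_COPY_CALL,
				THREAD_UNINT);
	}
}
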
0b4e3aa0
A
1951static int copy_delayed_lock_collisions = 0;
1952static int copy_delayed_max_collisions = 0;
1953static int copy_delayed_lock_contention = 0;
1954static int copy_delayed_protect_iterate = 0;
1955static int copy_delayed_protect_lookup = 0;
1956static int copy_delayed_protect_lookup_wait = 0;
1c79356b
A
1957
1958/*
1959 * Routine: vm_object_copy_delayed [internal]
1960 *
1961 * Description:
1962 * Copy the specified virtual memory object, using
1963 * the asymmetric copy-on-write algorithm.
1964 *
1965 * In/out conditions:
55e303ae
A
1966 * The src_object must be locked on entry. It will be unlocked
1967 * on exit - so the caller must also hold a reference to it.
1c79356b
A
1968 *
1969 * This routine will not block waiting for user-generated
1970 * events. It is not interruptible.
1971 */
0b4e3aa0 1972__private_extern__ vm_object_t
1c79356b
A
1973vm_object_copy_delayed(
1974 vm_object_t src_object,
1975 vm_object_offset_t src_offset,
1976 vm_object_size_t size)
1977{
1978 vm_object_t new_copy = VM_OBJECT_NULL;
1979 vm_object_t old_copy;
1980 vm_page_t p;
55e303ae 1981 vm_object_size_t copy_size = src_offset + size;
1c79356b
A
1982
1983 int collisions = 0;
1984 /*
1985 * The user-level memory manager wants to see all of the changes
1986 * to this object, but it has promised not to make any changes on
1987 * its own.
1988 *
1989 * Perform an asymmetric copy-on-write, as follows:
1990 * Create a new object, called a "copy object" to hold
1991 * pages modified by the new mapping (i.e., the copy,
1992 * not the original mapping).
1993 * Record the original object as the backing object for
1994 * the copy object. If the original mapping does not
1995 * change a page, it may be used read-only by the copy.
1996 * Record the copy object in the original object.
1997 * When the original mapping causes a page to be modified,
1998 * it must be copied to a new page that is "pushed" to
1999 * the copy object.
2000 * Mark the new mapping (the copy object) copy-on-write.
2001 * This makes the copy object itself read-only, allowing
2002 * it to be reused if the original mapping makes no
2003 * changes, and simplifying the synchronization required
2004 * in the "push" operation described above.
2005 *
2006	 *	The copy-on-write is said to be asymmetric because the original
2007 * object is *not* marked copy-on-write. A copied page is pushed
2008	 *	to the copy object, regardless of which party attempted to modify
2009 * the page.
2010 *
2011 * Repeated asymmetric copy operations may be done. If the
2012 * original object has not been changed since the last copy, its
2013 * copy object can be reused. Otherwise, a new copy object can be
2014 * inserted between the original object and its previous copy
2015 * object. Since any copy object is read-only, this cannot affect
2016	 *	the contents of the previous copy object.
2017 *
2018 * Note that a copy object is higher in the object tree than the
2019 * original object; therefore, use of the copy object recorded in
2020 * the original object must be done carefully, to avoid deadlock.
2021 */
2022
2023 Retry:
1c79356b 2024
55e303ae
A
2025 /*
2026 * Wait for paging in progress.
2027 */
2028 if (!src_object->true_share)
2029 vm_object_paging_wait(src_object, THREAD_UNINT);
2030
1c79356b
A
2031 /*
2032 * See whether we can reuse the result of a previous
2033 * copy operation.
2034 */
2035
2036 old_copy = src_object->copy;
2037 if (old_copy != VM_OBJECT_NULL) {
2038 /*
2039 * Try to get the locks (out of order)
2040 */
2041 if (!vm_object_lock_try(old_copy)) {
2042 vm_object_unlock(src_object);
2043 mutex_pause();
2044
2045 /* Heisenberg Rules */
2046 copy_delayed_lock_collisions++;
2047 if (collisions++ == 0)
2048 copy_delayed_lock_contention++;
2049
2050 if (collisions > copy_delayed_max_collisions)
2051 copy_delayed_max_collisions = collisions;
2052
55e303ae 2053 vm_object_lock(src_object);
1c79356b
A
2054 goto Retry;
2055 }
2056
2057 /*
2058 * Determine whether the old copy object has
2059 * been modified.
2060 */
2061
2062 if (old_copy->resident_page_count == 0 &&
2063 !old_copy->pager_created) {
2064 /*
2065 * It has not been modified.
2066 *
2067 * Return another reference to
55e303ae
A
2068 * the existing copy-object if
2069 * we can safely grow it (if
2070 * needed).
de355530 2071 */
1c79356b
A
2072
2073 if (new_copy != VM_OBJECT_NULL) {
2074 vm_object_unlock(new_copy);
2075 vm_object_deallocate(new_copy);
2076 }
2077
55e303ae
A
2078 if (old_copy->size < copy_size) {
2079 /*
2080 * We can't perform a delayed copy if any of the
2081 * pages in the extended range are wired (because
2082 * we can't safely take write permission away from
2083 * wired pages). If the pages aren't wired, then
2084 * go ahead and protect them.
2085 */
2086 copy_delayed_protect_iterate++;
2087 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2088 if (!p->fictitious &&
2089 p->offset >= old_copy->size &&
2090 p->offset < copy_size) {
2091 if (p->wire_count > 0) {
2092 vm_object_unlock(old_copy);
2093 vm_object_unlock(src_object);
2094 return VM_OBJECT_NULL;
2095 } else {
2096 pmap_page_protect(p->phys_page,
2097 (VM_PROT_ALL & ~VM_PROT_WRITE &
2098 ~p->page_lock));
2099 }
2100 }
2101 }
2102 old_copy->size = copy_size;
2103 }
2104
2105 vm_object_reference_locked(old_copy);
d7e50217
A
2106 vm_object_unlock(old_copy);
2107 vm_object_unlock(src_object);
55e303ae 2108 return(old_copy);
d7e50217 2109 }
de355530
A
2110
2111 /*
2112 * Adjust the size argument so that the newly-created
2113 * copy object will be large enough to back either the
55e303ae 2114 * old copy object or the new mapping.
de355530 2115 */
55e303ae
A
2116 if (old_copy->size > copy_size)
2117 copy_size = old_copy->size;
2118
2119 if (new_copy == VM_OBJECT_NULL) {
2120 vm_object_unlock(old_copy);
2121 vm_object_unlock(src_object);
2122 new_copy = vm_object_allocate(copy_size);
2123 vm_object_lock(src_object);
2124 vm_object_lock(new_copy);
2125 goto Retry;
2126 }
2127 new_copy->size = copy_size;
1c79356b
A
2128
2129 /*
2130 * The copy-object is always made large enough to
2131 * completely shadow the original object, since
2132 * it may have several users who want to shadow
2133 * the original object at different points.
2134 */
2135
2136 assert((old_copy->shadow == src_object) &&
2137 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2138
55e303ae
A
2139 } else if (new_copy == VM_OBJECT_NULL) {
2140 vm_object_unlock(src_object);
2141 new_copy = vm_object_allocate(copy_size);
2142 vm_object_lock(src_object);
2143 vm_object_lock(new_copy);
2144 goto Retry;
2145 }
2146
2147 /*
2148 * We now have the src object locked, and the new copy object
2149 * allocated and locked (and potentially the old copy locked).
2150 * Before we go any further, make sure we can still perform
2151 * a delayed copy, as the situation may have changed.
2152 *
2153 * Specifically, we can't perform a delayed copy if any of the
2154 * pages in the range are wired (because we can't safely take
2155 * write permission away from wired pages). If the pages aren't
2156 * wired, then go ahead and protect them.
2157 */
2158 copy_delayed_protect_iterate++;
2159 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2160 if (!p->fictitious && p->offset < copy_size) {
2161 if (p->wire_count > 0) {
2162 if (old_copy)
2163 vm_object_unlock(old_copy);
2164 vm_object_unlock(src_object);
2165 vm_object_unlock(new_copy);
2166 vm_object_deallocate(new_copy);
2167 return VM_OBJECT_NULL;
2168 } else {
2169 pmap_page_protect(p->phys_page,
2170 (VM_PROT_ALL & ~VM_PROT_WRITE &
2171 ~p->page_lock));
2172 }
2173 }
2174 }
2175
2176 if (old_copy != VM_OBJECT_NULL) {
1c79356b
A
2177 /*
2178 * Make the old copy-object shadow the new one.
2179 * It will receive no more pages from the original
2180 * object.
2181 */
2182
2183 src_object->ref_count--; /* remove ref. from old_copy */
2184 assert(src_object->ref_count > 0);
2185 old_copy->shadow = new_copy;
2186 assert(new_copy->ref_count > 0);
2187 new_copy->ref_count++; /* for old_copy->shadow ref. */
2188
2189#if TASK_SWAPPER
2190 if (old_copy->res_count) {
2191 VM_OBJ_RES_INCR(new_copy);
2192 VM_OBJ_RES_DECR(src_object);
2193 }
2194#endif
2195
2196 vm_object_unlock(old_copy); /* done with old_copy */
1c79356b
A
2197 }
2198
2199 /*
2200 * Point the new copy at the existing object.
2201 */
1c79356b
A
2202 new_copy->shadow = src_object;
2203 new_copy->shadow_offset = 0;
2204 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2205 assert(src_object->ref_count > 0);
2206 src_object->ref_count++;
2207 VM_OBJ_RES_INCR(src_object);
2208 src_object->copy = new_copy;
55e303ae 2209 vm_object_unlock(src_object);
1c79356b
A
2210 vm_object_unlock(new_copy);
2211
1c79356b
A
2212 XPR(XPR_VM_OBJECT,
2213 "vm_object_copy_delayed: used copy object %X for source %X\n",
2214 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2215
2216 return(new_copy);
2217}
2218
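/*
 * Illustrative sketch: the try-lock / back-off / retry idiom used
 * above when the copy object must be locked "out of order" while
 * the source object is already held.  The helper name is
 * hypothetical.  On return the source object is still locked; the
 * copy object, if any, is locked as well.
 */
static vm_object_t
example_lock_copy_out_of_order(
	vm_object_t	src_object)	/* locked on entry and exit */
{
	vm_object_t	copy;

	for (;;) {
		copy = src_object->copy;
		if (copy == VM_OBJECT_NULL)
			return VM_OBJECT_NULL;
		if (vm_object_lock_try(copy))
			return copy;		/* both locks held */

		/*
		 * Taking the copy's lock while holding the source
		 * could deadlock against a thread locking in the
		 * normal order; drop the source lock, pause, and
		 * re-evaluate, since src_object->copy may change.
		 */
		vm_object_unlock(src_object);
		mutex_pause();
		vm_object_lock(src_object);
	}
}
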
2219/*
2220 * Routine: vm_object_copy_strategically
2221 *
2222 * Purpose:
2223 * Perform a copy according to the source object's
2224 * declared strategy. This operation may block,
2225 * and may be interrupted.
2226 */
0b4e3aa0 2227__private_extern__ kern_return_t
1c79356b
A
2228vm_object_copy_strategically(
2229 register vm_object_t src_object,
2230 vm_object_offset_t src_offset,
2231 vm_object_size_t size,
2232 vm_object_t *dst_object, /* OUT */
2233 vm_object_offset_t *dst_offset, /* OUT */
2234 boolean_t *dst_needs_copy) /* OUT */
2235{
2236 boolean_t result;
2237 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2238 memory_object_copy_strategy_t copy_strategy;
2239
2240 assert(src_object != VM_OBJECT_NULL);
2241
2242 vm_object_lock(src_object);
2243
2244 /*
2245 * The copy strategy is only valid if the memory manager
2246 * is "ready". Internal objects are always ready.
2247 */
2248
2249 while (!src_object->internal && !src_object->pager_ready) {
9bccf70c 2250 wait_result_t wait_result;
1c79356b 2251
9bccf70c
A
2252 wait_result = vm_object_sleep( src_object,
2253 VM_OBJECT_EVENT_PAGER_READY,
2254 interruptible);
2255 if (wait_result != THREAD_AWAKENED) {
2256 vm_object_unlock(src_object);
1c79356b
A
2257 *dst_object = VM_OBJECT_NULL;
2258 *dst_offset = 0;
2259 *dst_needs_copy = FALSE;
2260 return(MACH_SEND_INTERRUPTED);
2261 }
1c79356b
A
2262 }
2263
2264 copy_strategy = src_object->copy_strategy;
2265
2266 /*
2267 * Use the appropriate copy strategy.
2268 */
2269
2270 switch (copy_strategy) {
55e303ae
A
2271 case MEMORY_OBJECT_COPY_DELAY:
2272 *dst_object = vm_object_copy_delayed(src_object,
2273 src_offset, size);
2274 if (*dst_object != VM_OBJECT_NULL) {
2275 *dst_offset = src_offset;
2276 *dst_needs_copy = TRUE;
2277 result = KERN_SUCCESS;
2278 break;
2279 }
2280 vm_object_lock(src_object);
2281 /* fall thru when delayed copy not allowed */
2282
1c79356b
A
2283 case MEMORY_OBJECT_COPY_NONE:
2284 result = vm_object_copy_slowly(src_object, src_offset, size,
2285 interruptible, dst_object);
2286 if (result == KERN_SUCCESS) {
2287 *dst_offset = 0;
2288 *dst_needs_copy = FALSE;
2289 }
2290 break;
2291
2292 case MEMORY_OBJECT_COPY_CALL:
2293 result = vm_object_copy_call(src_object, src_offset, size,
2294 dst_object);
2295 if (result == KERN_SUCCESS) {
2296 *dst_offset = src_offset;
2297 *dst_needs_copy = TRUE;
2298 }
2299 break;
2300
1c79356b
A
2301 case MEMORY_OBJECT_COPY_SYMMETRIC:
2302 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2303 vm_object_unlock(src_object);
2304 result = KERN_MEMORY_RESTART_COPY;
2305 break;
2306
2307 default:
2308 panic("copy_strategically: bad strategy");
2309 result = KERN_INVALID_ARGUMENT;
2310 }
2311 return(result);
2312}
2313
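/*
 * Illustrative sketch: a caller that tries the quick (symmetric)
 * path first and falls back to vm_object_copy_strategically() when
 * the source object's copy strategy does not permit it.  The
 * helper name and parameters are hypothetical; a real caller (the
 * vm_map code) would also honor *src_needs_copy by write-protecting
 * the source mapping, which is omitted here.  The source object is
 * unlocked, as both routines require.
 */
static kern_return_t
example_copy_object(
	vm_object_t		src_object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_object_t		*result,	/* OUT */
	vm_object_offset_t	*result_offset,	/* OUT */
	boolean_t		*needs_copy)	/* OUT */
{
	vm_object_t	object = src_object;
	boolean_t	src_needs_copy, dst_needs_copy;

	if (vm_object_copy_quickly(&object, offset, size,
				   &src_needs_copy, &dst_needs_copy)) {
		*result = object;
		*result_offset = offset;
		*needs_copy = dst_needs_copy;
		return KERN_SUCCESS;
	}

	/* quick path refused (e.g. MEMORY_OBJECT_COPY_DELAY) */
	return vm_object_copy_strategically(src_object, offset, size,
					    result, result_offset,
					    needs_copy);
}
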
2314/*
2315 * vm_object_shadow:
2316 *
2317 * Create a new object which is backed by the
2318 * specified existing object range. The source
2319 * object reference is deallocated.
2320 *
2321 * The new object and offset into that object
2322 * are returned in the source parameters.
2323 */
2324boolean_t vm_object_shadow_check = FALSE;
2325
0b4e3aa0 2326__private_extern__ boolean_t
1c79356b
A
2327vm_object_shadow(
2328 vm_object_t *object, /* IN/OUT */
2329 vm_object_offset_t *offset, /* IN/OUT */
2330 vm_object_size_t length)
2331{
2332 register vm_object_t source;
2333 register vm_object_t result;
2334
2335 source = *object;
2336 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2337
2338 /*
2339 * Determine if we really need a shadow.
2340 */
2341
2342 if (vm_object_shadow_check && source->ref_count == 1 &&
2343 (source->shadow == VM_OBJECT_NULL ||
2344 source->shadow->copy == VM_OBJECT_NULL))
2345 {
2346 source->shadowed = FALSE;
2347 return FALSE;
2348 }
2349
2350 /*
2351 * Allocate a new object with the given length
2352 */
2353
2354 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2355 panic("vm_object_shadow: no object for shadowing");
2356
2357 /*
2358 * The new object shadows the source object, adding
2359 * a reference to it. Our caller changes his reference
2360 * to point to the new object, removing a reference to
2361 * the source object. Net result: no change of reference
2362 * count.
2363 */
2364 result->shadow = source;
2365
2366 /*
2367 * Store the offset into the source object,
2368 * and fix up the offset into the new object.
2369 */
2370
2371 result->shadow_offset = *offset;
2372
2373 /*
2374	 *	Return the new object and offset.
2375 */
2376
2377 *offset = 0;
2378 *object = result;
2379 return TRUE;
2380}
2381
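/*
 * Illustrative sketch: how a caller holding an (object, offset)
 * pair that must become copy-on-write uses vm_object_shadow().
 * The helper name and "entry_size" are hypothetical.
 */
static void
example_make_cow(
	vm_object_t		*object,	/* IN/OUT */
	vm_object_offset_t	*offset,	/* IN/OUT */
	vm_object_size_t	entry_size)
{
	/*
	 * On success *object is the new (initially empty) shadow
	 * object and *offset is 0; the old object becomes its
	 * backing object.  If vm_object_shadow() decides no shadow
	 * is needed it returns FALSE and leaves the pair untouched.
	 */
	if (vm_object_shadow(object, offset, entry_size)) {
		assert(*offset == 0);
		/* writes now push pages into the new shadow object */
	}
}
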
2382/*
2383 * The relationship between vm_object structures and
0b4e3aa0 2384 * the memory_object requires careful synchronization.
1c79356b 2385 *
0b4e3aa0
A
2386 * All associations are created by memory_object_create_named
2387 * for external pagers and vm_object_pager_create for internal
2388 * objects as follows:
2389 *
2390 * pager: the memory_object itself, supplied by
1c79356b
A
2391 * the user requesting a mapping (or the kernel,
2392 * when initializing internal objects); the
2393 * kernel simulates holding send rights by keeping
2394 * a port reference;
0b4e3aa0 2395 *
1c79356b
A
2396 * pager_request:
2397 * the memory object control port,
2398 * created by the kernel; the kernel holds
2399 * receive (and ownership) rights to this
2400 * port, but no other references.
1c79356b
A
2401 *
2402 * When initialization is complete, the "initialized" field
2403 * is asserted. Other mappings using a particular memory object,
2404 * and any references to the vm_object gained through the
2405 * port association must wait for this initialization to occur.
2406 *
2407 * In order to allow the memory manager to set attributes before
2408 * requests (notably virtual copy operations, but also data or
2409 * unlock requests) are made, a "ready" attribute is made available.
2410 * Only the memory manager may affect the value of this attribute.
2411 * Its value does not affect critical kernel functions, such as
2412 * internal object initialization or destruction. [Furthermore,
2413 * memory objects created by the kernel are assumed to be ready
2414 * immediately; the default memory manager need not explicitly
2415 * set the "ready" attribute.]
2416 *
2417 * [Both the "initialized" and "ready" attribute wait conditions
2418 * use the "pager" field as the wait event.]
2419 *
2420 * The port associations can be broken down by any of the
2421 * following routines:
2422 * vm_object_terminate:
2423 * No references to the vm_object remain, and
2424 * the object cannot (or will not) be cached.
2425 * This is the normal case, and is done even
2426 * though one of the other cases has already been
2427 * done.
1c79356b
A
2428 * memory_object_destroy:
2429 * The memory manager has requested that the
0b4e3aa0
A
2430 * kernel relinquish references to the memory
2431 * object. [The memory manager may not want to
2432 * destroy the memory object, but may wish to
2433 * refuse or tear down existing memory mappings.]
2434 *
1c79356b
A
2435 * Each routine that breaks an association must break all of
2436 * them at once. At some later time, that routine must clear
0b4e3aa0 2437 * the pager field and release the memory object references.
1c79356b
A
2438 * [Furthermore, each routine must cope with the simultaneous
2439 * or previous operations of the others.]
2440 *
2441 * In addition to the lock on the object, the vm_object_cache_lock
0b4e3aa0
A
2442 * governs the associations. References gained through the
2443 * association require use of the cache lock.
1c79356b 2444 *
0b4e3aa0 2445 * Because the pager field may be cleared spontaneously, it
1c79356b
A
2446 * cannot be used to determine whether a memory object has
2447 * ever been associated with a particular vm_object. [This
2448 * knowledge is important to the shadow object mechanism.]
2449 * For this reason, an additional "created" attribute is
2450 * provided.
2451 *
0b4e3aa0
A
2452 * During various paging operations, the pager reference found in the
2453 * vm_object must be valid. To prevent this from being released,
1c79356b
A
2454 * (other than being removed, i.e., made null), routines may use
2455 * the vm_object_paging_begin/end routines [actually, macros].
2456 * The implementation uses the "paging_in_progress" and "wanted" fields.
0b4e3aa0 2457 * [Operations that alter the validity of the pager values include the
1c79356b
A
2458 * termination routines and vm_object_collapse.]
2459 */
2460
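/*
 * Illustrative sketch: the paging-reference protocol described
 * above.  While the paging_in_progress reference is held, the
 * object's pager association cannot be torn down or collapsed
 * away, so the pager may be used without holding the object lock.
 * The helper name is hypothetical and the actual paging operation
 * is elided.
 */
static void
example_use_pager(
	vm_object_t	object)
{
	vm_object_lock(object);
	vm_object_paging_begin(object);		/* pin the association */
	vm_object_unlock(object);

	/*
	 * ... issue requests against object->pager here ...
	 */

	vm_object_lock(object);
	vm_object_paging_end(object);		/* may wake up waiters */
	vm_object_unlock(object);
}
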
0b4e3aa0
A
2461#if 0
2462/*
2463 * Routine: vm_object_pager_dead
2464 *
2465 * Purpose:
2466 * A port is being destroyed, and the IPC kobject code
2467 * can't tell if it represents a pager port or not.
2468 * So this function is called each time it sees a port
2469 * die.
2470 * THIS IS HORRIBLY INEFFICIENT. We should only call
2471 * this routine if we had requested a notification on
2472 * the port.
2473 */
1c79356b 2474
0b4e3aa0
A
2475__private_extern__ void
2476vm_object_pager_dead(
1c79356b
A
2477 ipc_port_t pager)
2478{
2479 vm_object_t object;
2480 vm_object_hash_entry_t entry;
1c79356b
A
2481
2482 /*
2483 * Perform essentially the same operations as in vm_object_lookup,
2484 * except that this time we look up based on the memory_object
2485 * port, not the control port.
2486 */
2487 vm_object_cache_lock();
2488 entry = vm_object_hash_lookup(pager, FALSE);
2489 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2490 entry->object == VM_OBJECT_NULL) {
2491 vm_object_cache_unlock();
2492 return;
2493 }
2494
2495 object = entry->object;
2496 entry->object = VM_OBJECT_NULL;
2497
2498 vm_object_lock(object);
2499 if (object->ref_count == 0) {
2500 XPR(XPR_VM_OBJECT_CACHE,
2501 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2502 (integer_t)object,
2503 (integer_t)vm_object_cached_list.next,
2504 (integer_t)vm_object_cached_list.prev, 0,0);
2505
2506 queue_remove(&vm_object_cached_list, object,
2507 vm_object_t, cached_list);
2508 vm_object_cached_count--;
2509 }
2510 object->ref_count++;
2511 vm_object_res_reference(object);
2512
2513 object->can_persist = FALSE;
2514
2515 assert(object->pager == pager);
2516
2517 /*
0b4e3aa0 2518 * Remove the pager association.
1c79356b
A
2519 *
2520 * Note that the memory_object itself is dead, so
2521 * we don't bother with it.
2522 */
2523
0b4e3aa0 2524 object->pager = MEMORY_OBJECT_NULL;
1c79356b
A
2525
2526 vm_object_unlock(object);
2527 vm_object_cache_unlock();
2528
2529 vm_object_pager_wakeup(pager);
2530
2531 /*
0b4e3aa0 2532 * Release the pager reference. Note that there's no
1c79356b 2533 * point in trying the memory_object_terminate call
0b4e3aa0
A
2534 * because the memory_object itself is dead. Also
2535 * release the memory_object_control reference, since
2536 * the pager didn't do that either.
1c79356b
A
2537 */
2538
0b4e3aa0
A
2539 memory_object_deallocate(pager);
2540 memory_object_control_deallocate(object->pager_request);
2541
1c79356b
A
2542
2543 /*
2544 * Restart pending page requests
2545 */
2546 vm_object_lock(object);
1c79356b 2547 vm_object_abort_activity(object);
1c79356b
A
2548 vm_object_unlock(object);
2549
2550 /*
2551 * Lose the object reference.
2552 */
2553
2554 vm_object_deallocate(object);
2555}
0b4e3aa0 2556#endif
1c79356b
A
2557
2558/*
2559 * Routine: vm_object_enter
2560 * Purpose:
2561 * Find a VM object corresponding to the given
2562 * pager; if no such object exists, create one,
2563 * and initialize the pager.
2564 */
2565vm_object_t
2566vm_object_enter(
0b4e3aa0 2567 memory_object_t pager,
1c79356b
A
2568 vm_object_size_t size,
2569 boolean_t internal,
2570 boolean_t init,
0b4e3aa0 2571 boolean_t named)
1c79356b
A
2572{
2573 register vm_object_t object;
2574 vm_object_t new_object;
2575 boolean_t must_init;
1c79356b 2576 vm_object_hash_entry_t entry, new_entry;
1c79356b 2577
0b4e3aa0 2578 if (pager == MEMORY_OBJECT_NULL)
1c79356b
A
2579 return(vm_object_allocate(size));
2580
2581 new_object = VM_OBJECT_NULL;
2582 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2583 must_init = init;
2584
2585 /*
2586 * Look for an object associated with this port.
2587 */
2588
1c79356b 2589 vm_object_cache_lock();
55e303ae 2590 do {
1c79356b
A
2591 entry = vm_object_hash_lookup(pager, FALSE);
2592
55e303ae
A
2593 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2594 if (new_object == VM_OBJECT_NULL) {
2595 /*
2596 * We must unlock to create a new object;
2597 * if we do so, we must try the lookup again.
2598 */
2599 vm_object_cache_unlock();
2600 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2601 new_entry = vm_object_hash_entry_alloc(pager);
2602 new_object = vm_object_allocate(size);
2603 vm_object_cache_lock();
2604 } else {
2605 /*
2606 * Lookup failed twice, and we have something
2607 * to insert; set the object.
2608 */
2609 vm_object_hash_insert(new_entry);
2610 entry = new_entry;
2611 entry->object = new_object;
2612 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2613 new_object = VM_OBJECT_NULL;
2614 must_init = TRUE;
2615 }
2616 } else if (entry->object == VM_OBJECT_NULL) {
2617 /*
2618 * If a previous object is being terminated,
2619 * we must wait for the termination message
2620 * to be queued (and lookup the entry again).
2621 */
1c79356b 2622 entry->waiting = TRUE;
55e303ae 2623 entry = VM_OBJECT_HASH_ENTRY_NULL;
1c79356b
A
2624 assert_wait((event_t) pager, THREAD_UNINT);
2625 vm_object_cache_unlock();
2626 thread_block((void (*)(void))0);
1c79356b 2627 vm_object_cache_lock();
1c79356b 2628 }
55e303ae 2629 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
1c79356b
A
2630
2631 object = entry->object;
2632 assert(object != VM_OBJECT_NULL);
2633
2634 if (!must_init) {
2635 vm_object_lock(object);
1c79356b 2636 assert(!internal || object->internal);
0b4e3aa0
A
2637 if (named) {
2638 assert(!object->named);
1c79356b 2639 object->named = TRUE;
0b4e3aa0 2640 }
1c79356b
A
2641 if (object->ref_count == 0) {
2642 XPR(XPR_VM_OBJECT_CACHE,
2643 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2644 (integer_t)object,
2645 (integer_t)vm_object_cached_list.next,
2646 (integer_t)vm_object_cached_list.prev, 0,0);
2647 queue_remove(&vm_object_cached_list, object,
2648 vm_object_t, cached_list);
2649 vm_object_cached_count--;
2650 }
2651 object->ref_count++;
2652 vm_object_res_reference(object);
2653 vm_object_unlock(object);
2654
2655 VM_STAT(hits++);
2656 }
2657 assert(object->ref_count > 0);
2658
2659 VM_STAT(lookups++);
2660
2661 vm_object_cache_unlock();
2662
2663 XPR(XPR_VM_OBJECT,
2664 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2665 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2666
2667 /*
2668 * If we raced to create a vm_object but lost, let's
2669 * throw away ours.
2670 */
2671
2672 if (new_object != VM_OBJECT_NULL)
2673 vm_object_deallocate(new_object);
2674
2675 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2676 vm_object_hash_entry_free(new_entry);
2677
2678 if (must_init) {
0b4e3aa0 2679 pager_request_t pager_request;
1c79356b
A
2680
2681 /*
2682 * Allocate request port.
2683 */
2684
0b4e3aa0
A
2685 pager_request = memory_object_control_allocate(object);
2686 assert (pager_request != PAGER_REQUEST_NULL);
1c79356b
A
2687
2688 vm_object_lock(object);
2689
2690 /*
0b4e3aa0 2691 * Copy the reference we were given.
1c79356b
A
2692 */
2693
0b4e3aa0 2694 memory_object_reference(pager);
1c79356b
A
2695 object->pager_created = TRUE;
2696 object->pager = pager;
2697 object->internal = internal;
2698 object->pager_trusted = internal;
2699 if (!internal) {
2700 /* copy strategy invalid until set by memory manager */
2701 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2702 }
2703 object->pager_request = pager_request;
2704 object->pager_ready = FALSE;
2705
1c79356b
A
2706 vm_object_unlock(object);
2707
2708 /*
2709 * Let the pager know we're using it.
2710 */
2711
0b4e3aa0
A
2712 (void) memory_object_init(pager,
2713 object->pager_request,
2714 PAGE_SIZE);
1c79356b
A
2715
2716 vm_object_lock(object);
0b4e3aa0
A
2717 if (named)
2718 object->named = TRUE;
1c79356b
A
2719 if (internal) {
2720 object->pager_ready = TRUE;
2721 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2722 }
2723
2724 object->pager_initialized = TRUE;
2725 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2726 } else {
2727 vm_object_lock(object);
2728 }
2729
2730 /*
2731 * [At this point, the object must be locked]
2732 */
2733
2734 /*
2735 * Wait for the work above to be done by the first
2736 * thread to map this object.
2737 */
2738
2739 while (!object->pager_initialized) {
9bccf70c 2740 vm_object_sleep(object,
1c79356b
A
2741 VM_OBJECT_EVENT_INITIALIZED,
2742 THREAD_UNINT);
1c79356b
A
2743 }
2744 vm_object_unlock(object);
2745
2746 XPR(XPR_VM_OBJECT,
2747 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2748 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2749 return(object);
2750}
2751
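/*
 * Illustrative sketch: obtaining the vm_object for a pager
 * supplied by a client, in the style of memory_object_create_named.
 * The helper name and parameters are hypothetical.  With init
 * FALSE, the object is initialized only if it did not already
 * exist; passing MEMORY_OBJECT_NULL would simply allocate an
 * anonymous object of the requested size.
 */
static vm_object_t
example_lookup_pager_object(
	memory_object_t		client_pager,
	vm_object_size_t	mapping_size)
{
	return vm_object_enter(client_pager, mapping_size,
			       FALSE,		/* internal */
			       FALSE,		/* init */
			       TRUE);		/* named */
}
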
2752/*
2753 * Routine: vm_object_pager_create
2754 * Purpose:
2755 * Create a memory object for an internal object.
2756 * In/out conditions:
2757 * The object is locked on entry and exit;
2758 * it may be unlocked within this call.
2759 * Limitations:
2760 * Only one thread may be performing a
2761 * vm_object_pager_create on an object at
2762 * a time. Presumably, only the pageout
2763 * daemon will be using this routine.
2764 */
2765
2766void
2767vm_object_pager_create(
2768 register vm_object_t object)
2769{
0b4e3aa0 2770 memory_object_t pager;
1c79356b
A
2771 vm_object_hash_entry_t entry;
2772#if MACH_PAGEMAP
2773 vm_object_size_t size;
2774 vm_external_map_t map;
2775#endif /* MACH_PAGEMAP */
2776
2777 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2778 (integer_t)object, 0,0,0,0);
2779
2780 if (memory_manager_default_check() != KERN_SUCCESS)
2781 return;
2782
2783 /*
2784 * Prevent collapse or termination by holding a paging reference
2785 */
2786
2787 vm_object_paging_begin(object);
2788 if (object->pager_created) {
2789 /*
2790 * Someone else got to it first...
2791 * wait for them to finish initializing the ports
2792 */
2793 while (!object->pager_initialized) {
9bccf70c
A
2794 vm_object_sleep(object,
2795 VM_OBJECT_EVENT_INITIALIZED,
2796 THREAD_UNINT);
1c79356b
A
2797 }
2798 vm_object_paging_end(object);
2799 return;
2800 }
2801
2802 /*
2803 * Indicate that a memory object has been assigned
2804 * before dropping the lock, to prevent a race.
2805 */
2806
2807 object->pager_created = TRUE;
2808 object->paging_offset = 0;
2809
2810#if MACH_PAGEMAP
2811 size = object->size;
2812#endif /* MACH_PAGEMAP */
2813 vm_object_unlock(object);
2814
2815#if MACH_PAGEMAP
2816 map = vm_external_create(size);
2817 vm_object_lock(object);
2818 assert(object->size == size);
2819 object->existence_map = map;
2820 vm_object_unlock(object);
2821#endif /* MACH_PAGEMAP */
2822
2823 /*
0b4e3aa0 2824 * Create the [internal] pager, and associate it with this object.
1c79356b 2825 *
0b4e3aa0 2826 * We make the association here so that vm_object_enter()
1c79356b
A
2827 * can look up the object to complete initializing it. No
2828 * user will ever map this object.
2829 */
2830 {
0b4e3aa0 2831 memory_object_default_t dmm;
1c79356b
A
2832 vm_size_t cluster_size;
2833
0b4e3aa0
A
2834 /* acquire a reference for the default memory manager */
2835 dmm = memory_manager_default_reference(&cluster_size);
1c79356b
A
2836 assert(cluster_size >= PAGE_SIZE);
2837
2838 object->cluster_size = cluster_size; /* XXX ??? */
2839 assert(object->temporary);
2840
0b4e3aa0
A
2841 /* create our new memory object */
2842 (void) memory_object_create(dmm, object->size, &pager);
2843
2844 memory_object_default_deallocate(dmm);
1c79356b
A
2845 }
2846
2847 entry = vm_object_hash_entry_alloc(pager);
2848
2849 vm_object_cache_lock();
2850 vm_object_hash_insert(entry);
2851
2852 entry->object = object;
2853 vm_object_cache_unlock();
2854
2855 /*
0b4e3aa0 2856 * A reference was returned by
1c79356b
A
2857 * memory_object_create(), and it is
2858 * copied by vm_object_enter().
2859 */
2860
2861 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
2862 panic("vm_object_pager_create: mismatch");
2863
2864 /*
0b4e3aa0 2865 * Drop the reference we were passed.
1c79356b 2866 */
0b4e3aa0 2867 memory_object_deallocate(pager);
1c79356b
A
2868
2869 vm_object_lock(object);
2870
2871 /*
2872 * Release the paging reference
2873 */
2874 vm_object_paging_end(object);
2875}
2876
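/*
 * Illustrative sketch: how a pageout-style caller is expected to
 * make sure an internal object has a pager before pushing pages to
 * it.  The helper name is hypothetical; vm_object_pager_create()
 * itself handles the race where another thread is already
 * initializing the pager.
 */
static void
example_ensure_pager(
	vm_object_t	object)
{
	vm_object_lock(object);
	if (!object->pager_created) {
		/* may drop and re-take the object lock internally */
		vm_object_pager_create(object);
	}
	vm_object_unlock(object);
}
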
2877/*
2878 * Routine: vm_object_remove
2879 * Purpose:
2880 * Eliminate the pager/object association
2881 * for this pager.
2882 * Conditions:
2883 * The object cache must be locked.
2884 */
0b4e3aa0 2885__private_extern__ void
1c79356b
A
2886vm_object_remove(
2887 vm_object_t object)
2888{
0b4e3aa0
A
2889 memory_object_t pager;
2890 pager_request_t pager_request;
1c79356b 2891
0b4e3aa0 2892 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
1c79356b
A
2893 vm_object_hash_entry_t entry;
2894
0b4e3aa0 2895 entry = vm_object_hash_lookup(pager, FALSE);
1c79356b
A
2896 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
2897 entry->object = VM_OBJECT_NULL;
2898 }
2899
1c79356b
A
2900}
2901
2902/*
2903 * Global variables for vm_object_collapse():
2904 *
2905 * Counts for normal collapses and bypasses.
2906 * Debugging variables, to watch or disable collapse.
2907 */
0b4e3aa0
A
2908static long object_collapses = 0;
2909static long object_bypasses = 0;
1c79356b 2910
0b4e3aa0
A
2911static boolean_t vm_object_collapse_allowed = TRUE;
2912static boolean_t vm_object_bypass_allowed = TRUE;
2913
2914static int vm_external_discarded;
2915static int vm_external_collapsed;
1c79356b 2916
1c79356b 2917/*
0b4e3aa0
A
2918 * Routine: vm_object_do_collapse
2919 * Purpose:
2920 * Collapse an object with the object backing it.
2921 * Pages in the backing object are moved into the
2922 * parent, and the backing object is deallocated.
2923 * Conditions:
2924 * Both objects and the cache are locked; the page
2925 * queues are unlocked.
1c79356b
A
2926 *
2927 */
0b4e3aa0 2928static void
1c79356b
A
2929vm_object_do_collapse(
2930 vm_object_t object,
2931 vm_object_t backing_object)
2932{
2933 vm_page_t p, pp;
2934 vm_object_offset_t new_offset, backing_offset;
2935 vm_object_size_t size;
2936
2937 backing_offset = object->shadow_offset;
2938 size = object->size;
2939
1c79356b
A
2940 /*
2941 * Move all in-memory pages from backing_object
2942 * to the parent. Pages that have been paged out
2943 * will be overwritten by any of the parent's
2944 * pages that shadow them.
2945 */
2946
2947 while (!queue_empty(&backing_object->memq)) {
2948
2949 p = (vm_page_t) queue_first(&backing_object->memq);
2950
2951 new_offset = (p->offset - backing_offset);
2952
2953 assert(!p->busy || p->absent);
2954
2955 /*
2956 * If the parent has a page here, or if
2957 * this page falls outside the parent,
2958 * dispose of it.
2959 *
2960 * Otherwise, move it as planned.
2961 */
2962
2963 if (p->offset < backing_offset || new_offset >= size) {
2964 VM_PAGE_FREE(p);
2965 } else {
2966 pp = vm_page_lookup(object, new_offset);
2967 if (pp == VM_PAGE_NULL) {
2968
2969 /*
2970 * Parent now has no page.
2971 * Move the backing object's page up.
2972 */
2973
2974 vm_page_rename(p, object, new_offset);
2975#if MACH_PAGEMAP
2976 } else if (pp->absent) {
2977
2978 /*
2979 * Parent has an absent page...
2980 * it's not being paged in, so
2981 * it must really be missing from
2982 * the parent.
2983 *
2984 * Throw out the absent page...
2985 * any faults looking for that
2986 * page will restart with the new
2987 * one.
2988 */
2989
2990 VM_PAGE_FREE(pp);
2991 vm_page_rename(p, object, new_offset);
2992#endif /* MACH_PAGEMAP */
2993 } else {
2994 assert(! pp->absent);
2995
2996 /*
2997 * Parent object has a real page.
2998 * Throw away the backing object's
2999 * page.
3000 */
3001 VM_PAGE_FREE(p);
3002 }
3003 }
3004 }
3005
55e303ae
A
3006#if !MACH_PAGEMAP
3007 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL
3008 || (!backing_object->pager_created
3009 && backing_object->pager == MEMORY_OBJECT_NULL));
3010#else
3011 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
3012#endif /* !MACH_PAGEMAP */
1c79356b 3013
0b4e3aa0 3014 if (backing_object->pager != MEMORY_OBJECT_NULL) {
1c79356b
A
3015 vm_object_hash_entry_t entry;
3016
3017 /*
3018 * Move the pager from backing_object to object.
3019 *
3020 * XXX We're only using part of the paging space
3021 * for keeps now... we ought to discard the
3022 * unused portion.
3023 */
3024
55e303ae 3025 assert(!object->paging_in_progress);
1c79356b
A
3026 object->pager = backing_object->pager;
3027 entry = vm_object_hash_lookup(object->pager, FALSE);
3028 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3029 entry->object = object;
3030 object->pager_created = backing_object->pager_created;
3031 object->pager_request = backing_object->pager_request;
3032 object->pager_ready = backing_object->pager_ready;
3033 object->pager_initialized = backing_object->pager_initialized;
3034 object->cluster_size = backing_object->cluster_size;
3035 object->paging_offset =
3036 backing_object->paging_offset + backing_offset;
0b4e3aa0
A
3037 if (object->pager_request != PAGER_REQUEST_NULL) {
3038 memory_object_control_collapse(object->pager_request,
3039 object);
1c79356b
A
3040 }
3041 }
3042
3043 vm_object_cache_unlock();
3044
1c79356b
A
3045#if MACH_PAGEMAP
3046 /*
3047	 * If the shadow offset is 0, then use the existence map from
3048 * the backing object if there is one. If the shadow offset is
3049 * not zero, toss it.
3050 *
3051 * XXX - If the shadow offset is not 0 then a bit copy is needed
3052	 * if the map is to be salvaged. For now, we just toss the
3053 * old map, giving the collapsed object no map. This means that
3054 * the pager is invoked for zero fill pages. If analysis shows
3055 * that this happens frequently and is a performance hit, then
3056 * this code should be fixed to salvage the map.
3057 */
3058 assert(object->existence_map == VM_EXTERNAL_NULL);
3059 if (backing_offset || (size != backing_object->size)) {
3060 vm_external_discarded++;
3061 vm_external_destroy(backing_object->existence_map,
3062 backing_object->size);
3063 }
3064 else {
3065 vm_external_collapsed++;
3066 object->existence_map = backing_object->existence_map;
3067 }
3068 backing_object->existence_map = VM_EXTERNAL_NULL;
3069#endif /* MACH_PAGEMAP */
3070
3071 /*
3072 * Object now shadows whatever backing_object did.
3073 * Note that the reference to backing_object->shadow
3074 * moves from within backing_object to within object.
3075 */
3076
3077 object->shadow = backing_object->shadow;
3078 object->shadow_offset += backing_object->shadow_offset;
3079 assert((object->shadow == VM_OBJECT_NULL) ||
55e303ae 3080 (object->shadow->copy != backing_object));
1c79356b
A
3081
3082 /*
3083 * Discard backing_object.
3084 *
3085 * Since the backing object has no pages, no
3086 * pager left, and no object references within it,
3087 * all that is necessary is to dispose of it.
3088 */
3089
3090 assert((backing_object->ref_count == 1) &&
3091 (backing_object->resident_page_count == 0) &&
3092 (backing_object->paging_in_progress == 0));
3093
1c79356b
A
3094 backing_object->alive = FALSE;
3095 vm_object_unlock(backing_object);
3096
3097 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3098 (integer_t)backing_object, 0,0,0,0);
3099
3100 zfree(vm_object_zone, (vm_offset_t) backing_object);
3101
3102 object_collapses++;
3103}
3104
0b4e3aa0 3105static void
1c79356b
A
3106vm_object_do_bypass(
3107 vm_object_t object,
3108 vm_object_t backing_object)
3109{
3110 /*
3111 * Make the parent shadow the next object
3112 * in the chain.
3113 */
3114
3115#if TASK_SWAPPER
3116 /*
3117 * Do object reference in-line to
3118 * conditionally increment shadow's
3119 * residence count. If object is not
3120 * resident, leave residence count
3121 * on shadow alone.
3122 */
3123 if (backing_object->shadow != VM_OBJECT_NULL) {
3124 vm_object_lock(backing_object->shadow);
3125 backing_object->shadow->ref_count++;
3126 if (object->res_count != 0)
3127 vm_object_res_reference(backing_object->shadow);
3128 vm_object_unlock(backing_object->shadow);
3129 }
3130#else /* TASK_SWAPPER */
3131 vm_object_reference(backing_object->shadow);
3132#endif /* TASK_SWAPPER */
3133
3134 object->shadow = backing_object->shadow;
3135 object->shadow_offset += backing_object->shadow_offset;
3136
3137 /*
3138 * Backing object might have had a copy pointer
3139 * to us. If it did, clear it.
3140 */
3141 if (backing_object->copy == object) {
3142 backing_object->copy = VM_OBJECT_NULL;
3143 }
3144
3145 /*
3146 * Drop the reference count on backing_object.
3147#if TASK_SWAPPER
3148 * Since its ref_count was at least 2, it
3149 * will not vanish; so we don't need to call
3150 * vm_object_deallocate.
3151 * [FBDP: that doesn't seem to be true any more]
3152 *
3153 * The res_count on the backing object is
3154 * conditionally decremented. It's possible
3155 * (via vm_pageout_scan) to get here with
3156 * a "swapped" object, which has a 0 res_count,
3157 * in which case, the backing object res_count
3158 * is already down by one.
3159#else
3160 * Don't call vm_object_deallocate unless
3161 * ref_count drops to zero.
3162 *
3163 * The ref_count can drop to zero here if the
3164 * backing object could be bypassed but not
3165 * collapsed, such as when the backing object
3166 * is temporary and cachable.
3167#endif
3168 */
3169 if (backing_object->ref_count > 1) {
3170 backing_object->ref_count--;
3171#if TASK_SWAPPER
3172 if (object->res_count != 0)
3173 vm_object_res_deallocate(backing_object);
3174 assert(backing_object->ref_count > 0);
3175#endif /* TASK_SWAPPER */
3176 vm_object_unlock(backing_object);
3177 } else {
3178
3179 /*
3180 * Drop locks so that we can deallocate
3181 * the backing object.
3182 */
3183
3184#if TASK_SWAPPER
3185 if (object->res_count == 0) {
3186 /* XXX get a reference for the deallocate below */
3187 vm_object_res_reference(backing_object);
3188 }
3189#endif /* TASK_SWAPPER */
3190 vm_object_unlock(object);
3191 vm_object_unlock(backing_object);
3192 vm_object_deallocate(backing_object);
3193
3194 /*
3195 * Relock object. We don't have to reverify
3196 * its state since vm_object_collapse will
3197 * do that for us as it starts at the
3198 * top of its loop.
3199 */
3200
3201 vm_object_lock(object);
3202 }
3203
3204 object_bypasses++;
3205}
0b4e3aa0 3206
1c79356b
A
3207
3208/*
3209 * vm_object_collapse:
3210 *
3211 * Perform an object collapse or an object bypass if appropriate.
3212 * The real work of collapsing and bypassing is performed in
3213 * the routines vm_object_do_collapse and vm_object_do_bypass.
3214 *
3215 * Requires that the object be locked and the page queues be unlocked.
3216 *
3217 */
0b4e3aa0 3218__private_extern__ void
1c79356b 3219vm_object_collapse(
55e303ae
A
3220 register vm_object_t object,
3221 register vm_object_offset_t hint_offset)
1c79356b
A
3222{
3223 register vm_object_t backing_object;
55e303ae
A
3224 register unsigned int rcount;
3225 register unsigned int size;
0b4e3aa0 3226
1c79356b
A
3227 if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {
3228 return;
3229 }
3230
3231 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3232 (integer_t)object, 0,0,0,0);
3233
3234 while (TRUE) {
3235 /*
3236 * Verify that the conditions are right for either
3237 * collapse or bypass:
3238 *
3239 * The object exists and no pages in it are currently
3240 * being paged out, and
3241 */
3242 if (object == VM_OBJECT_NULL ||
3243 object->paging_in_progress != 0 ||
3244 object->absent_count != 0)
3245 return;
3246
3247 /*
3248 * There is a backing object, and
3249 */
3250
3251 if ((backing_object = object->shadow) == VM_OBJECT_NULL)
3252 return;
3253
3254 vm_object_lock(backing_object);
3255
3256 /*
3257 * ...
3258 * The backing object is not read_only,
3259 * and no pages in the backing object are
3260 * currently being paged out.
3261 * The backing object is internal.
3262 *
3263 */
3264
3265 if (!backing_object->internal ||
3266 backing_object->paging_in_progress != 0) {
3267 vm_object_unlock(backing_object);
3268 return;
3269 }
3270
3271 /*
3272 * The backing object can't be a copy-object:
3273 * the shadow_offset for the copy-object must stay
3274 * as 0. Furthermore (for the 'we have all the
3275 * pages' case), if we bypass backing_object and
3276 * just shadow the next object in the chain, old
3277 * pages from that object would then have to be copied
3278 * BOTH into the (former) backing_object and into the
3279 * parent object.
3280 */
3281 if (backing_object->shadow != VM_OBJECT_NULL &&
55e303ae 3282 backing_object->shadow->copy == backing_object) {
1c79356b
A
3283 vm_object_unlock(backing_object);
3284 return;
3285 }
3286
3287 /*
3288 * We can now try to either collapse the backing
3289 * object (if the parent is the only reference to
3290 * it) or (perhaps) remove the parent's reference
3291 * to it.
1c79356b 3292 *
0b4e3aa0
A
3293 * If there is exactly one reference to the backing
3294 * object, we may be able to collapse it into the
3295 * parent.
1c79356b 3296 *
55e303ae
A
3297 * If MACH_PAGEMAP is defined:
3298 * The parent must not have a pager created for it,
3299 * since collapsing a backing_object dumps new pages
3300 * into the parent that its pager doesn't know about
3301 * (and the collapse code can't merge the existence
3302 * maps).
3303 * Otherwise:
3304 * As long as one of the objects is still not known
3305 * to the pager, we can collapse them.
1c79356b 3306 */
1c79356b 3307 if (backing_object->ref_count == 1 &&
55e303ae
A
3308 (!object->pager_created
3309#if !MACH_PAGEMAP
3310 || !backing_object->pager_created
3311#endif /*!MACH_PAGEMAP */
3312 ) && vm_object_collapse_allowed) {
1c79356b
A
3313
3314 XPR(XPR_VM_OBJECT,
3315 "vm_object_collapse: %x to %x, pager %x, pager_request %x\n",
3316 (integer_t)backing_object, (integer_t)object,
3317 (integer_t)backing_object->pager,
3318 (integer_t)backing_object->pager_request, 0);
3319
3320 /*
3321 * We need the cache lock for collapsing,
3322 * but we must not deadlock.
3323 */
3324
3325 if (! vm_object_cache_lock_try()) {
3326 vm_object_unlock(backing_object);
3327 return;
3328 }
3329
3330 /*
3331 * Collapse the object with its backing
3332 * object, and try again with the object's
3333 * new backing object.
3334 */
3335
3336 vm_object_do_collapse(object, backing_object);
3337 continue;
3338 }
3339
3340
3341 /*
3342 * Collapsing the backing object was not possible
3343 * or permitted, so let's try bypassing it.
3344 */
3345
3346 if (! vm_object_bypass_allowed) {
3347 vm_object_unlock(backing_object);
3348 return;
3349 }
3350
0b4e3aa0 3351
1c79356b 3352 /*
55e303ae
A
3353 * If the object doesn't have all its pages present,
3354 * we have to make sure no pages in the backing object
3355 * "show through" before bypassing it.
1c79356b 3356 */
55e303ae
A
3357 size = atop(object->size);
3358 rcount = object->resident_page_count;
3359 if (rcount != size) {
3360 vm_object_size_t size;
3361 vm_object_offset_t offset;
3362 vm_object_offset_t backing_offset;
3363 unsigned int backing_rcount;
3364 unsigned int lookups = 0;
3365
3366 /*
3367 * If the backing object has a pager but no pagemap,
3368 * then we cannot bypass it, because we don't know
3369 * what pages it has.
3370 */
3371 if (backing_object->pager_created
1c79356b 3372#if MACH_PAGEMAP
55e303ae 3373 && (backing_object->existence_map == VM_EXTERNAL_NULL)
1c79356b 3374#endif /* MACH_PAGEMAP */
55e303ae
A
3375 ) {
3376 vm_object_unlock(backing_object);
3377 return;
3378 }
1c79356b 3379
55e303ae
A
3380 /*
3381 * If the object has a pager but no pagemap,
3382 * then we cannot bypass it, because we don't know
3383 * what pages it has.
3384 */
3385 if (object->pager_created
0b4e3aa0 3386#if MACH_PAGEMAP
55e303ae 3387 && (object->existence_map == VM_EXTERNAL_NULL)
0b4e3aa0 3388#endif /* MACH_PAGEMAP */
55e303ae
A
3389 ) {
3390 vm_object_unlock(backing_object);
3391 return;
3392 }
0b4e3aa0 3393
55e303ae
A
3394 /*
3395 * If all of the pages in the backing object are
3396 * shadowed by the parent object, the parent
3397 * object no longer has to shadow the backing
3398 * object; it can shadow the next one in the
3399 * chain.
3400 *
3401 * If the backing object has existence info,
3402	 *	we must examine its existence info
3403 * as well.
3404 *
3405 */
1c79356b 3406
55e303ae
A
3407 backing_offset = object->shadow_offset;
3408 backing_rcount = backing_object->resident_page_count;
1c79356b 3409
55e303ae
A
3410#define EXISTS_IN_OBJECT(obj, off, rc) \
3411 (vm_external_state_get((obj)->existence_map, \
3412 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
3413 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
3414
3415 /*
3416 * Check the hint location first
3417 * (since it is often the quickest way out of here).
3418 */
3419 if (object->cow_hint != ~(vm_offset_t)0)
3420 hint_offset = (vm_object_offset_t)object->cow_hint;
3421 else
3422 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
3423 (hint_offset - 8 * PAGE_SIZE_64) : 0;
3424
3425 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
3426 backing_offset, backing_rcount) &&
3427 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
3428 /* dependency right at the hint */
3429 object->cow_hint = (vm_offset_t)hint_offset;
0b4e3aa0
A
3430 vm_object_unlock(backing_object);
3431 return;
0b4e3aa0 3432 }
55e303ae
A
3433
3434 /*
3435 * If the object's window onto the backing_object
3436 * is large compared to the number of resident
3437 * pages in the backing object, it makes sense to
3438 * walk the backing_object's resident pages first.
3439 *
3440 * NOTE: Pages may be in both the existence map and
3441 * resident. So, we can't permanently decrement
3442 * the rcount here because the second loop may
3443	 *	find the same pages in the backing object's
3444 * existence map that we found here and we would
3445 * double-decrement the rcount. We also may or
3446 * may not have found the
3447 */
3448 if (backing_rcount && size >
3449 ((backing_object->existence_map) ?
3450 backing_rcount : (backing_rcount >> 1))) {
3451 unsigned int rc = rcount;
3452 vm_page_t p;
3453
3454 backing_rcount = backing_object->resident_page_count;
3455 p = (vm_page_t)queue_first(&backing_object->memq);
3456 do {
3457 /* Until we get more than one lookup lock */
3458 if (lookups > 256) {
3459 lookups = 0;
3460 delay(1);
3461 }
3462
3463 offset = (p->offset - backing_offset);
3464 if (offset < object->size &&
3465 offset != hint_offset &&
3466 !EXISTS_IN_OBJECT(object, offset, rc)) {
3467 /* found a dependency */
3468 object->cow_hint = (vm_offset_t)offset;
3469 vm_object_unlock(backing_object);
3470 return;
3471 }
3472 p = queue_next(p);
3473
3474 } while (--backing_rcount);
0b4e3aa0 3475 }
55e303ae
A
3476
3477 /*
3478 * Walk through the offsets looking for pages in the
3479 * backing object that show through to the object.
3480 */
3481 if (backing_rcount || backing_object->existence_map) {
3482 offset = hint_offset;
3483
3484 while((offset =
3485 (offset + PAGE_SIZE_64 < object->size) ?
3486 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
3487
3488 /* Until we get more than one lookup lock */
3489 if (lookups > 256) {
3490 lookups = 0;
3491 delay(1);
3492 }
3493
3494 if (EXISTS_IN_OBJECT(backing_object, offset +
3495 backing_offset, backing_rcount) &&
3496 !EXISTS_IN_OBJECT(object, offset, rcount)) {
3497 /* found a dependency */
3498 object->cow_hint = (vm_offset_t)offset;
3499 vm_object_unlock(backing_object);
3500 return;
3501 }
3502 }
0b4e3aa0
A
3503 }
3504 }
1c79356b 3505
55e303ae
A
3506 /* reset the offset hint for any objects deeper in the chain */
3507 object->cow_hint = (vm_offset_t)0;
1c79356b
A
3508
3509 /*
3510 * All interesting pages in the backing object
3511 * already live in the parent or its pager.
3512 * Thus we can bypass the backing object.
3513 */
3514
3515 vm_object_do_bypass(object, backing_object);
3516
3517 /*
3518 * Try again with this object's new backing object.
3519 */
3520
3521 continue;
3522 }
3523}
3524
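/*
 * Illustrative sketch: the test performed by the
 * EXISTS_IN_OBJECT() macro above, written out as a plain function
 * and without the macro's lookup-throttling and resident-count
 * side effects.  The helper name is hypothetical; the object must
 * be locked.
 */
static boolean_t
example_page_known(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
#if MACH_PAGEMAP
	/* paged-out pages are remembered in the existence map */
	if (vm_external_state_get(object->existence_map,
				  (vm_offset_t) offset) ==
	    VM_EXTERNAL_STATE_EXISTS)
		return TRUE;
#endif	/* MACH_PAGEMAP */
	/* otherwise the page must be resident to be known */
	return (vm_page_lookup(object, offset) != VM_PAGE_NULL);
}
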
3525/*
3526 * Routine: vm_object_page_remove: [internal]
3527 * Purpose:
3528 * Removes all physical pages in the specified
3529 * object range from the object's list of pages.
3530 *
3531 * In/out conditions:
3532 * The object must be locked.
3533 * The object must not have paging_in_progress, usually
3534 * guaranteed by not having a pager.
3535 */
3536unsigned int vm_object_page_remove_lookup = 0;
3537unsigned int vm_object_page_remove_iterate = 0;
3538
0b4e3aa0 3539__private_extern__ void
1c79356b
A
3540vm_object_page_remove(
3541 register vm_object_t object,
3542 register vm_object_offset_t start,
3543 register vm_object_offset_t end)
3544{
3545 register vm_page_t p, next;
3546
3547 /*
3548 * One and two page removals are most popular.
3549 * The factor of 16 here is somewhat arbitrary.
3550 * It balances vm_object_lookup vs iteration.
3551 */
3552
55e303ae 3553 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
1c79356b
A
3554 vm_object_page_remove_lookup++;
3555
3556 for (; start < end; start += PAGE_SIZE_64) {
3557 p = vm_page_lookup(object, start);
3558 if (p != VM_PAGE_NULL) {
3559 assert(!p->cleaning && !p->pageout);
3560 if (!p->fictitious)
55e303ae 3561 pmap_page_protect(p->phys_page,
1c79356b
A
3562 VM_PROT_NONE);
3563 VM_PAGE_FREE(p);
3564 }
3565 }
3566 } else {
3567 vm_object_page_remove_iterate++;
3568
3569 p = (vm_page_t) queue_first(&object->memq);
3570 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3571 next = (vm_page_t) queue_next(&p->listq);
3572 if ((start <= p->offset) && (p->offset < end)) {
3573 assert(!p->cleaning && !p->pageout);
3574 if (!p->fictitious)
55e303ae 3575 pmap_page_protect(p->phys_page,
1c79356b
A
3576 VM_PROT_NONE);
3577 VM_PAGE_FREE(p);
3578 }
3579 p = next;
3580 }
3581 }
3582}
3583
0b4e3aa0 3584
1c79356b
A
3585/*
3586 * Routine: vm_object_coalesce
3587 * Function: Coalesces two objects backing up adjoining
3588 * regions of memory into a single object.
3589 *
3590 * returns TRUE if objects were combined.
3591 *
3592 * NOTE: Only works at the moment if the second object is NULL -
3593 * if it's not, which object do we lock first?
3594 *
3595 * Parameters:
3596 * prev_object First object to coalesce
3597 * prev_offset Offset into prev_object
3598	 *	next_object	Second object to coalesce
3599 * next_offset Offset into next_object
3600 *
3601 * prev_size Size of reference to prev_object
3602 * next_size Size of reference to next_object
3603 *
3604 * Conditions:
3605 * The object(s) must *not* be locked. The map must be locked
3606 * to preserve the reference to the object(s).
3607 */
0b4e3aa0 3608static int vm_object_coalesce_count = 0;
1c79356b 3609
0b4e3aa0 3610__private_extern__ boolean_t
1c79356b
A
3611vm_object_coalesce(
3612 register vm_object_t prev_object,
3613 vm_object_t next_object,
3614 vm_object_offset_t prev_offset,
3615 vm_object_offset_t next_offset,
3616 vm_object_size_t prev_size,
3617 vm_object_size_t next_size)
3618{
3619 vm_object_size_t newsize;
3620
3621#ifdef lint
3622 next_offset++;
3623#endif /* lint */
3624
3625 if (next_object != VM_OBJECT_NULL) {
3626 return(FALSE);
3627 }
3628
3629 if (prev_object == VM_OBJECT_NULL) {
3630 return(TRUE);
3631 }
3632
3633 XPR(XPR_VM_OBJECT,
3634 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3635 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3636
3637 vm_object_lock(prev_object);
3638
3639 /*
3640 * Try to collapse the object first
3641 */
55e303ae 3642 vm_object_collapse(prev_object, prev_offset);
1c79356b
A
3643
3644 /*
3645 * Can't coalesce if pages not mapped to
3646	 *	prev_entry may be in use in any way:
3647 * . more than one reference
3648 * . paged out
3649 * . shadows another object
3650 * . has a copy elsewhere
3651 * . paging references (pages might be in page-list)
3652 */
3653
3654 if ((prev_object->ref_count > 1) ||
3655 prev_object->pager_created ||
3656 (prev_object->shadow != VM_OBJECT_NULL) ||
3657 (prev_object->copy != VM_OBJECT_NULL) ||
3658 (prev_object->true_share != FALSE) ||
3659 (prev_object->paging_in_progress != 0)) {
3660 vm_object_unlock(prev_object);
3661 return(FALSE);
3662 }
3663
3664 vm_object_coalesce_count++;
3665
3666 /*
3667 * Remove any pages that may still be in the object from
3668 * a previous deallocation.
3669 */
3670 vm_object_page_remove(prev_object,
3671 prev_offset + prev_size,
3672 prev_offset + prev_size + next_size);
3673
3674 /*
3675 * Extend the object if necessary.
3676 */
3677 newsize = prev_offset + prev_size + next_size;
3678 if (newsize > prev_object->size) {
3679#if MACH_PAGEMAP
3680 /*
3681 * We cannot extend an object that has existence info,
3682 * since the existence info might then fail to cover
3683 * the entire object.
3684 *
3685 * This assertion must be true because the object
3686 * has no pager, and we only create existence info
3687 * for objects with pagers.
3688 */
3689 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3690#endif /* MACH_PAGEMAP */
3691 prev_object->size = newsize;
3692 }
3693
3694 vm_object_unlock(prev_object);
3695 return(TRUE);
3696}
3697
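/*
 * Illustrative sketch: extending an anonymous mapping in place.
 * As documented above, the second object must currently be
 * VM_OBJECT_NULL; on success the previous object simply grows to
 * cover the new range.  The helper name and parameters are
 * hypothetical.
 */
static boolean_t
example_try_extend(
	vm_object_t		prev_object,
	vm_object_offset_t	prev_offset,
	vm_object_size_t	prev_size,
	vm_object_size_t	extra_size)
{
	return vm_object_coalesce(prev_object,
				  VM_OBJECT_NULL,	  /* next_object */
				  prev_offset,
				  (vm_object_offset_t) 0, /* next_offset */
				  prev_size,
				  extra_size);
}
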
3698/*
3699 * Attach a set of physical pages to an object, so that they can
3700 * be mapped by mapping the object. Typically used to map IO memory.
3701 *
3702 * The mapping function and its private data are used to obtain the
3703 * physical addresses for each page to be mapped.
3704 */
3705void
3706vm_object_page_map(
3707 vm_object_t object,
3708 vm_object_offset_t offset,
3709 vm_object_size_t size,
3710 vm_object_offset_t (*map_fn)(void *map_fn_data,
3711 vm_object_offset_t offset),
3712 void *map_fn_data) /* private to map_fn */
3713{
3714 int num_pages;
3715 int i;
3716 vm_page_t m;
3717 vm_page_t old_page;
3718 vm_object_offset_t addr;
3719
55e303ae 3720 num_pages = atop_64(size);
1c79356b
A
3721
3722 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3723
3724 addr = (*map_fn)(map_fn_data, offset);
3725
3726 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3727 vm_page_more_fictitious();
3728
3729 vm_object_lock(object);
3730 if ((old_page = vm_page_lookup(object, offset))
3731 != VM_PAGE_NULL)
3732 {
3733 vm_page_lock_queues();
3734 vm_page_free(old_page);
3735 vm_page_unlock_queues();
3736 }
3737
3738 vm_page_init(m, addr);
0b4e3aa0
A
3739 /* private normally requires lock_queues but since we */
3740 /* are initializing the page, it's not necessary here */
1c79356b
A
3741 m->private = TRUE; /* don't free page */
3742 m->wire_count = 1;
3743 vm_page_insert(m, object, offset);
3744
3745 PAGE_WAKEUP_DONE(m);
3746 vm_object_unlock(object);
3747 }
3748}
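
/*
 * Illustrative sketch (not part of the original source): a map_fn that
 * could be handed to vm_object_page_map() above to map a physically
 * contiguous device aperture.  "example_io_map_fn" and "io_base" are
 * hypothetical names used only for this example; a caller would invoke
 *	vm_object_page_map(object, 0, io_size, example_io_map_fn, &io_base);
 */
#if 0	/* example only, never compiled */
static vm_object_offset_t
example_io_map_fn(
	void			*map_fn_data,	/* physical base of aperture */
	vm_object_offset_t	offset)		/* page offset within object */
{
	vm_object_offset_t io_base = *(vm_object_offset_t *) map_fn_data;

	return io_base + offset;	/* physical address backing this page */
}
#endif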
3749
3750#include <mach_kdb.h>
3751
3752#if MACH_KDB
3753#include <ddb/db_output.h>
3754#include <vm/vm_print.h>
3755
3756#define printf kdbprintf
3757
3758extern boolean_t vm_object_cached(
3759 vm_object_t object);
3760
3761extern void print_bitstring(
3762 char byte);
3763
3764boolean_t vm_object_print_pages = FALSE;
3765
3766void
3767print_bitstring(
3768 char byte)
3769{
3770 printf("%c%c%c%c%c%c%c%c",
3771 ((byte & (1 << 0)) ? '1' : '0'),
3772 ((byte & (1 << 1)) ? '1' : '0'),
3773 ((byte & (1 << 2)) ? '1' : '0'),
3774 ((byte & (1 << 3)) ? '1' : '0'),
3775 ((byte & (1 << 4)) ? '1' : '0'),
3776 ((byte & (1 << 5)) ? '1' : '0'),
3777 ((byte & (1 << 6)) ? '1' : '0'),
3778 ((byte & (1 << 7)) ? '1' : '0'));
3779}
3780
3781boolean_t
3782vm_object_cached(
3783 register vm_object_t object)
3784{
3785 register vm_object_t o;
3786
3787 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3788 if (object == o) {
3789 return TRUE;
3790 }
3791 }
3792 return FALSE;
3793}
3794
3795#if MACH_PAGEMAP
3796/*
3797 * vm_external_print: [ debug ]
3798 */
3799void
3800vm_external_print(
3801 vm_external_map_t map,
3802 vm_size_t size)
3803{
3804 if (map == VM_EXTERNAL_NULL) {
3805 printf("0 ");
3806 } else {
3807 vm_size_t existence_size = stob(size);
3808 printf("{ size=%d, map=[", existence_size);
3809 if (existence_size > 0) {
3810 print_bitstring(map[0]);
3811 }
3812 if (existence_size > 1) {
3813 print_bitstring(map[1]);
3814 }
3815 if (existence_size > 2) {
3816 printf("...");
3817 print_bitstring(map[existence_size-1]);
3818 }
3819 printf("] }\n");
3820 }
3821 return;
3822}
3823#endif /* MACH_PAGEMAP */
3824
3825int
3826vm_follow_object(
3827 vm_object_t object)
3828{
3829 extern db_indent;
3830
0b4e3aa0
A
3831 int count = 0;
3832 int orig_db_indent = db_indent;
1c79356b 3833
0b4e3aa0
A
3834 while (TRUE) {
3835 if (object == VM_OBJECT_NULL) {
3836 db_indent = orig_db_indent;
3837 return count;
3838 }
1c79356b 3839
0b4e3aa0 3840 count += 1;
1c79356b 3841
0b4e3aa0
A
3842 iprintf("object 0x%x", object);
3843 printf(", shadow=0x%x", object->shadow);
3844 printf(", copy=0x%x", object->copy);
3845 printf(", pager=0x%x", object->pager);
3846 printf(", ref=%d\n", object->ref_count);
3847
3848 db_indent += 2;
3849 object = object->shadow;
3850 }
1c79356b 3851
1c79356b
A
3852}
3853
3854/*
3855 * vm_object_print: [ debug ]
3856 */
3857void
3858vm_object_print(
3859 vm_object_t object,
3860 boolean_t have_addr,
3861 int arg_count,
3862 char *modif)
3863{
3864 register vm_page_t p;
3865 extern db_indent;
3866 char *s;
3867
3868 register int count;
3869
3870 if (object == VM_OBJECT_NULL)
3871 return;
3872
3873 iprintf("object 0x%x\n", object);
3874
3875 db_indent += 2;
3876
3877 iprintf("size=0x%x", object->size);
3878 printf(", cluster=0x%x", object->cluster_size);
3879 printf(", frozen=0x%x", object->frozen_size);
3880 printf(", ref_count=%d\n", object->ref_count);
3881 iprintf("");
3882#if TASK_SWAPPER
3883 printf("res_count=%d, ", object->res_count);
3884#endif /* TASK_SWAPPER */
3885 printf("resident_page_count=%d\n", object->resident_page_count);
3886
3887 iprintf("shadow=0x%x", object->shadow);
3888 if (object->shadow) {
3889 register int i = 0;
3890 vm_object_t shadow = object;
3891 while(shadow = shadow->shadow)
3892 i++;
3893 printf(" (depth %d)", i);
3894 }
3895 printf(", copy=0x%x", object->copy);
3896 printf(", shadow_offset=0x%x", object->shadow_offset);
3897 printf(", last_alloc=0x%x\n", object->last_alloc);
3898
3899 iprintf("pager=0x%x", object->pager);
3900 printf(", paging_offset=0x%x", object->paging_offset);
3901 printf(", pager_request=0x%x\n", object->pager_request);
3902
3903 iprintf("copy_strategy=%d[", object->copy_strategy);
3904 switch (object->copy_strategy) {
3905 case MEMORY_OBJECT_COPY_NONE:
3906 printf("copy_none");
3907 break;
3908
3909 case MEMORY_OBJECT_COPY_CALL:
3910 printf("copy_call");
3911 break;
3912
3913 case MEMORY_OBJECT_COPY_DELAY:
3914 printf("copy_delay");
3915 break;
3916
3917 case MEMORY_OBJECT_COPY_SYMMETRIC:
3918 printf("copy_symmetric");
3919 break;
3920
3921 case MEMORY_OBJECT_COPY_INVALID:
3922 printf("copy_invalid");
3923 break;
3924
3925 default:
3926 printf("?");
3927 }
3928 printf("]");
3929 printf(", absent_count=%d\n", object->absent_count);
3930
3931 iprintf("all_wanted=0x%x<", object->all_wanted);
3932 s = "";
3933 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
3934 printf("%sinit", s);
3935 s = ",";
3936 }
3937 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
3938 printf("%sready", s);
3939 s = ",";
3940 }
3941 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
3942 printf("%spaging", s);
3943 s = ",";
3944 }
3945 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
3946 printf("%sabsent", s);
3947 s = ",";
3948 }
3949 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
3950 printf("%slock", s);
3951 s = ",";
3952 }
3953 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
3954 printf("%suncaching", s);
3955 s = ",";
3956 }
3957 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
3958 printf("%scopy_call", s);
3959 s = ",";
3960 }
3961 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
3962 printf("%scaching", s);
3963 s = ",";
3964 }
3965 printf(">");
3966 printf(", paging_in_progress=%d\n", object->paging_in_progress);
3967
3968 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
3969 (object->pager_created ? "" : "!"),
3970 (object->pager_initialized ? "" : "!"),
3971 (object->pager_ready ? "" : "!"),
3972 (object->can_persist ? "" : "!"),
3973 (object->pager_trusted ? "" : "!"),
3974 (object->pageout ? "" : "!"),
3975 (object->internal ? "internal" : "external"),
3976 (object->temporary ? "temporary" : "permanent"));
3977 iprintf("%salive, %slock_in_progress, %slock_restart, %sshadowed, %scached, %sprivate\n",
3978 (object->alive ? "" : "!"),
3979 (object->lock_in_progress ? "" : "!"),
3980 (object->lock_restart ? "" : "!"),
3981 (object->shadowed ? "" : "!"),
3982 (vm_object_cached(object) ? "" : "!"),
3983 (object->private ? "" : "!"));
3984 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
3985 (object->advisory_pageout ? "" : "!"),
3986 (object->silent_overwrite ? "" : "!"));
3987
3988#if MACH_PAGEMAP
3989 iprintf("existence_map=");
3990 vm_external_print(object->existence_map, object->size);
3991#endif /* MACH_PAGEMAP */
3992#if MACH_ASSERT
3993 iprintf("paging_object=0x%x\n", object->paging_object);
3994#endif /* MACH_ASSERT */
3995
3996 if (vm_object_print_pages) {
3997 count = 0;
3998 p = (vm_page_t) queue_first(&object->memq);
3999 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4000 if (count == 0) {
4001 iprintf("memory:=");
4002 } else if (count == 2) {
4003 printf("\n");
4004 iprintf(" ...");
4005 count = 0;
4006 } else {
4007 printf(",");
4008 }
4009 count++;
4010
4011 printf("(off=0x%X,page=0x%X)", p->offset, (integer_t) p);
4012 p = (vm_page_t) queue_next(&p->listq);
4013 }
4014 if (count != 0) {
4015 printf("\n");
4016 }
4017 }
4018 db_indent -= 2;
4019}
4020
4021
4022/*
4023 * vm_object_find [ debug ]
4024 *
4025 * Find all tasks which reference the given vm_object.
4026 */
4027
4028boolean_t vm_object_find(vm_object_t object);
4029boolean_t vm_object_print_verbose = FALSE;
4030
4031boolean_t
4032vm_object_find(
4033 vm_object_t object)
4034{
4035 task_t task;
4036 vm_map_t map;
4037 vm_map_entry_t entry;
4038 processor_set_t pset = &default_pset;
4039 boolean_t found = FALSE;
4040
4041 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
4042 map = task->map;
4043 for (entry = vm_map_first_entry(map);
4044 entry && entry != vm_map_to_entry(map);
4045 entry = entry->vme_next) {
4046
4047 vm_object_t obj;
4048
4049 /*
4050 * For the time being, skip submaps;
4051 * only the kernel can have submaps,
4052 * and unless we are interested in
4053 * kernel objects, we can simply skip
4054 * them. See sb/dejan/nmk18b7/src/mach_kernel/vm
4055 * for a full solution.
4056 */
4057 if (entry->is_sub_map)
4058 continue;
4059 if (entry)
4060 obj = entry->object.vm_object;
4061 else
4062 continue;
4063
4064 while (obj != VM_OBJECT_NULL) {
4065 if (obj == object) {
4066 if (!found) {
4067 printf("TASK\t\tMAP\t\tENTRY\n");
4068 found = TRUE;
4069 }
4070 printf("0x%x\t0x%x\t0x%x\n",
4071 task, map, entry);
4072 }
4073 obj = obj->shadow;
4074 }
4075 }
4076 }
4077
4078 return(found);
4079}
4080
4081#endif /* MACH_KDB */
4082
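/*
 * vm_object_populate_with_private:
 *
 * Back a "private" object with a run of specific physical pages,
 * starting at phys_page, so the range can be mapped by mapping the
 * object (typically device memory).  For a non-contiguous object a
 * fictitious page is installed (or an existing page retargeted) for
 * each physical page; for a phys_contiguous object the physical base
 * is simply recorded in shadow_offset.  Fails if the object is not
 * marked private, or if offset is not page aligned.
 */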
0b4e3aa0
A
4083kern_return_t
4084vm_object_populate_with_private(
55e303ae 4085 vm_object_t object,
0b4e3aa0 4086 vm_object_offset_t offset,
55e303ae
A
4087 ppnum_t phys_page,
4088 vm_size_t size)
0b4e3aa0 4089{
55e303ae 4090 ppnum_t base_page;
0b4e3aa0
A
4091 vm_object_offset_t base_offset;
4092
4093
4094 if(!object->private)
4095 return KERN_FAILURE;
4096
55e303ae 4097 base_page = phys_page;
0b4e3aa0
A
4098
4099 vm_object_lock(object);
4100 if(!object->phys_contiguous) {
4101 vm_page_t m;
55e303ae 4102 if((base_offset = trunc_page_64(offset)) != offset) {
0b4e3aa0
A
4103 vm_object_unlock(object);
4104 return KERN_FAILURE;
4105 }
4106 base_offset += object->paging_offset;
4107 while(size) {
4108 m = vm_page_lookup(object, base_offset);
4109 if(m != VM_PAGE_NULL) {
4110 if(m->fictitious) {
4111 vm_page_lock_queues();
4112 m->fictitious = FALSE;
4113 m->private = TRUE;
55e303ae 4114 m->phys_page = base_page;
0b4e3aa0
A
4115 if(!m->busy) {
4116 m->busy = TRUE;
4117 }
4118 if(!m->absent) {
4119 m->absent = TRUE;
4120 object->absent_count++;
4121 }
4122 m->list_req_pending = TRUE;
4123 vm_page_unlock_queues();
55e303ae 4124 } else if (m->phys_page != base_page) {
0b4e3aa0 4125 /* pmap call to clear old mapping */
55e303ae 4126 pmap_page_protect(m->phys_page,
0b4e3aa0 4127 VM_PROT_NONE);
55e303ae 4128 m->phys_page = base_page;
0b4e3aa0
A
4129 }
4130 } else {
4131 while ((m = vm_page_grab_fictitious())
4132 == VM_PAGE_NULL)
4133 vm_page_more_fictitious();
4134 vm_page_lock_queues();
4135 m->fictitious = FALSE;
4136 m->private = TRUE;
55e303ae 4137 m->phys_page = base_page;
0b4e3aa0
A
4138 m->list_req_pending = TRUE;
4139 m->absent = TRUE;
4140 m->unusual = TRUE;
4141 object->absent_count++;
4142 vm_page_unlock_queues();
4143 vm_page_insert(m, object, base_offset);
4144 }
55e303ae 4145 base_page++; /* Go to the next physical page */
0b4e3aa0
A
4146 base_offset += PAGE_SIZE;
4147 size -= PAGE_SIZE;
4148 }
4149 } else {
4150 /* NOTE: we should check the original settings here */
4151 /* if we have a size > zero a pmap call should be made */
4152 /* to disable the range */
4153
4154 /* pmap_? */
4155
4156 /* shadows on contiguous memory are not allowed */
4157 /* we therefore can use the offset field */
55e303ae 4158 object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
0b4e3aa0
A
4159 object->size = size;
4160 }
4161 vm_object_unlock(object);
4162 return KERN_SUCCESS;
4163}
4164
1c79356b
A
4165/*
4166 * memory_object_free_from_cache:
4167 *
4168 * Walk the vm_object cache list, removing and freeing vm_objects
4169 * which are backed by the pager identified by the caller (pager_id).
4170 * Remove up to "count" objects, if that many are available
4171 * in the cache.
0b4e3aa0 4172 *
1c79356b
A
4173 * Walk the list at most once; return the number of vm_objects
4174 * actually freed.
1c79356b
A
4175 */
4176
0b4e3aa0 4177__private_extern__ kern_return_t
1c79356b
A
4178memory_object_free_from_cache(
4179 host_t host,
0b4e3aa0 4180 int *pager_id,
1c79356b
A
4181 int *count)
4182{
4183
4184 int object_released = 0;
4185 int i;
4186
4187 register vm_object_t object = VM_OBJECT_NULL;
4188 vm_object_t shadow;
4189
4190/*
4191 if(host == HOST_NULL)
4192 return(KERN_INVALID_ARGUMENT);
4193*/
4194
4195 try_again:
4196 vm_object_cache_lock();
4197
4198 queue_iterate(&vm_object_cached_list, object,
4199 vm_object_t, cached_list) {
0b4e3aa0 4200 if (object->pager && (pager_id == object->pager->pager)) {
1c79356b
A
4201 vm_object_lock(object);
4202 queue_remove(&vm_object_cached_list, object,
4203 vm_object_t, cached_list);
4204 vm_object_cached_count--;
4205
4206 /*
4207 * Since this object is in the cache, we know
0b4e3aa0
A
4208 * that it is initialized and has only a pager's
4209 * (implicit) reference. Take a reference to avoid
4210 * recursive deallocations.
1c79356b
A
4211 */
4212
4213 assert(object->pager_initialized);
4214 assert(object->ref_count == 0);
4215 object->ref_count++;
4216
4217 /*
4218 * Terminate the object.
4219 * If the object had a shadow, we let
4220 * vm_object_deallocate deallocate it.
4221 * "pageout" objects have a shadow, but
4222 * maintain a "paging reference" rather
4223 * than a normal reference.
4224 * (We are careful here to limit recursion.)
4225 */
4226 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4227 if ((vm_object_terminate(object) == KERN_SUCCESS)
4228 && (shadow != VM_OBJECT_NULL)) {
4229 vm_object_deallocate(shadow);
4230 }
4231
4232 if(object_released++ == *count)
4233 return KERN_SUCCESS;
4234 goto try_again;
4235 }
4236 }
4237 vm_object_cache_unlock();
4238 *count = object_released;
4239 return KERN_SUCCESS;
4240}
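
/*
 * Illustrative sketch (not part of the original source): how a pager
 * might ask for its cached objects to be released, e.g. when the file
 * it backs is being reclaimed.  "some_host" and "my_pager_id" are
 * hypothetical; my_pager_id stands for whatever value the routine above
 * compares against object->pager->pager.  "freed" is in/out: it limits
 * how many objects may be released and returns how many actually were.
 */
#if 0	/* example only, never compiled */
	int freed = 16;		/* release at most 16 cached objects */

	(void) memory_object_free_from_cache(some_host, &my_pager_id, &freed);
	/* on return, "freed" holds the number of objects released */
#endif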
4241
0b4e3aa0 4242
1c79356b
A
4243
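/*
 * memory_object_create_named:
 *
 * Establish a named reference on the VM object backed by the given
 * pager, entering the object via vm_object_enter() if necessary, and
 * return its pager_request in *control.  Panics if a named reference
 * already exists for this pager; waits for the pager to become ready
 * before returning.
 */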
4244kern_return_t
0b4e3aa0
A
4245memory_object_create_named(
4246 memory_object_t pager,
4247 memory_object_offset_t size,
4248 memory_object_control_t *control)
1c79356b 4249{
0b4e3aa0
A
4250 vm_object_t object;
4251 vm_object_hash_entry_t entry;
1c79356b 4252
0b4e3aa0
A
4253 *control = MEMORY_OBJECT_CONTROL_NULL;
4254 if (pager == MEMORY_OBJECT_NULL)
4255 return KERN_INVALID_ARGUMENT;
1c79356b 4256
0b4e3aa0
A
4257 vm_object_cache_lock();
4258 entry = vm_object_hash_lookup(pager, FALSE);
4259 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4260 (entry->object != VM_OBJECT_NULL)) {
4261 if (entry->object->named == TRUE)
4262 panic("memory_object_create_named: caller already holds the right"); }
1c79356b 4263
0b4e3aa0
A
4264 vm_object_cache_unlock();
4265 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4266 == VM_OBJECT_NULL) {
4267 return(KERN_INVALID_OBJECT);
4268 }
4269
4270 /* wait for object (if any) to be ready */
4271 if (object != VM_OBJECT_NULL) {
4272 vm_object_lock(object);
4273 object->named = TRUE;
4274 while (!object->pager_ready) {
9bccf70c
A
4275 vm_object_sleep(object,
4276 VM_OBJECT_EVENT_PAGER_READY,
4277 THREAD_UNINT);
0b4e3aa0
A
4278 }
4279 *control = object->pager_request;
4280 vm_object_unlock(object);
4281 }
4282 return (KERN_SUCCESS);
4283}
1c79356b 4284
1c79356b 4285
0b4e3aa0
A
4286/*
4287 * Routine: memory_object_recover_named [user interface]
4288 * Purpose:
4289 * Attempt to recover a named reference for a VM object.
4290 * VM will verify that the object has not already started
4291 * down the termination path, and if it has, will optionally
4292 * wait for that to finish.
4293 * Returns:
4294 * KERN_SUCCESS - we recovered a named reference on the object
4295 * KERN_FAILURE - we could not recover a reference (object dead)
4296 * KERN_INVALID_ARGUMENT - bad memory object control
4297 */
4298kern_return_t
4299memory_object_recover_named(
4300 memory_object_control_t control,
4301 boolean_t wait_on_terminating)
4302{
4303 vm_object_t object;
1c79356b 4304
0b4e3aa0
A
4305 vm_object_cache_lock();
4306 object = memory_object_control_to_vm_object(control);
4307 if (object == VM_OBJECT_NULL) {
4308 vm_object_cache_unlock();
4309 return (KERN_INVALID_ARGUMENT);
4310 }
1c79356b 4311
0b4e3aa0
A
4312restart:
4313 vm_object_lock(object);
1c79356b 4314
0b4e3aa0
A
4315 if (object->terminating && wait_on_terminating) {
4316 vm_object_cache_unlock();
4317 vm_object_wait(object,
4318 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4319 THREAD_UNINT);
4320 vm_object_cache_lock();
4321 goto restart;
4322 }
4323
4324 if (!object->alive) {
4325 vm_object_cache_unlock();
4326 vm_object_unlock(object);
4327 return KERN_FAILURE;
1c79356b
A
4328 }
4329
0b4e3aa0
A
4330 if (object->named == TRUE) {
4331 vm_object_cache_unlock();
4332 vm_object_unlock(object);
4333 return KERN_SUCCESS;
4334 }
1c79356b 4335
0b4e3aa0
A
4336 if((object->ref_count == 0) && (!object->terminating)){
4337 queue_remove(&vm_object_cached_list, object,
4338 vm_object_t, cached_list);
4339 vm_object_cached_count--;
4340 XPR(XPR_VM_OBJECT_CACHE,
4341 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4342 (integer_t)object,
4343 (integer_t)vm_object_cached_list.next,
4344 (integer_t)vm_object_cached_list.prev, 0,0);
4345 }
4346
4347 vm_object_cache_unlock();
4348
4349 object->named = TRUE;
4350 object->ref_count++;
4351 vm_object_res_reference(object);
4352 while (!object->pager_ready) {
9bccf70c
A
4353 vm_object_sleep(object,
4354 VM_OBJECT_EVENT_PAGER_READY,
4355 THREAD_UNINT);
0b4e3aa0
A
4356 }
4357 vm_object_unlock(object);
4358 return (KERN_SUCCESS);
1c79356b
A
4359}
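
/*
 * Illustrative sketch (not part of the original source): re-acquiring a
 * named reference from a memory object control, waiting out any
 * in-progress termination.  "control" is a hypothetical
 * memory_object_control_t already held by the caller.
 */
#if 0	/* example only, never compiled */
	if (memory_object_recover_named(control, TRUE) != KERN_SUCCESS) {
		/*
		 * The object is dead (or the control was invalid);
		 * the caller must not rely on a named reference.
		 */
	}
#endif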
4360
0b4e3aa0
A
4361
4362/*
4363 * vm_object_release_name:
4364 *
4365 * Enforces the name semantic on memory_object reference count decrements.
4366 * This routine should not be called unless the caller holds a name
4367 * reference gained through memory_object_create_named.
4368 *
4369 * If the TERMINATE_IDLE flag is set, the call will return if the
4370 * reference count is not 1, i.e. unless the object is idle with the only
4371 * remaining reference being the name.
4372 * If the decision is made to proceed, the name flag is cleared and
4373 * the reference count is decremented. If the RESPECT_CACHE
4374 * flag is set and the reference count has gone to zero, the
4375 * memory_object is checked to see whether it is cacheable; otherwise, when
4376 * the reference count reaches zero, it is simply terminated.
4377 */
4378
4379__private_extern__ kern_return_t
4380vm_object_release_name(
4381 vm_object_t object,
4382 int flags)
1c79356b 4383{
0b4e3aa0
A
4384 vm_object_t shadow;
4385 boolean_t original_object = TRUE;
1c79356b 4386
0b4e3aa0 4387 while (object != VM_OBJECT_NULL) {
1c79356b 4388
0b4e3aa0
A
4389 /*
4390 * The cache holds a reference (uncounted) to
4391 * the object. We must lock it before removing
4392 * the object.
4393 *
4394 */
4395
1c79356b 4396 vm_object_cache_lock();
0b4e3aa0
A
4397 vm_object_lock(object);
4398 assert(object->alive);
4399 if(original_object)
4400 assert(object->named);
4401 assert(object->ref_count > 0);
4402
4403 /*
4404 * We have to wait for initialization before
4405 * destroying or caching the object.
4406 */
4407
4408 if (object->pager_created && !object->pager_initialized) {
4409 assert(!object->can_persist);
4410 vm_object_assert_wait(object,
4411 VM_OBJECT_EVENT_INITIALIZED,
4412 THREAD_UNINT);
4413 vm_object_unlock(object);
4414 vm_object_cache_unlock();
9bccf70c 4415 thread_block(THREAD_CONTINUE_NULL);
0b4e3aa0 4416 continue;
1c79356b
A
4417 }
4418
0b4e3aa0
A
4419 if (((object->ref_count > 1)
4420 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4421 || (object->terminating)) {
4422 vm_object_unlock(object);
4423 vm_object_cache_unlock();
4424 return KERN_FAILURE;
4425 } else {
4426 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4427 vm_object_unlock(object);
4428 vm_object_cache_unlock();
4429 return KERN_SUCCESS;
1c79356b 4430 }
0b4e3aa0
A
4431 }
4432
4433 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4434 (object->ref_count == 1)) {
4435 if(original_object)
4436 object->named = FALSE;
1c79356b 4437 vm_object_unlock(object);
0b4e3aa0
A
4438 vm_object_cache_unlock();
4439 /* let vm_object_deallocate push this thing into */
4440 /* the cache, if that is where it is bound */
4441 vm_object_deallocate(object);
4442 return KERN_SUCCESS;
4443 }
4444 VM_OBJ_RES_DECR(object);
4445 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4446 if(object->ref_count == 1) {
4447 if(vm_object_terminate(object) != KERN_SUCCESS) {
4448 if(original_object) {
4449 return KERN_FAILURE;
4450 } else {
4451 return KERN_SUCCESS;
4452 }
4453 }
4454 if (shadow != VM_OBJECT_NULL) {
4455 original_object = FALSE;
4456 object = shadow;
4457 continue;
4458 }
4459 return KERN_SUCCESS;
4460 } else {
4461 object->ref_count--;
4462 assert(object->ref_count > 0);
4463 if(original_object)
4464 object->named = FALSE;
4465 vm_object_unlock(object);
4466 vm_object_cache_unlock();
4467 return KERN_SUCCESS;
1c79356b 4468 }
1c79356b
A
4469 }
4470}
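
/*
 * Illustrative sketch (not part of the original source): releasing a
 * name reference obtained through memory_object_create_named(), letting
 * the object be cached rather than terminated if it is cacheable.
 * "named_object" is a hypothetical object on which the caller holds a
 * named reference.
 */
#if 0	/* example only, never compiled */
	kern_return_t kr;

	kr = vm_object_release_name(named_object, MEMORY_OBJECT_RESPECT_CACHE);
	if (kr != KERN_SUCCESS) {
		/* object was already terminating; the name was not released */
	}
#endif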
4471
0b4e3aa0
A
4472
4473__private_extern__ kern_return_t
4474vm_object_lock_request(
4475 vm_object_t object,
4476 vm_object_offset_t offset,
4477 vm_object_size_t size,
4478 memory_object_return_t should_return,
4479 int flags,
4480 vm_prot_t prot)
1c79356b 4481{
0b4e3aa0
A
4482 vm_object_offset_t original_offset = offset;
4483 boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH;
1c79356b 4484
0b4e3aa0
A
4485 XPR(XPR_MEMORY_OBJECT,
4486 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4487 (integer_t)object, offset, size,
4488 (((should_return&1)<<1)|should_flush), prot);
1c79356b 4489
0b4e3aa0
A
4490 /*
4491 * Check for bogus arguments.
4492 */
4493 if (object == VM_OBJECT_NULL)
4494 return (KERN_INVALID_ARGUMENT);
1c79356b 4495
0b4e3aa0
A
4496 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4497 return (KERN_INVALID_ARGUMENT);
1c79356b 4498
55e303ae 4499 size = round_page_64(size);
0b4e3aa0
A
4500
4501 /*
4502 * Lock the object, and acquire a paging reference to
4503 * prevent the memory_object reference from being released.
4504 */
4505 vm_object_lock(object);
4506 vm_object_paging_begin(object);
0b4e3aa0
A
4507
4508 (void)vm_object_update(object,
4509 offset, size, should_return, flags, prot);
4510
4511 vm_object_paging_end(object);
4512 vm_object_unlock(object);
4513
4514 return (KERN_SUCCESS);
4515}
4516
4517
4518
4519#if TASK_SWAPPER
4520/*
4521 * vm_object_res_deallocate
4522 *
4523 * (recursively) decrement residence counts on vm objects and their shadows.
4524 * Called from vm_object_deallocate and when swapping out an object.
4525 *
4526 * The object is locked, and remains locked throughout the function,
4527 * even as we iterate down the shadow chain. Locks on intermediate objects
4528 * will be dropped, but not the original object.
4529 *
4530 * NOTE: this function used to use recursion, rather than iteration.
4531 */
4532
4533__private_extern__ void
4534vm_object_res_deallocate(
4535 vm_object_t object)
4536{
4537 vm_object_t orig_object = object;
4538 /*
4539 * Object is locked so it can be called directly
4540 * from vm_object_deallocate. Original object is never
4541 * unlocked.
4542 */
4543 assert(object->res_count > 0);
4544 while (--object->res_count == 0) {
4545 assert(object->ref_count >= object->res_count);
4546 vm_object_deactivate_all_pages(object);
4547 /* iterate on shadow, if present */
4548 if (object->shadow != VM_OBJECT_NULL) {
4549 vm_object_t tmp_object = object->shadow;
4550 vm_object_lock(tmp_object);
4551 if (object != orig_object)
4552 vm_object_unlock(object);
4553 object = tmp_object;
4554 assert(object->res_count > 0);
4555 } else
4556 break;
4557 }
4558 if (object != orig_object)
1c79356b 4559 vm_object_unlock(object);
0b4e3aa0
A
4560}
4561
4562/*
4563 * vm_object_res_reference
4564 *
4565 * Internal function to increment residence count on a vm object
4566 * and its shadows. It is called only from vm_object_reference, and
4567 * when swapping in a vm object, via vm_map_swap.
4568 *
4569 * The object is locked, and remains locked throughout the function,
4570 * even as we iterate down the shadow chain. Locks on intermediate objects
4571 * will be dropped, but not the original object.
4572 *
4573 * NOTE: this function used to use recursion, rather than iteration.
4574 */
4575
4576__private_extern__ void
4577vm_object_res_reference(
4578 vm_object_t object)
4579{
4580 vm_object_t orig_object = object;
4581 /*
4582 * Object is locked, so this can be called directly
4583 * from vm_object_reference. This lock is never released.
4584 */
4585 while ((++object->res_count == 1) &&
4586 (object->shadow != VM_OBJECT_NULL)) {
4587 vm_object_t tmp_object = object->shadow;
4588
4589 assert(object->ref_count >= object->res_count);
4590 vm_object_lock(tmp_object);
4591 if (object != orig_object)
4592 vm_object_unlock(object);
4593 object = tmp_object;
1c79356b 4594 }
0b4e3aa0
A
4595 if (object != orig_object)
4596 vm_object_unlock(object);
4597 assert(orig_object->ref_count >= orig_object->res_count);
1c79356b 4598}
0b4e3aa0
A
4599#endif /* TASK_SWAPPER */
4600
4601/*
4602 * vm_object_reference:
4603 *
4604 * Gets another reference to the given object.
4605 */
4606#ifdef vm_object_reference
4607#undef vm_object_reference
4608#endif
4609__private_extern__ void
4610vm_object_reference(
4611 register vm_object_t object)
4612{
4613 if (object == VM_OBJECT_NULL)
4614 return;
4615
4616 vm_object_lock(object);
4617 assert(object->ref_count > 0);
4618 vm_object_reference_locked(object);
4619 vm_object_unlock(object);
4620}
4621
1c79356b
A
4622#ifdef MACH_BSD
4623/*
4624 * Scale the vm_object_cache.
4625 * This is required to make sure that the vm_object_cache is big
4626 * enough to effectively cache mapped files.
4627 * This is really important with UBC, as all regular file vnodes
4628 * have a memory object associated with them. Having this cache too
4629 * small results in rapid reclaim of vnodes and hurts performance a LOT!
4630 *
4631 * This is also needed as the number of vnodes can be dynamically scaled.
4632 */
4633kern_return_t
4634adjust_vm_object_cache(vm_size_t oval, vm_size_t nval)
4635{
4636 vm_object_cached_max = nval;
4637 vm_object_cache_trim(FALSE);
4638 return (KERN_SUCCESS);
4639}
4640#endif /* MACH_BSD */
4641