]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_object.c
xnu-792.10.96.tar.gz
[apple/xnu.git] / osfmk / vm / vm_object.c
CommitLineData
1c79356b 1/*
91447636 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
37839358
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
e5568f75 11 *
37839358
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
37839358
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50/*
51 */
52/*
53 * File: vm/vm_object.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 *
56 * Virtual memory object module.
57 */
58
1c79356b
A
59#include <mach_pagemap.h>
60#include <task_swapper.h>
61
0b4e3aa0 62#include <mach/mach_types.h>
1c79356b
A
63#include <mach/memory_object.h>
64#include <mach/memory_object_default.h>
65#include <mach/memory_object_control_server.h>
66#include <mach/vm_param.h>
91447636
A
67
68#include <ipc/ipc_types.h>
1c79356b 69#include <ipc/ipc_port.h>
91447636
A
70
71#include <kern/kern_types.h>
1c79356b
A
72#include <kern/assert.h>
73#include <kern/lock.h>
74#include <kern/queue.h>
75#include <kern/xpr.h>
76#include <kern/zalloc.h>
77#include <kern/host.h>
78#include <kern/host_statistics.h>
79#include <kern/processor.h>
91447636
A
80#include <kern/misc_protos.h>
81
1c79356b
A
82#include <vm/memory_object.h>
83#include <vm/vm_fault.h>
84#include <vm/vm_map.h>
85#include <vm/vm_object.h>
86#include <vm/vm_page.h>
87#include <vm/vm_pageout.h>
91447636 88#include <vm/vm_protos.h>
1c79356b 89
1c79356b
A
90/*
91 * Virtual memory objects maintain the actual data
92 * associated with allocated virtual memory. A given
93 * page of memory exists within exactly one object.
94 *
95 * An object is only deallocated when all "references"
0b4e3aa0 96 * are given up.
1c79356b
A
97 *
98 * Associated with each object is a list of all resident
99 * memory pages belonging to that object; this list is
100 * maintained by the "vm_page" module, but locked by the object's
101 * lock.
102 *
0b4e3aa0 103 * Each object also records the memory object reference
1c79356b 104 * that is used by the kernel to request and write
0b4e3aa0 105 * back data (the memory object, field "pager"), etc...
1c79356b
A
106 *
107 * Virtual memory objects are allocated to provide
108 * zero-filled memory (vm_allocate) or map a user-defined
109 * memory object into a virtual address space (vm_map).
110 *
111 * Virtual memory objects that refer to a user-defined
112 * memory object are called "permanent", because all changes
113 * made in virtual memory are reflected back to the
114 * memory manager, which may then store it permanently.
115 * Other virtual memory objects are called "temporary",
116 * meaning that changes need be written back only when
117 * necessary to reclaim pages, and that storage associated
118 * with the object can be discarded once it is no longer
119 * mapped.
120 *
121 * A permanent memory object may be mapped into more
122 * than one virtual address space. Moreover, two threads
123 * may attempt to make the first mapping of a memory
124 * object concurrently. Only one thread is allowed to
125 * complete this mapping; all others wait for the
126 * "pager_initialized" field is asserted, indicating
127 * that the first thread has initialized all of the
128 * necessary fields in the virtual memory object structure.
129 *
130 * The kernel relies on a *default memory manager* to
131 * provide backing storage for the zero-filled virtual
0b4e3aa0 132 * memory objects. The pager memory objects associated
1c79356b 133 * with these temporary virtual memory objects are only
0b4e3aa0
A
134 * requested from the default memory manager when it
135 * becomes necessary. Virtual memory objects
1c79356b
A
136 * that depend on the default memory manager are called
137 * "internal". The "pager_created" field is provided to
138 * indicate whether these ports have ever been allocated.
139 *
140 * The kernel may also create virtual memory objects to
141 * hold changed pages after a copy-on-write operation.
142 * In this case, the virtual memory object (and its
143 * backing storage -- its memory object) only contain
144 * those pages that have been changed. The "shadow"
145 * field refers to the virtual memory object that contains
146 * the remainder of the contents. The "shadow_offset"
147 * field indicates where in the "shadow" these contents begin.
148 * The "copy" field refers to a virtual memory object
149 * to which changed pages must be copied before changing
150 * this object, in order to implement another form
151 * of copy-on-write optimization.
152 *
153 * The virtual memory object structure also records
154 * the attributes associated with its memory object.
155 * The "pager_ready", "can_persist" and "copy_strategy"
156 * fields represent those attributes. The "cached_list"
157 * field is used in the implementation of the persistence
158 * attribute.
159 *
160 * ZZZ Continue this comment.
161 */
162
163/* Forward declarations for internal functions. */
0b4e3aa0 164static kern_return_t vm_object_terminate(
1c79356b
A
165 vm_object_t object);
166
167extern void vm_object_remove(
168 vm_object_t object);
169
0b4e3aa0 170static vm_object_t vm_object_cache_trim(
1c79356b
A
171 boolean_t called_from_vm_object_deallocate);
172
0b4e3aa0 173static void vm_object_deactivate_all_pages(
1c79356b
A
174 vm_object_t object);
175
0b4e3aa0 176static kern_return_t vm_object_copy_call(
1c79356b
A
177 vm_object_t src_object,
178 vm_object_offset_t src_offset,
179 vm_object_size_t size,
180 vm_object_t *_result_object);
181
0b4e3aa0 182static void vm_object_do_collapse(
1c79356b
A
183 vm_object_t object,
184 vm_object_t backing_object);
185
0b4e3aa0 186static void vm_object_do_bypass(
1c79356b
A
187 vm_object_t object,
188 vm_object_t backing_object);
189
0b4e3aa0
A
190static void vm_object_release_pager(
191 memory_object_t pager);
1c79356b 192
0b4e3aa0 193static zone_t vm_object_zone; /* vm backing store zone */
1c79356b
A
194
195/*
196 * All wired-down kernel memory belongs to a single virtual
197 * memory object (kernel_object) to avoid wasting data structures.
198 */
0b4e3aa0 199static struct vm_object kernel_object_store;
c0fea474 200vm_object_t kernel_object;
1c79356b
A
201
202/*
203 * The submap object is used as a placeholder for vm_map_submap
204 * operations. The object is declared in vm_map.c because it
205 * is exported by the vm_map module. The storage is declared
206 * here because it must be initialized here.
207 */
0b4e3aa0 208static struct vm_object vm_submap_object_store;
1c79356b
A
209
210/*
211 * Virtual memory objects are initialized from
212 * a template (see vm_object_allocate).
213 *
214 * When adding a new field to the virtual memory
215 * object structure, be sure to add initialization
0b4e3aa0 216 * (see _vm_object_allocate()).
1c79356b 217 */
0b4e3aa0 218static struct vm_object vm_object_template;
1c79356b
A
219
220/*
221 * Virtual memory objects that are not referenced by
222 * any address maps, but that are allowed to persist
223 * (an attribute specified by the associated memory manager),
224 * are kept in a queue (vm_object_cached_list).
225 *
226 * When an object from this queue is referenced again,
227 * for example to make another address space mapping,
228 * it must be removed from the queue. That is, the
229 * queue contains *only* objects with zero references.
230 *
231 * The kernel may choose to terminate objects from this
232 * queue in order to reclaim storage. The current policy
233 * is to permit a fixed maximum number of unreferenced
234 * objects (vm_object_cached_max).
235 *
236 * A spin lock (accessed by routines
237 * vm_object_cache_{lock,lock_try,unlock}) governs the
238 * object cache. It must be held when objects are
239 * added to or removed from the cache (in vm_object_terminate).
240 * The routines that acquire a reference to a virtual
241 * memory object based on one of the memory object ports
242 * must also lock the cache.
243 *
244 * Ideally, the object cache should be more isolated
245 * from the reference mechanism, so that the lock need
246 * not be held to make simple references.
247 */
0b4e3aa0 248static queue_head_t vm_object_cached_list;
9bccf70c 249static int vm_object_cached_count=0;
0b4e3aa0
A
250static int vm_object_cached_high; /* highest # cached objects */
251static int vm_object_cached_max = 512; /* may be patched*/
1c79356b 252
0b4e3aa0 253static decl_mutex_data(,vm_object_cached_lock_data)
1c79356b
A
254
255#define vm_object_cache_lock() \
256 mutex_lock(&vm_object_cached_lock_data)
257#define vm_object_cache_lock_try() \
258 mutex_try(&vm_object_cached_lock_data)
259#define vm_object_cache_unlock() \
260 mutex_unlock(&vm_object_cached_lock_data)
261
262#define VM_OBJECT_HASH_COUNT 1024
0b4e3aa0
A
263static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
264static struct zone *vm_object_hash_zone;
1c79356b
A
265
266struct vm_object_hash_entry {
267 queue_chain_t hash_link; /* hash chain link */
0b4e3aa0 268 memory_object_t pager; /* pager we represent */
1c79356b
A
269 vm_object_t object; /* corresponding object */
270 boolean_t waiting; /* someone waiting for
271 * termination */
272};
273
274typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
275#define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
276
277#define VM_OBJECT_HASH_SHIFT 8
278#define vm_object_hash(pager) \
279 ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
280
91447636
A
281void vm_object_hash_entry_free(
282 vm_object_hash_entry_t entry);
283
1c79356b
A
284/*
285 * vm_object_hash_lookup looks up a pager in the hashtable
286 * and returns the corresponding entry, with optional removal.
287 */
288
0b4e3aa0 289static vm_object_hash_entry_t
1c79356b 290vm_object_hash_lookup(
0b4e3aa0 291 memory_object_t pager,
1c79356b
A
292 boolean_t remove_entry)
293{
294 register queue_t bucket;
295 register vm_object_hash_entry_t entry;
296
297 bucket = &vm_object_hashtable[vm_object_hash(pager)];
298
299 entry = (vm_object_hash_entry_t)queue_first(bucket);
300 while (!queue_end(bucket, (queue_entry_t)entry)) {
301 if (entry->pager == pager && !remove_entry)
302 return(entry);
303 else if (entry->pager == pager) {
304 queue_remove(bucket, entry,
305 vm_object_hash_entry_t, hash_link);
306 return(entry);
307 }
308
309 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
310 }
311
312 return(VM_OBJECT_HASH_ENTRY_NULL);
313}
314
315/*
316 * vm_object_hash_enter enters the specified
317 * pager / cache object association in the hashtable.
318 */
319
0b4e3aa0 320static void
1c79356b
A
321vm_object_hash_insert(
322 vm_object_hash_entry_t entry)
323{
324 register queue_t bucket;
325
326 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
327
328 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
329}
330
0b4e3aa0 331static vm_object_hash_entry_t
1c79356b 332vm_object_hash_entry_alloc(
0b4e3aa0 333 memory_object_t pager)
1c79356b
A
334{
335 vm_object_hash_entry_t entry;
336
337 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
338 entry->pager = pager;
339 entry->object = VM_OBJECT_NULL;
340 entry->waiting = FALSE;
341
342 return(entry);
343}
344
345void
346vm_object_hash_entry_free(
347 vm_object_hash_entry_t entry)
348{
91447636 349 zfree(vm_object_hash_zone, entry);
1c79356b
A
350}
351
352/*
353 * vm_object_allocate:
354 *
355 * Returns a new object with the given size.
356 */
357
91447636 358__private_extern__ void
1c79356b
A
359_vm_object_allocate(
360 vm_object_size_t size,
361 vm_object_t object)
362{
363 XPR(XPR_VM_OBJECT,
364 "vm_object_allocate, object 0x%X size 0x%X\n",
365 (integer_t)object, size, 0,0,0);
366
367 *object = vm_object_template;
368 queue_init(&object->memq);
369 queue_init(&object->msr_q);
91447636 370#ifdef UPL_DEBUG
1c79356b 371 queue_init(&object->uplq);
91447636 372#endif /* UPL_DEBUG */
1c79356b
A
373 vm_object_lock_init(object);
374 object->size = size;
375}
376
0b4e3aa0 377__private_extern__ vm_object_t
1c79356b
A
378vm_object_allocate(
379 vm_object_size_t size)
380{
381 register vm_object_t object;
1c79356b
A
382
383 object = (vm_object_t) zalloc(vm_object_zone);
384
0b4e3aa0
A
385// dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
386
387 if (object != VM_OBJECT_NULL)
388 _vm_object_allocate(size, object);
1c79356b
A
389
390 return object;
391}
392
393/*
394 * vm_object_bootstrap:
395 *
396 * Initialize the VM objects module.
397 */
0b4e3aa0 398__private_extern__ void
1c79356b
A
399vm_object_bootstrap(void)
400{
91447636 401 register int i;
1c79356b
A
402
403 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
55e303ae
A
404 round_page_32(512*1024),
405 round_page_32(12*1024),
1c79356b
A
406 "vm objects");
407
408 queue_init(&vm_object_cached_list);
91447636 409 mutex_init(&vm_object_cached_lock_data, 0);
1c79356b
A
410
411 vm_object_hash_zone =
412 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
55e303ae
A
413 round_page_32(512*1024),
414 round_page_32(12*1024),
1c79356b
A
415 "vm object hash entries");
416
417 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
418 queue_init(&vm_object_hashtable[i]);
419
420 /*
421 * Fill in a template object, for quick initialization
422 */
423
424 /* memq; Lock; init after allocation */
425 vm_object_template.size = 0;
91447636 426 vm_object_template.memq_hint = VM_PAGE_NULL;
1c79356b
A
427 vm_object_template.ref_count = 1;
428#if TASK_SWAPPER
429 vm_object_template.res_count = 1;
430#endif /* TASK_SWAPPER */
431 vm_object_template.resident_page_count = 0;
432 vm_object_template.copy = VM_OBJECT_NULL;
433 vm_object_template.shadow = VM_OBJECT_NULL;
434 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
55e303ae 435 vm_object_template.cow_hint = ~(vm_offset_t)0;
1c79356b
A
436 vm_object_template.true_share = FALSE;
437
0b4e3aa0 438 vm_object_template.pager = MEMORY_OBJECT_NULL;
1c79356b 439 vm_object_template.paging_offset = 0;
91447636 440 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
1c79356b
A
441 /* msr_q; init after allocation */
442
443 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
444 vm_object_template.absent_count = 0;
445 vm_object_template.paging_in_progress = 0;
446
447 /* Begin bitfields */
448 vm_object_template.all_wanted = 0; /* all bits FALSE */
449 vm_object_template.pager_created = FALSE;
450 vm_object_template.pager_initialized = FALSE;
451 vm_object_template.pager_ready = FALSE;
452 vm_object_template.pager_trusted = FALSE;
453 vm_object_template.can_persist = FALSE;
454 vm_object_template.internal = TRUE;
455 vm_object_template.temporary = TRUE;
456 vm_object_template.private = FALSE;
457 vm_object_template.pageout = FALSE;
458 vm_object_template.alive = TRUE;
91447636 459 vm_object_template.purgable = VM_OBJECT_NONPURGABLE;
1c79356b
A
460 vm_object_template.silent_overwrite = FALSE;
461 vm_object_template.advisory_pageout = FALSE;
462 vm_object_template.shadowed = FALSE;
463 vm_object_template.terminating = FALSE;
464 vm_object_template.shadow_severed = FALSE;
465 vm_object_template.phys_contiguous = FALSE;
0b4e3aa0 466 vm_object_template.nophyscache = FALSE;
1c79356b
A
467 /* End bitfields */
468
9bccf70c
A
469 /* cache bitfields */
470 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
471
1c79356b
A
472 /* cached_list; init after allocation */
473 vm_object_template.last_alloc = (vm_object_offset_t) 0;
474 vm_object_template.cluster_size = 0;
475#if MACH_PAGEMAP
476 vm_object_template.existence_map = VM_EXTERNAL_NULL;
477#endif /* MACH_PAGEMAP */
478#if MACH_ASSERT
479 vm_object_template.paging_object = VM_OBJECT_NULL;
480#endif /* MACH_ASSERT */
481
482 /*
483 * Initialize the "kernel object"
484 */
485
486 kernel_object = &kernel_object_store;
487
488/*
489 * Note that in the following size specifications, we need to add 1 because
55e303ae 490 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
1c79356b 491 */
55e303ae
A
492
493#ifdef ppc
494 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
495 kernel_object);
496#else
1c79356b
A
497 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
498 kernel_object);
55e303ae
A
499#endif
500 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1c79356b
A
501
502 /*
503 * Initialize the "submap object". Make it as large as the
504 * kernel object so that no limit is imposed on submap sizes.
505 */
506
507 vm_submap_object = &vm_submap_object_store;
55e303ae
A
508#ifdef ppc
509 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
510 vm_submap_object);
511#else
1c79356b
A
512 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
513 vm_submap_object);
55e303ae
A
514#endif
515 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
516
1c79356b
A
517 /*
518 * Create an "extra" reference to this object so that we never
519 * try to deallocate it; zfree doesn't like to be called with
520 * non-zone memory.
521 */
522 vm_object_reference(vm_submap_object);
523
524#if MACH_PAGEMAP
525 vm_external_module_initialize();
526#endif /* MACH_PAGEMAP */
527}
528
0b4e3aa0 529__private_extern__ void
1c79356b
A
530vm_object_init(void)
531{
532 /*
533 * Finish initializing the kernel object.
534 */
535}
536
1c79356b
A
537/* remove the typedef below when emergency work-around is taken out */
538typedef struct vnode_pager {
0b4e3aa0
A
539 memory_object_t pager;
540 memory_object_t pager_handle; /* pager */
541 memory_object_control_t control_handle; /* memory object's control handle */
542 void *vnode_handle; /* vnode handle */
1c79356b
A
543} *vnode_pager_t;
544
545#define MIGHT_NOT_CACHE_SHADOWS 1
546#if MIGHT_NOT_CACHE_SHADOWS
0b4e3aa0 547static int cache_shadows = TRUE;
1c79356b
A
548#endif /* MIGHT_NOT_CACHE_SHADOWS */
549
550/*
551 * vm_object_deallocate:
552 *
553 * Release a reference to the specified object,
554 * gained either through a vm_object_allocate
555 * or a vm_object_reference call. When all references
556 * are gone, storage associated with this object
557 * may be relinquished.
558 *
559 * No object may be locked.
560 */
0b4e3aa0 561__private_extern__ void
1c79356b
A
562vm_object_deallocate(
563 register vm_object_t object)
564{
565 boolean_t retry_cache_trim = FALSE;
91447636 566 vm_object_t shadow = VM_OBJECT_NULL;
1c79356b
A
567
568// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
569// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
570
571
572 while (object != VM_OBJECT_NULL) {
573
574 /*
575 * The cache holds a reference (uncounted) to
576 * the object; we must lock it before removing
577 * the object.
578 */
55e303ae
A
579 for (;;) {
580 vm_object_cache_lock();
1c79356b 581
55e303ae
A
582 /*
583 * if we try to take a regular lock here
584 * we risk deadlocking against someone
585 * holding a lock on this object while
586 * trying to vm_object_deallocate a different
587 * object
588 */
589 if (vm_object_lock_try(object))
590 break;
591 vm_object_cache_unlock();
592 mutex_pause(); /* wait a bit */
593 }
0b4e3aa0
A
594 assert(object->ref_count > 0);
595
596 /*
597 * If the object has a named reference, and only
598 * that reference would remain, inform the pager
599 * about the last "mapping" reference going away.
600 */
601 if ((object->ref_count == 2) && (object->named)) {
602 memory_object_t pager = object->pager;
603
604 /* Notify the Pager that there are no */
605 /* more mappers for this object */
606
607 if (pager != MEMORY_OBJECT_NULL) {
608 vm_object_unlock(object);
609 vm_object_cache_unlock();
610
611 memory_object_unmap(pager);
612
55e303ae
A
613 for (;;) {
614 vm_object_cache_lock();
615
616 /*
617 * if we try to take a regular lock here
618 * we risk deadlocking against someone
619 * holding a lock on this object while
620 * trying to vm_object_deallocate a different
621 * object
622 */
623 if (vm_object_lock_try(object))
624 break;
625 vm_object_cache_unlock();
626 mutex_pause(); /* wait a bit */
627 }
0b4e3aa0
A
628 assert(object->ref_count > 0);
629 }
630 }
1c79356b
A
631
632 /*
633 * Lose the reference. If other references
634 * remain, then we are done, unless we need
635 * to retry a cache trim.
636 * If it is the last reference, then keep it
637 * until any pending initialization is completed.
638 */
639
0b4e3aa0
A
640 /* if the object is terminating, it cannot go into */
641 /* the cache and we obviously should not call */
642 /* terminate again. */
643
644 if ((object->ref_count > 1) || object->terminating) {
1c79356b 645 object->ref_count--;
1c79356b 646 vm_object_res_deallocate(object);
1c79356b 647 vm_object_cache_unlock();
91447636
A
648
649 if (object->ref_count == 1 &&
650 object->shadow != VM_OBJECT_NULL) {
651 /*
c0fea474
A
652 * There's only one reference left on this
653 * VM object. We can't tell if it's a valid
654 * one (from a mapping for example) or if this
655 * object is just part of a possibly stale and
656 * useless shadow chain.
657 * We would like to try and collapse it into
658 * its parent, but we don't have any pointers
659 * back to this parent object.
91447636
A
660 * But we can try and collapse this object with
661 * its own shadows, in case these are useless
662 * too...
c0fea474
A
663 * We can't bypass this object though, since we
664 * don't know if this last reference on it is
665 * meaningful or not.
91447636 666 */
c0fea474 667 vm_object_collapse(object, 0, FALSE);
91447636
A
668 }
669
670 vm_object_unlock(object);
1c79356b
A
671 if (retry_cache_trim &&
672 ((object = vm_object_cache_trim(TRUE)) !=
673 VM_OBJECT_NULL)) {
674 continue;
675 }
676 return;
677 }
678
679 /*
680 * We have to wait for initialization
681 * before destroying or caching the object.
682 */
683
684 if (object->pager_created && ! object->pager_initialized) {
685 assert(! object->can_persist);
686 vm_object_assert_wait(object,
687 VM_OBJECT_EVENT_INITIALIZED,
688 THREAD_UNINT);
689 vm_object_unlock(object);
690 vm_object_cache_unlock();
9bccf70c 691 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
692 continue;
693 }
694
695 /*
696 * If this object can persist, then enter it in
697 * the cache. Otherwise, terminate it.
698 *
699 * NOTE: Only permanent objects are cached, and
700 * permanent objects cannot have shadows. This
701 * affects the residence counting logic in a minor
702 * way (can do it in-line, mostly).
703 */
704
0b4e3aa0 705 if ((object->can_persist) && (object->alive)) {
1c79356b
A
706 /*
707 * Now it is safe to decrement reference count,
708 * and to return if reference count is > 0.
709 */
710 if (--object->ref_count > 0) {
711 vm_object_res_deallocate(object);
712 vm_object_unlock(object);
713 vm_object_cache_unlock();
714 if (retry_cache_trim &&
715 ((object = vm_object_cache_trim(TRUE)) !=
716 VM_OBJECT_NULL)) {
717 continue;
718 }
719 return;
720 }
721
722#if MIGHT_NOT_CACHE_SHADOWS
723 /*
724 * Remove shadow now if we don't
725 * want to cache shadows.
726 */
727 if (! cache_shadows) {
728 shadow = object->shadow;
729 object->shadow = VM_OBJECT_NULL;
730 }
731#endif /* MIGHT_NOT_CACHE_SHADOWS */
732
733 /*
734 * Enter the object onto the queue of
735 * cached objects, and deactivate
736 * all of its pages.
737 */
738 assert(object->shadow == VM_OBJECT_NULL);
739 VM_OBJ_RES_DECR(object);
740 XPR(XPR_VM_OBJECT,
741 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
742 (integer_t)object,
743 (integer_t)vm_object_cached_list.next,
744 (integer_t)vm_object_cached_list.prev,0,0);
745
746 vm_object_cached_count++;
747 if (vm_object_cached_count > vm_object_cached_high)
748 vm_object_cached_high = vm_object_cached_count;
749 queue_enter(&vm_object_cached_list, object,
750 vm_object_t, cached_list);
751 vm_object_cache_unlock();
0b4e3aa0 752 vm_object_deactivate_all_pages(object);
1c79356b
A
753 vm_object_unlock(object);
754
755#if MIGHT_NOT_CACHE_SHADOWS
756 /*
757 * If we have a shadow that we need
758 * to deallocate, do so now, remembering
759 * to trim the cache later.
760 */
761 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
762 object = shadow;
763 retry_cache_trim = TRUE;
764 continue;
765 }
766#endif /* MIGHT_NOT_CACHE_SHADOWS */
767
768 /*
769 * Trim the cache. If the cache trim
770 * returns with a shadow for us to deallocate,
771 * then remember to retry the cache trim
772 * when we are done deallocating the shadow.
773 * Otherwise, we are done.
774 */
775
776 object = vm_object_cache_trim(TRUE);
777 if (object == VM_OBJECT_NULL) {
778 return;
779 }
780 retry_cache_trim = TRUE;
781
782 } else {
783 /*
784 * This object is not cachable; terminate it.
785 */
786 XPR(XPR_VM_OBJECT,
91447636
A
787 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
788 (integer_t)object, object->resident_page_count,
789 object->paging_in_progress,
790 (void *)current_thread(),object->ref_count);
1c79356b
A
791
792 VM_OBJ_RES_DECR(object); /* XXX ? */
793 /*
794 * Terminate this object. If it had a shadow,
795 * then deallocate it; otherwise, if we need
796 * to retry a cache trim, do so now; otherwise,
797 * we are done. "pageout" objects have a shadow,
798 * but maintain a "paging reference" rather than
799 * a normal reference.
800 */
801 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
802 if(vm_object_terminate(object) != KERN_SUCCESS) {
803 return;
804 }
805 if (shadow != VM_OBJECT_NULL) {
806 object = shadow;
807 continue;
808 }
809 if (retry_cache_trim &&
810 ((object = vm_object_cache_trim(TRUE)) !=
811 VM_OBJECT_NULL)) {
812 continue;
813 }
814 return;
815 }
816 }
817 assert(! retry_cache_trim);
818}
819
820/*
821 * Check to see whether we really need to trim
822 * down the cache. If so, remove an object from
823 * the cache, terminate it, and repeat.
824 *
825 * Called with, and returns with, cache lock unlocked.
826 */
827vm_object_t
828vm_object_cache_trim(
829 boolean_t called_from_vm_object_deallocate)
830{
831 register vm_object_t object = VM_OBJECT_NULL;
832 vm_object_t shadow;
833
834 for (;;) {
835
836 /*
837 * If we no longer need to trim the cache,
838 * then we are done.
839 */
840
841 vm_object_cache_lock();
842 if (vm_object_cached_count <= vm_object_cached_max) {
843 vm_object_cache_unlock();
844 return VM_OBJECT_NULL;
845 }
846
847 /*
848 * We must trim down the cache, so remove
849 * the first object in the cache.
850 */
851 XPR(XPR_VM_OBJECT,
852 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
853 (integer_t)vm_object_cached_list.next,
854 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
855
856 object = (vm_object_t) queue_first(&vm_object_cached_list);
9bccf70c
A
857 if(object == (vm_object_t) &vm_object_cached_list) {
858 /* something's wrong with the calling parameter or */
859 /* the value of vm_object_cached_count, just fix */
860 /* and return */
861 if(vm_object_cached_max < 0)
862 vm_object_cached_max = 0;
863 vm_object_cached_count = 0;
864 vm_object_cache_unlock();
865 return VM_OBJECT_NULL;
866 }
1c79356b
A
867 vm_object_lock(object);
868 queue_remove(&vm_object_cached_list, object, vm_object_t,
869 cached_list);
870 vm_object_cached_count--;
871
872 /*
873 * Since this object is in the cache, we know
874 * that it is initialized and has no references.
875 * Take a reference to avoid recursive deallocations.
876 */
877
878 assert(object->pager_initialized);
879 assert(object->ref_count == 0);
880 object->ref_count++;
881
882 /*
883 * Terminate the object.
884 * If the object had a shadow, we let vm_object_deallocate
885 * deallocate it. "pageout" objects have a shadow, but
886 * maintain a "paging reference" rather than a normal
887 * reference.
888 * (We are careful here to limit recursion.)
889 */
890 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
891 if(vm_object_terminate(object) != KERN_SUCCESS)
892 continue;
893 if (shadow != VM_OBJECT_NULL) {
894 if (called_from_vm_object_deallocate) {
895 return shadow;
896 } else {
897 vm_object_deallocate(shadow);
898 }
899 }
900 }
901}
902
903boolean_t vm_object_terminate_remove_all = FALSE;
904
905/*
906 * Routine: vm_object_terminate
907 * Purpose:
908 * Free all resources associated with a vm_object.
909 * In/out conditions:
0b4e3aa0 910 * Upon entry, the object must be locked,
1c79356b
A
911 * and the object must have exactly one reference.
912 *
913 * The shadow object reference is left alone.
914 *
915 * The object must be unlocked if its found that pages
916 * must be flushed to a backing object. If someone
917 * manages to map the object while it is being flushed
918 * the object is returned unlocked and unchanged. Otherwise,
919 * upon exit, the cache will be unlocked, and the
920 * object will cease to exist.
921 */
0b4e3aa0 922static kern_return_t
1c79356b
A
923vm_object_terminate(
924 register vm_object_t object)
925{
0b4e3aa0 926 memory_object_t pager;
1c79356b
A
927 register vm_page_t p;
928 vm_object_t shadow_object;
929
930 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
931 (integer_t)object, object->ref_count, 0, 0, 0);
932
1c79356b
A
933 if (!object->pageout && (!object->temporary || object->can_persist)
934 && (object->pager != NULL || object->shadow_severed)) {
0b4e3aa0 935 vm_object_cache_unlock();
1c79356b
A
936 while (!queue_empty(&object->memq)) {
937 /*
938 * Clear pager_trusted bit so that the pages get yanked
939 * out of the object instead of cleaned in place. This
940 * prevents a deadlock in XMM and makes more sense anyway.
941 */
942 object->pager_trusted = FALSE;
943
944 p = (vm_page_t) queue_first(&object->memq);
945
946 VM_PAGE_CHECK(p);
947
948 if (p->busy || p->cleaning) {
949 if(p->cleaning || p->absent) {
950 vm_object_paging_wait(object, THREAD_UNINT);
951 continue;
952 } else {
953 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
954 }
955 }
956
957 vm_page_lock_queues();
55e303ae 958 p->busy = TRUE;
1c79356b
A
959 VM_PAGE_QUEUES_REMOVE(p);
960 vm_page_unlock_queues();
961
962 if (p->absent || p->private) {
963
964 /*
965 * For private pages, VM_PAGE_FREE just
966 * leaves the page structure around for
967 * its owner to clean up. For absent
968 * pages, the structure is returned to
969 * the appropriate pool.
970 */
971
972 goto free_page;
973 }
974
975 if (p->fictitious)
976 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
977
978 if (!p->dirty)
55e303ae 979 p->dirty = pmap_is_modified(p->phys_page);
1c79356b 980
0b4e3aa0 981 if ((p->dirty || p->precious) && !p->error && object->alive) {
1c79356b 982 vm_pageout_cluster(p); /* flush page */
1c79356b
A
983 vm_object_paging_wait(object, THREAD_UNINT);
984 XPR(XPR_VM_OBJECT,
985 "vm_object_terminate restart, object 0x%X ref %d\n",
986 (integer_t)object, object->ref_count, 0, 0, 0);
987 } else {
988 free_page:
989 VM_PAGE_FREE(p);
990 }
991 }
0b4e3aa0
A
992 vm_object_unlock(object);
993 vm_object_cache_lock();
994 vm_object_lock(object);
1c79356b 995 }
0b4e3aa0
A
996
997 /*
998 * Make sure the object isn't already being terminated
999 */
1000 if(object->terminating) {
1001 object->ref_count -= 1;
1002 assert(object->ref_count > 0);
1003 vm_object_cache_unlock();
1004 vm_object_unlock(object);
1005 return KERN_FAILURE;
1006 }
1007
1008 /*
1009 * Did somebody get a reference to the object while we were
1010 * cleaning it?
1011 */
1c79356b
A
1012 if(object->ref_count != 1) {
1013 object->ref_count -= 1;
0b4e3aa0 1014 assert(object->ref_count > 0);
1c79356b 1015 vm_object_res_deallocate(object);
0b4e3aa0 1016 vm_object_cache_unlock();
1c79356b
A
1017 vm_object_unlock(object);
1018 return KERN_FAILURE;
1019 }
1020
1c79356b
A
1021 /*
1022 * Make sure no one can look us up now.
1023 */
1024
0b4e3aa0
A
1025 object->terminating = TRUE;
1026 object->alive = FALSE;
1027 vm_object_remove(object);
1c79356b
A
1028
1029 /*
1030 * Detach the object from its shadow if we are the shadow's
55e303ae
A
1031 * copy. The reference we hold on the shadow must be dropped
1032 * by our caller.
1c79356b
A
1033 */
1034 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1035 !(object->pageout)) {
1036 vm_object_lock(shadow_object);
55e303ae
A
1037 if (shadow_object->copy == object)
1038 shadow_object->copy = VM_OBJECT_NULL;
1c79356b
A
1039 vm_object_unlock(shadow_object);
1040 }
1041
1042 /*
1043 * The pageout daemon might be playing with our pages.
1044 * Now that the object is dead, it won't touch any more
1045 * pages, but some pages might already be on their way out.
0b4e3aa0
A
1046 * Hence, we wait until the active paging activities have ceased
1047 * before we break the association with the pager itself.
1c79356b 1048 */
0b4e3aa0
A
1049 while (object->paging_in_progress != 0) {
1050 vm_object_cache_unlock();
1051 vm_object_wait(object,
1052 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1053 THREAD_UNINT);
1054 vm_object_cache_lock();
1055 vm_object_lock(object);
1056 }
1057
1058 pager = object->pager;
1059 object->pager = MEMORY_OBJECT_NULL;
1060
1061 if (pager != MEMORY_OBJECT_NULL)
91447636 1062 memory_object_control_disable(object->pager_control);
0b4e3aa0
A
1063 vm_object_cache_unlock();
1064
1c79356b
A
1065 object->ref_count--;
1066#if TASK_SWAPPER
1067 assert(object->res_count == 0);
1068#endif /* TASK_SWAPPER */
1069
1c79356b
A
1070 assert (object->ref_count == 0);
1071
1072 /*
1073 * Clean or free the pages, as appropriate.
1074 * It is possible for us to find busy/absent pages,
1075 * if some faults on this object were aborted.
1076 */
1077 if (object->pageout) {
1078 assert(shadow_object != VM_OBJECT_NULL);
1079 assert(shadow_object == object->shadow);
1080
1081 vm_pageout_object_terminate(object);
1082
0b4e3aa0
A
1083 } else if ((object->temporary && !object->can_persist) ||
1084 (pager == MEMORY_OBJECT_NULL)) {
1c79356b
A
1085 while (!queue_empty(&object->memq)) {
1086 p = (vm_page_t) queue_first(&object->memq);
1087
1088 VM_PAGE_CHECK(p);
1089 VM_PAGE_FREE(p);
1090 }
1091 } else if (!queue_empty(&object->memq)) {
1092 panic("vm_object_terminate: queue just emptied isn't");
1093 }
1094
1095 assert(object->paging_in_progress == 0);
1096 assert(object->ref_count == 0);
1097
1c79356b 1098 /*
0b4e3aa0
A
1099 * If the pager has not already been released by
1100 * vm_object_destroy, we need to terminate it and
1101 * release our reference to it here.
1c79356b 1102 */
0b4e3aa0
A
1103 if (pager != MEMORY_OBJECT_NULL) {
1104 vm_object_unlock(object);
1105 vm_object_release_pager(pager);
1106 vm_object_lock(object);
1c79356b 1107 }
0b4e3aa0 1108
1c79356b 1109 /* kick off anyone waiting on terminating */
0b4e3aa0 1110 object->terminating = FALSE;
1c79356b
A
1111 vm_object_paging_begin(object);
1112 vm_object_paging_end(object);
1113 vm_object_unlock(object);
1114
1115#if MACH_PAGEMAP
1116 vm_external_destroy(object->existence_map, object->size);
1117#endif /* MACH_PAGEMAP */
1118
1119 /*
1120 * Free the space for the object.
1121 */
91447636 1122 zfree(vm_object_zone, object);
1c79356b
A
1123 return KERN_SUCCESS;
1124}
1125
1126/*
1127 * Routine: vm_object_pager_wakeup
1128 * Purpose: Wake up anyone waiting for termination of a pager.
1129 */
1130
0b4e3aa0 1131static void
1c79356b 1132vm_object_pager_wakeup(
0b4e3aa0 1133 memory_object_t pager)
1c79356b
A
1134{
1135 vm_object_hash_entry_t entry;
1136 boolean_t waiting = FALSE;
1137
1138 /*
1139 * If anyone was waiting for the memory_object_terminate
1140 * to be queued, wake them up now.
1141 */
1142 vm_object_cache_lock();
1143 entry = vm_object_hash_lookup(pager, TRUE);
1144 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1145 waiting = entry->waiting;
1146 vm_object_cache_unlock();
1147 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1148 if (waiting)
1149 thread_wakeup((event_t) pager);
1150 vm_object_hash_entry_free(entry);
1151 }
1152}
1153
1154/*
0b4e3aa0
A
1155 * Routine: vm_object_release_pager
1156 * Purpose: Terminate the pager and, upon completion,
1157 * release our last reference to it.
1158 * just like memory_object_terminate, except
1159 * that we wake up anyone blocked in vm_object_enter
1160 * waiting for termination message to be queued
1161 * before calling memory_object_init.
1c79356b 1162 */
0b4e3aa0
A
1163static void
1164vm_object_release_pager(
1165 memory_object_t pager)
1c79356b 1166{
1c79356b 1167
0b4e3aa0
A
1168 /*
1169 * Terminate the pager.
1170 */
1c79356b 1171
0b4e3aa0 1172 (void) memory_object_terminate(pager);
1c79356b 1173
0b4e3aa0
A
1174 /*
1175 * Wakeup anyone waiting for this terminate
1176 */
1177 vm_object_pager_wakeup(pager);
1c79356b 1178
0b4e3aa0
A
1179 /*
1180 * Release reference to pager.
1181 */
1182 memory_object_deallocate(pager);
1183}
1c79356b 1184
1c79356b 1185/*
0b4e3aa0 1186 * Routine: vm_object_destroy
1c79356b 1187 * Purpose:
0b4e3aa0 1188 * Shut down a VM object, despite the
1c79356b
A
1189 * presence of address map (or other) references
1190 * to the vm_object.
1191 */
1192kern_return_t
0b4e3aa0
A
1193vm_object_destroy(
1194 vm_object_t object,
91447636 1195 __unused kern_return_t reason)
1c79356b 1196{
0b4e3aa0 1197 memory_object_t old_pager;
1c79356b
A
1198
1199 if (object == VM_OBJECT_NULL)
1200 return(KERN_SUCCESS);
1201
1202 /*
0b4e3aa0 1203 * Remove the pager association immediately.
1c79356b
A
1204 *
1205 * This will prevent the memory manager from further
1206 * meddling. [If it wanted to flush data or make
1207 * other changes, it should have done so before performing
1208 * the destroy call.]
1209 */
1210
1211 vm_object_cache_lock();
1212 vm_object_lock(object);
1c79356b
A
1213 object->can_persist = FALSE;
1214 object->named = FALSE;
0b4e3aa0 1215 object->alive = FALSE;
1c79356b
A
1216
1217 /*
0b4e3aa0 1218 * Rip out the pager from the vm_object now...
1c79356b
A
1219 */
1220
0b4e3aa0
A
1221 vm_object_remove(object);
1222 old_pager = object->pager;
1223 object->pager = MEMORY_OBJECT_NULL;
1224 if (old_pager != MEMORY_OBJECT_NULL)
91447636 1225 memory_object_control_disable(object->pager_control);
0b4e3aa0 1226 vm_object_cache_unlock();
1c79356b
A
1227
1228 /*
0b4e3aa0
A
1229 * Wait for the existing paging activity (that got
1230 * through before we nulled out the pager) to subside.
1c79356b
A
1231 */
1232
1233 vm_object_paging_wait(object, THREAD_UNINT);
1234 vm_object_unlock(object);
1235
1236 /*
0b4e3aa0 1237 * Terminate the object now.
1c79356b 1238 */
0b4e3aa0
A
1239 if (old_pager != MEMORY_OBJECT_NULL) {
1240 vm_object_release_pager(old_pager);
1241
1242 /*
1243 * JMM - Release the caller's reference. This assumes the
1244 * caller had a reference to release, which is a big (but
1245 * currently valid) assumption if this is driven from the
1246 * vnode pager (it is holding a named reference when making
1247 * this call)..
1248 */
1249 vm_object_deallocate(object);
1c79356b 1250
1c79356b 1251 }
1c79356b
A
1252 return(KERN_SUCCESS);
1253}
1254
1255/*
1256 * vm_object_deactivate_pages
1257 *
1258 * Deactivate all pages in the specified object. (Keep its pages
1259 * in memory even though it is no longer referenced.)
1260 *
1261 * The object must be locked.
1262 */
0b4e3aa0
A
1263static void
1264vm_object_deactivate_all_pages(
1c79356b
A
1265 register vm_object_t object)
1266{
1267 register vm_page_t p;
1268
1269 queue_iterate(&object->memq, p, vm_page_t, listq) {
1270 vm_page_lock_queues();
1271 if (!p->busy)
1272 vm_page_deactivate(p);
1273 vm_page_unlock_queues();
1274 }
1275}
1276
0b4e3aa0
A
1277__private_extern__ void
1278vm_object_deactivate_pages(
1279 vm_object_t object,
1280 vm_object_offset_t offset,
1281 vm_object_size_t size,
1282 boolean_t kill_page)
1283{
1284 vm_object_t orig_object;
1285 int pages_moved = 0;
1286 int pages_found = 0;
1287
1288 /*
1289 * entered with object lock held, acquire a paging reference to
1290 * prevent the memory_object and control ports from
1291 * being destroyed.
1292 */
1293 orig_object = object;
1294
1295 for (;;) {
1296 register vm_page_t m;
1297 vm_object_offset_t toffset;
1298 vm_object_size_t tsize;
1299
1300 vm_object_paging_begin(object);
1301 vm_page_lock_queues();
1302
1303 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1304
1305 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1306
1307 pages_found++;
1308
1309 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1310
91447636
A
1311 assert(!m->laundry);
1312
0b4e3aa0 1313 m->reference = FALSE;
55e303ae 1314 pmap_clear_reference(m->phys_page);
0b4e3aa0
A
1315
1316 if ((kill_page) && (object->internal)) {
1317 m->precious = FALSE;
1318 m->dirty = FALSE;
55e303ae 1319 pmap_clear_modify(m->phys_page);
0b4e3aa0
A
1320 vm_external_state_clr(object->existence_map, offset);
1321 }
1322 VM_PAGE_QUEUES_REMOVE(m);
1323
91447636
A
1324 assert(!m->laundry);
1325 assert(m->object != kernel_object);
1326 assert(m->pageq.next == NULL &&
1327 m->pageq.prev == NULL);
9bccf70c
A
1328 if(m->zero_fill) {
1329 queue_enter_first(
1330 &vm_page_queue_zf,
1331 m, vm_page_t, pageq);
1332 } else {
1333 queue_enter_first(
1334 &vm_page_queue_inactive,
1335 m, vm_page_t, pageq);
1336 }
0b4e3aa0
A
1337
1338 m->inactive = TRUE;
1339 if (!m->fictitious)
1340 vm_page_inactive_count++;
1341
1342 pages_moved++;
1343 }
1344 }
1345 }
1346 vm_page_unlock_queues();
1347 vm_object_paging_end(object);
1348
1349 if (object->shadow) {
1350 vm_object_t tmp_object;
1351
1352 kill_page = 0;
1353
1354 offset += object->shadow_offset;
1355
1356 tmp_object = object->shadow;
1357 vm_object_lock(tmp_object);
1358
1359 if (object != orig_object)
1360 vm_object_unlock(object);
1361 object = tmp_object;
1362 } else
1363 break;
1364 }
1365 if (object != orig_object)
1366 vm_object_unlock(object);
1367}
1c79356b
A
1368
1369/*
1370 * Routine: vm_object_pmap_protect
1371 *
1372 * Purpose:
1373 * Reduces the permission for all physical
1374 * pages in the specified object range.
1375 *
1376 * If removing write permission only, it is
1377 * sufficient to protect only the pages in
1378 * the top-level object; only those pages may
1379 * have write permission.
1380 *
1381 * If removing all access, we must follow the
1382 * shadow chain from the top-level object to
1383 * remove access to all pages in shadowed objects.
1384 *
1385 * The object must *not* be locked. The object must
1386 * be temporary/internal.
1387 *
1388 * If pmap is not NULL, this routine assumes that
1389 * the only mappings for the pages are in that
1390 * pmap.
1391 */
1392
0b4e3aa0 1393__private_extern__ void
1c79356b
A
1394vm_object_pmap_protect(
1395 register vm_object_t object,
1396 register vm_object_offset_t offset,
91447636 1397 vm_object_size_t size,
1c79356b 1398 pmap_t pmap,
91447636 1399 vm_map_offset_t pmap_start,
1c79356b
A
1400 vm_prot_t prot)
1401{
1402 if (object == VM_OBJECT_NULL)
1403 return;
91447636
A
1404 size = vm_object_round_page(size);
1405 offset = vm_object_trunc_page(offset);
1c79356b
A
1406
1407 vm_object_lock(object);
1408
55e303ae 1409 assert(object->internal);
de355530 1410
1c79356b 1411 while (TRUE) {
91447636 1412 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
1c79356b
A
1413 vm_object_unlock(object);
1414 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1415 return;
1416 }
1417
9bccf70c
A
1418 /* if we are doing large ranges with respect to resident */
1419 /* page count then we should interate over pages otherwise */
1420 /* inverse page look-up will be faster */
91447636 1421 if (ptoa_64(object->resident_page_count / 4) < size) {
9bccf70c
A
1422 vm_page_t p;
1423 vm_object_offset_t end;
1c79356b
A
1424
1425 end = offset + size;
1426
1427 if (pmap != PMAP_NULL) {
1428 queue_iterate(&object->memq, p, vm_page_t, listq) {
1429 if (!p->fictitious &&
1430 (offset <= p->offset) && (p->offset < end)) {
91447636 1431 vm_map_offset_t start;
1c79356b 1432
91447636
A
1433 start = pmap_start + p->offset - offset;
1434 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
1c79356b
A
1435 }
1436 }
1437 } else {
1438 queue_iterate(&object->memq, p, vm_page_t, listq) {
1439 if (!p->fictitious &&
1440 (offset <= p->offset) && (p->offset < end)) {
1441
55e303ae 1442 pmap_page_protect(p->phys_page,
1c79356b
A
1443 prot & ~p->page_lock);
1444 }
1445 }
1446 }
9bccf70c
A
1447 } else {
1448 vm_page_t p;
1449 vm_object_offset_t end;
1450 vm_object_offset_t target_off;
1451
1452 end = offset + size;
1453
1454 if (pmap != PMAP_NULL) {
1455 for(target_off = offset;
91447636
A
1456 target_off < end;
1457 target_off += PAGE_SIZE) {
1458 p = vm_page_lookup(object, target_off);
1459 if (p != VM_PAGE_NULL) {
1460 vm_offset_t start;
1461 start = pmap_start +
9bccf70c
A
1462 (vm_offset_t)(p->offset - offset);
1463 pmap_protect(pmap, start,
1464 start + PAGE_SIZE, prot);
1465 }
1466 }
1467 } else {
1468 for(target_off = offset;
1469 target_off < end; target_off += PAGE_SIZE) {
91447636
A
1470 p = vm_page_lookup(object, target_off);
1471 if (p != VM_PAGE_NULL) {
55e303ae 1472 pmap_page_protect(p->phys_page,
9bccf70c
A
1473 prot & ~p->page_lock);
1474 }
1475 }
1476 }
1477 }
1c79356b
A
1478
1479 if (prot == VM_PROT_NONE) {
1480 /*
1481 * Must follow shadow chain to remove access
1482 * to pages in shadowed objects.
1483 */
1484 register vm_object_t next_object;
1485
1486 next_object = object->shadow;
1487 if (next_object != VM_OBJECT_NULL) {
1488 offset += object->shadow_offset;
1489 vm_object_lock(next_object);
1490 vm_object_unlock(object);
1491 object = next_object;
1492 }
1493 else {
1494 /*
1495 * End of chain - we are done.
1496 */
1497 break;
1498 }
1499 }
1500 else {
1501 /*
1502 * Pages in shadowed objects may never have
1503 * write permission - we may stop here.
1504 */
1505 break;
1506 }
1507 }
1508
1509 vm_object_unlock(object);
1510}
1511
1512/*
1513 * Routine: vm_object_copy_slowly
1514 *
1515 * Description:
1516 * Copy the specified range of the source
1517 * virtual memory object without using
1518 * protection-based optimizations (such
1519 * as copy-on-write). The pages in the
1520 * region are actually copied.
1521 *
1522 * In/out conditions:
1523 * The caller must hold a reference and a lock
1524 * for the source virtual memory object. The source
1525 * object will be returned *unlocked*.
1526 *
1527 * Results:
1528 * If the copy is completed successfully, KERN_SUCCESS is
1529 * returned. If the caller asserted the interruptible
1530 * argument, and an interruption occurred while waiting
1531 * for a user-generated event, MACH_SEND_INTERRUPTED is
1532 * returned. Other values may be returned to indicate
1533 * hard errors during the copy operation.
1534 *
1535 * A new virtual memory object is returned in a
1536 * parameter (_result_object). The contents of this
1537 * new object, starting at a zero offset, are a copy
1538 * of the source memory region. In the event of
1539 * an error, this parameter will contain the value
1540 * VM_OBJECT_NULL.
1541 */
0b4e3aa0 1542__private_extern__ kern_return_t
1c79356b
A
1543vm_object_copy_slowly(
1544 register vm_object_t src_object,
1545 vm_object_offset_t src_offset,
1546 vm_object_size_t size,
1547 boolean_t interruptible,
1548 vm_object_t *_result_object) /* OUT */
1549{
1550 vm_object_t new_object;
1551 vm_object_offset_t new_offset;
1552
1553 vm_object_offset_t src_lo_offset = src_offset;
1554 vm_object_offset_t src_hi_offset = src_offset + size;
1555
1556 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1557 src_object, src_offset, size, 0, 0);
1558
1559 if (size == 0) {
1560 vm_object_unlock(src_object);
1561 *_result_object = VM_OBJECT_NULL;
1562 return(KERN_INVALID_ARGUMENT);
1563 }
1564
1565 /*
1566 * Prevent destruction of the source object while we copy.
1567 */
1568
1569 assert(src_object->ref_count > 0);
1570 src_object->ref_count++;
1571 VM_OBJ_RES_INCR(src_object);
1572 vm_object_unlock(src_object);
1573
1574 /*
1575 * Create a new object to hold the copied pages.
1576 * A few notes:
1577 * We fill the new object starting at offset 0,
1578 * regardless of the input offset.
1579 * We don't bother to lock the new object within
1580 * this routine, since we have the only reference.
1581 */
1582
1583 new_object = vm_object_allocate(size);
1584 new_offset = 0;
91447636 1585 vm_object_lock(new_object);
1c79356b
A
1586
1587 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1588
1589 for ( ;
1590 size != 0 ;
1591 src_offset += PAGE_SIZE_64,
1592 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1593 ) {
1594 vm_page_t new_page;
1595 vm_fault_return_t result;
1596
1597 while ((new_page = vm_page_alloc(new_object, new_offset))
1598 == VM_PAGE_NULL) {
1599 if (!vm_page_wait(interruptible)) {
91447636 1600 vm_object_unlock(new_object);
1c79356b 1601 vm_object_deallocate(new_object);
91447636 1602 vm_object_deallocate(src_object);
1c79356b
A
1603 *_result_object = VM_OBJECT_NULL;
1604 return(MACH_SEND_INTERRUPTED);
1605 }
1606 }
1607
1608 do {
1609 vm_prot_t prot = VM_PROT_READ;
1610 vm_page_t _result_page;
1611 vm_page_t top_page;
1612 register
1613 vm_page_t result_page;
1614 kern_return_t error_code;
1615
1616 vm_object_lock(src_object);
1617 vm_object_paging_begin(src_object);
1618
1619 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1620 result = vm_fault_page(src_object, src_offset,
1621 VM_PROT_READ, FALSE, interruptible,
1622 src_lo_offset, src_hi_offset,
1623 VM_BEHAVIOR_SEQUENTIAL,
1624 &prot, &_result_page, &top_page,
1625 (int *)0,
0b4e3aa0 1626 &error_code, FALSE, FALSE, NULL, 0);
1c79356b
A
1627
1628 switch(result) {
1629 case VM_FAULT_SUCCESS:
1630 result_page = _result_page;
1631
1632 /*
1633 * We don't need to hold the object
1634 * lock -- the busy page will be enough.
1635 * [We don't care about picking up any
1636 * new modifications.]
1637 *
1638 * Copy the page to the new object.
1639 *
1640 * POLICY DECISION:
1641 * If result_page is clean,
1642 * we could steal it instead
1643 * of copying.
1644 */
1645
1646 vm_object_unlock(result_page->object);
1647 vm_page_copy(result_page, new_page);
1648
1649 /*
1650 * Let go of both pages (make them
1651 * not busy, perform wakeup, activate).
1652 */
1653
1654 new_page->busy = FALSE;
1655 new_page->dirty = TRUE;
1656 vm_object_lock(result_page->object);
1657 PAGE_WAKEUP_DONE(result_page);
1658
1659 vm_page_lock_queues();
1660 if (!result_page->active &&
1661 !result_page->inactive)
1662 vm_page_activate(result_page);
1663 vm_page_activate(new_page);
1664 vm_page_unlock_queues();
1665
1666 /*
1667 * Release paging references and
1668 * top-level placeholder page, if any.
1669 */
1670
1671 vm_fault_cleanup(result_page->object,
1672 top_page);
1673
1674 break;
1675
1676 case VM_FAULT_RETRY:
1677 break;
1678
1679 case VM_FAULT_FICTITIOUS_SHORTAGE:
1680 vm_page_more_fictitious();
1681 break;
1682
1683 case VM_FAULT_MEMORY_SHORTAGE:
1684 if (vm_page_wait(interruptible))
1685 break;
1686 /* fall thru */
1687
1688 case VM_FAULT_INTERRUPTED:
1689 vm_page_free(new_page);
91447636 1690 vm_object_unlock(new_object);
1c79356b
A
1691 vm_object_deallocate(new_object);
1692 vm_object_deallocate(src_object);
1693 *_result_object = VM_OBJECT_NULL;
1694 return(MACH_SEND_INTERRUPTED);
1695
1696 case VM_FAULT_MEMORY_ERROR:
1697 /*
1698 * A policy choice:
1699 * (a) ignore pages that we can't
1700 * copy
1701 * (b) return the null object if
1702 * any page fails [chosen]
1703 */
1704
1705 vm_page_lock_queues();
1706 vm_page_free(new_page);
1707 vm_page_unlock_queues();
91447636 1708 vm_object_unlock(new_object);
1c79356b
A
1709 vm_object_deallocate(new_object);
1710 vm_object_deallocate(src_object);
1711 *_result_object = VM_OBJECT_NULL;
1712 return(error_code ? error_code:
1713 KERN_MEMORY_ERROR);
1714 }
1715 } while (result != VM_FAULT_SUCCESS);
1716 }
1717
1718 /*
1719 * Lose the extra reference, and return our object.
1720 */
1721
91447636 1722 vm_object_unlock(new_object);
1c79356b
A
1723 vm_object_deallocate(src_object);
1724 *_result_object = new_object;
1725 return(KERN_SUCCESS);
1726}
1727
1728/*
1729 * Routine: vm_object_copy_quickly
1730 *
1731 * Purpose:
1732 * Copy the specified range of the source virtual
1733 * memory object, if it can be done without waiting
1734 * for user-generated events.
1735 *
1736 * Results:
1737 * If the copy is successful, the copy is returned in
1738 * the arguments; otherwise, the arguments are not
1739 * affected.
1740 *
1741 * In/out conditions:
1742 * The object should be unlocked on entry and exit.
1743 */
1744
1745/*ARGSUSED*/
0b4e3aa0 1746__private_extern__ boolean_t
1c79356b
A
1747vm_object_copy_quickly(
1748 vm_object_t *_object, /* INOUT */
91447636
A
1749 __unused vm_object_offset_t offset, /* IN */
1750 __unused vm_object_size_t size, /* IN */
1c79356b
A
1751 boolean_t *_src_needs_copy, /* OUT */
1752 boolean_t *_dst_needs_copy) /* OUT */
1753{
1754 vm_object_t object = *_object;
1755 memory_object_copy_strategy_t copy_strategy;
1756
1757 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1758 *_object, offset, size, 0, 0);
1759 if (object == VM_OBJECT_NULL) {
1760 *_src_needs_copy = FALSE;
1761 *_dst_needs_copy = FALSE;
1762 return(TRUE);
1763 }
1764
1765 vm_object_lock(object);
1766
1767 copy_strategy = object->copy_strategy;
1768
1769 switch (copy_strategy) {
1770 case MEMORY_OBJECT_COPY_SYMMETRIC:
1771
1772 /*
1773 * Symmetric copy strategy.
1774 * Make another reference to the object.
1775 * Leave object/offset unchanged.
1776 */
1777
1778 assert(object->ref_count > 0);
1779 object->ref_count++;
1780 vm_object_res_reference(object);
1781 object->shadowed = TRUE;
1782 vm_object_unlock(object);
1783
1784 /*
1785 * Both source and destination must make
1786 * shadows, and the source must be made
1787 * read-only if not already.
1788 */
1789
1790 *_src_needs_copy = TRUE;
1791 *_dst_needs_copy = TRUE;
1792
1793 break;
1794
1795 case MEMORY_OBJECT_COPY_DELAY:
1796 vm_object_unlock(object);
1797 return(FALSE);
1798
1799 default:
1800 vm_object_unlock(object);
1801 return(FALSE);
1802 }
1803 return(TRUE);
1804}
1805
0b4e3aa0
A
1806static int copy_call_count = 0;
1807static int copy_call_sleep_count = 0;
1808static int copy_call_restart_count = 0;
1c79356b
A
1809
1810/*
1811 * Routine: vm_object_copy_call [internal]
1812 *
1813 * Description:
1814 * Copy the source object (src_object), using the
1815 * user-managed copy algorithm.
1816 *
1817 * In/out conditions:
1818 * The source object must be locked on entry. It
1819 * will be *unlocked* on exit.
1820 *
1821 * Results:
1822 * If the copy is successful, KERN_SUCCESS is returned.
1823 * A new object that represents the copied virtual
1824 * memory is returned in a parameter (*_result_object).
1825 * If the return value indicates an error, this parameter
1826 * is not valid.
1827 */
0b4e3aa0 1828static kern_return_t
1c79356b
A
1829vm_object_copy_call(
1830 vm_object_t src_object,
1831 vm_object_offset_t src_offset,
1832 vm_object_size_t size,
1833 vm_object_t *_result_object) /* OUT */
1834{
1835 kern_return_t kr;
1836 vm_object_t copy;
1837 boolean_t check_ready = FALSE;
1838
1839 /*
1840 * If a copy is already in progress, wait and retry.
1841 *
1842 * XXX
1843 * Consider making this call interruptable, as Mike
1844 * intended it to be.
1845 *
1846 * XXXO
1847 * Need a counter or version or something to allow
1848 * us to use the copy that the currently requesting
1849 * thread is obtaining -- is it worth adding to the
 1850 * vm object structure? Depends on how common this case is.
1851 */
1852 copy_call_count++;
1853 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
9bccf70c 1854 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1c79356b 1855 THREAD_UNINT);
1c79356b
A
1856 copy_call_restart_count++;
1857 }
1858
1859 /*
1860 * Indicate (for the benefit of memory_object_create_copy)
1861 * that we want a copy for src_object. (Note that we cannot
1862 * do a real assert_wait before calling memory_object_copy,
1863 * so we simply set the flag.)
1864 */
1865
1866 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1867 vm_object_unlock(src_object);
1868
1869 /*
1870 * Ask the memory manager to give us a memory object
1871 * which represents a copy of the src object.
1872 * The memory manager may give us a memory object
1873 * which we already have, or it may give us a
1874 * new memory object. This memory object will arrive
1875 * via memory_object_create_copy.
1876 */
1877
1878 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1879 if (kr != KERN_SUCCESS) {
1880 return kr;
1881 }
1882
1883 /*
1884 * Wait for the copy to arrive.
1885 */
1886 vm_object_lock(src_object);
1887 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
9bccf70c 1888 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1c79356b 1889 THREAD_UNINT);
1c79356b
A
1890 copy_call_sleep_count++;
1891 }
1892Retry:
1893 assert(src_object->copy != VM_OBJECT_NULL);
1894 copy = src_object->copy;
1895 if (!vm_object_lock_try(copy)) {
1896 vm_object_unlock(src_object);
1897 mutex_pause(); /* wait a bit */
1898 vm_object_lock(src_object);
1899 goto Retry;
1900 }
1901 if (copy->size < src_offset+size)
1902 copy->size = src_offset+size;
1903
1904 if (!copy->pager_ready)
1905 check_ready = TRUE;
1906
1907 /*
1908 * Return the copy.
1909 */
1910 *_result_object = copy;
1911 vm_object_unlock(copy);
1912 vm_object_unlock(src_object);
1913
1914 /* Wait for the copy to be ready. */
1915 if (check_ready == TRUE) {
1916 vm_object_lock(copy);
1917 while (!copy->pager_ready) {
9bccf70c 1918 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1c79356b
A
1919 }
1920 vm_object_unlock(copy);
1921 }
1922
1923 return KERN_SUCCESS;
1924}
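/*
 * Descriptive note: because kr is unconditionally set to KERN_FAILURE
 * above (pending the memory_object.defs change flagged by the XXX
 * comment), this routine currently always returns before a copy
 * request is ever issued to the memory manager.
 */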
1925
0b4e3aa0
A
1926static int copy_delayed_lock_collisions = 0;
1927static int copy_delayed_max_collisions = 0;
1928static int copy_delayed_lock_contention = 0;
1929static int copy_delayed_protect_iterate = 0;
1c79356b
A
1930
1931/*
1932 * Routine: vm_object_copy_delayed [internal]
1933 *
1934 * Description:
1935 * Copy the specified virtual memory object, using
1936 * the asymmetric copy-on-write algorithm.
1937 *
1938 * In/out conditions:
55e303ae
A
1939 * The src_object must be locked on entry. It will be unlocked
1940 * on exit - so the caller must also hold a reference to it.
1c79356b
A
1941 *
1942 * This routine will not block waiting for user-generated
1943 * events. It is not interruptible.
1944 */
0b4e3aa0 1945__private_extern__ vm_object_t
1c79356b
A
1946vm_object_copy_delayed(
1947 vm_object_t src_object,
1948 vm_object_offset_t src_offset,
1949 vm_object_size_t size)
1950{
1951 vm_object_t new_copy = VM_OBJECT_NULL;
1952 vm_object_t old_copy;
1953 vm_page_t p;
55e303ae 1954 vm_object_size_t copy_size = src_offset + size;
1c79356b
A
1955
1956 int collisions = 0;
1957 /*
1958 * The user-level memory manager wants to see all of the changes
1959 * to this object, but it has promised not to make any changes on
1960 * its own.
1961 *
1962 * Perform an asymmetric copy-on-write, as follows:
1963 * Create a new object, called a "copy object" to hold
1964 * pages modified by the new mapping (i.e., the copy,
1965 * not the original mapping).
1966 * Record the original object as the backing object for
1967 * the copy object. If the original mapping does not
1968 * change a page, it may be used read-only by the copy.
1969 * Record the copy object in the original object.
1970 * When the original mapping causes a page to be modified,
1971 * it must be copied to a new page that is "pushed" to
1972 * the copy object.
1973 * Mark the new mapping (the copy object) copy-on-write.
1974 * This makes the copy object itself read-only, allowing
1975 * it to be reused if the original mapping makes no
1976 * changes, and simplifying the synchronization required
1977 * in the "push" operation described above.
1978 *
 1979 * The copy-on-write is said to be asymmetric because the original
 1980 * object is *not* marked copy-on-write. A copied page is pushed
 1981 * to the copy object, regardless of which party attempted to modify
1982 * the page.
1983 *
1984 * Repeated asymmetric copy operations may be done. If the
1985 * original object has not been changed since the last copy, its
1986 * copy object can be reused. Otherwise, a new copy object can be
1987 * inserted between the original object and its previous copy
 1988 * object. Since any copy object is read-only, this cannot
 1989 * affect the contents of the previous copy object.
1990 *
1991 * Note that a copy object is higher in the object tree than the
1992 * original object; therefore, use of the copy object recorded in
1993 * the original object must be done carefully, to avoid deadlock.
1994 */
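	/*
	 * Illustrative sketch (added for clarity, not in the original):
	 * after one delayed copy the object graph looks roughly like
	 *
	 *	new mapping --> copy object --> src_object <-- original mapping
	 *	                (read-only       (pushes a page up to the
	 *	                 shadow of        copy object just before
	 *	                 src_object)      modifying it)
	 *
	 * A later copy of an unchanged src_object simply reuses the same
	 * copy object; otherwise a fresh copy object is inserted between
	 * src_object and the previous one.
	 */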
1995
1996 Retry:
1c79356b 1997
55e303ae
A
1998 /*
1999 * Wait for paging in progress.
2000 */
2001 if (!src_object->true_share)
2002 vm_object_paging_wait(src_object, THREAD_UNINT);
2003
1c79356b
A
2004 /*
2005 * See whether we can reuse the result of a previous
2006 * copy operation.
2007 */
2008
2009 old_copy = src_object->copy;
2010 if (old_copy != VM_OBJECT_NULL) {
2011 /*
2012 * Try to get the locks (out of order)
2013 */
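		/*
		 * Since src_object is already held, taking old_copy here
		 * is an out-of-order acquisition; use a try-lock and, if
		 * it fails, drop src_object, pause briefly, and restart
		 * from Retry to avoid deadlock.
		 */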
2014 if (!vm_object_lock_try(old_copy)) {
2015 vm_object_unlock(src_object);
2016 mutex_pause();
2017
2018 /* Heisenberg Rules */
2019 copy_delayed_lock_collisions++;
2020 if (collisions++ == 0)
2021 copy_delayed_lock_contention++;
2022
2023 if (collisions > copy_delayed_max_collisions)
2024 copy_delayed_max_collisions = collisions;
2025
55e303ae 2026 vm_object_lock(src_object);
1c79356b
A
2027 goto Retry;
2028 }
2029
2030 /*
2031 * Determine whether the old copy object has
2032 * been modified.
2033 */
2034
2035 if (old_copy->resident_page_count == 0 &&
2036 !old_copy->pager_created) {
2037 /*
2038 * It has not been modified.
2039 *
2040 * Return another reference to
55e303ae
A
2041 * the existing copy-object if
2042 * we can safely grow it (if
2043 * needed).
de355530 2044 */
1c79356b 2045
55e303ae
A
2046 if (old_copy->size < copy_size) {
2047 /*
2048 * We can't perform a delayed copy if any of the
2049 * pages in the extended range are wired (because
2050 * we can't safely take write permission away from
2051 * wired pages). If the pages aren't wired, then
2052 * go ahead and protect them.
2053 */
2054 copy_delayed_protect_iterate++;
2055 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2056 if (!p->fictitious &&
2057 p->offset >= old_copy->size &&
2058 p->offset < copy_size) {
2059 if (p->wire_count > 0) {
2060 vm_object_unlock(old_copy);
2061 vm_object_unlock(src_object);
91447636
A
2062
2063 if (new_copy != VM_OBJECT_NULL) {
2064 vm_object_unlock(new_copy);
2065 vm_object_deallocate(new_copy);
2066 }
2067
55e303ae
A
2068 return VM_OBJECT_NULL;
2069 } else {
2070 pmap_page_protect(p->phys_page,
2071 (VM_PROT_ALL & ~VM_PROT_WRITE &
2072 ~p->page_lock));
2073 }
2074 }
2075 }
2076 old_copy->size = copy_size;
2077 }
2078
2079 vm_object_reference_locked(old_copy);
d7e50217
A
2080 vm_object_unlock(old_copy);
2081 vm_object_unlock(src_object);
91447636
A
2082
2083 if (new_copy != VM_OBJECT_NULL) {
2084 vm_object_unlock(new_copy);
2085 vm_object_deallocate(new_copy);
2086 }
2087
55e303ae 2088 return(old_copy);
d7e50217 2089 }
de355530
A
2090
2091 /*
2092 * Adjust the size argument so that the newly-created
2093 * copy object will be large enough to back either the
55e303ae 2094 * old copy object or the new mapping.
de355530 2095 */
55e303ae
A
2096 if (old_copy->size > copy_size)
2097 copy_size = old_copy->size;
2098
2099 if (new_copy == VM_OBJECT_NULL) {
2100 vm_object_unlock(old_copy);
2101 vm_object_unlock(src_object);
2102 new_copy = vm_object_allocate(copy_size);
2103 vm_object_lock(src_object);
2104 vm_object_lock(new_copy);
2105 goto Retry;
2106 }
2107 new_copy->size = copy_size;
1c79356b
A
2108
2109 /*
2110 * The copy-object is always made large enough to
2111 * completely shadow the original object, since
2112 * it may have several users who want to shadow
2113 * the original object at different points.
2114 */
2115
2116 assert((old_copy->shadow == src_object) &&
2117 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2118
55e303ae
A
2119 } else if (new_copy == VM_OBJECT_NULL) {
2120 vm_object_unlock(src_object);
2121 new_copy = vm_object_allocate(copy_size);
2122 vm_object_lock(src_object);
2123 vm_object_lock(new_copy);
2124 goto Retry;
2125 }
2126
2127 /*
2128 * We now have the src object locked, and the new copy object
2129 * allocated and locked (and potentially the old copy locked).
2130 * Before we go any further, make sure we can still perform
2131 * a delayed copy, as the situation may have changed.
2132 *
2133 * Specifically, we can't perform a delayed copy if any of the
2134 * pages in the range are wired (because we can't safely take
2135 * write permission away from wired pages). If the pages aren't
2136 * wired, then go ahead and protect them.
2137 */
2138 copy_delayed_protect_iterate++;
2139 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2140 if (!p->fictitious && p->offset < copy_size) {
2141 if (p->wire_count > 0) {
2142 if (old_copy)
2143 vm_object_unlock(old_copy);
2144 vm_object_unlock(src_object);
2145 vm_object_unlock(new_copy);
2146 vm_object_deallocate(new_copy);
2147 return VM_OBJECT_NULL;
2148 } else {
2149 pmap_page_protect(p->phys_page,
2150 (VM_PROT_ALL & ~VM_PROT_WRITE &
2151 ~p->page_lock));
2152 }
2153 }
2154 }
2155
2156 if (old_copy != VM_OBJECT_NULL) {
1c79356b
A
2157 /*
2158 * Make the old copy-object shadow the new one.
2159 * It will receive no more pages from the original
2160 * object.
2161 */
2162
2163 src_object->ref_count--; /* remove ref. from old_copy */
2164 assert(src_object->ref_count > 0);
2165 old_copy->shadow = new_copy;
2166 assert(new_copy->ref_count > 0);
2167 new_copy->ref_count++; /* for old_copy->shadow ref. */
2168
2169#if TASK_SWAPPER
2170 if (old_copy->res_count) {
2171 VM_OBJ_RES_INCR(new_copy);
2172 VM_OBJ_RES_DECR(src_object);
2173 }
2174#endif
2175
2176 vm_object_unlock(old_copy); /* done with old_copy */
1c79356b
A
2177 }
2178
2179 /*
2180 * Point the new copy at the existing object.
2181 */
1c79356b
A
2182 new_copy->shadow = src_object;
2183 new_copy->shadow_offset = 0;
2184 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2185 assert(src_object->ref_count > 0);
2186 src_object->ref_count++;
2187 VM_OBJ_RES_INCR(src_object);
2188 src_object->copy = new_copy;
55e303ae 2189 vm_object_unlock(src_object);
1c79356b
A
2190 vm_object_unlock(new_copy);
2191
1c79356b
A
2192 XPR(XPR_VM_OBJECT,
2193 "vm_object_copy_delayed: used copy object %X for source %X\n",
2194 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2195
2196 return(new_copy);
2197}
2198
2199/*
2200 * Routine: vm_object_copy_strategically
2201 *
2202 * Purpose:
2203 * Perform a copy according to the source object's
2204 * declared strategy. This operation may block,
2205 * and may be interrupted.
2206 */
0b4e3aa0 2207__private_extern__ kern_return_t
1c79356b
A
2208vm_object_copy_strategically(
2209 register vm_object_t src_object,
2210 vm_object_offset_t src_offset,
2211 vm_object_size_t size,
2212 vm_object_t *dst_object, /* OUT */
2213 vm_object_offset_t *dst_offset, /* OUT */
2214 boolean_t *dst_needs_copy) /* OUT */
2215{
2216 boolean_t result;
2217 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2218 memory_object_copy_strategy_t copy_strategy;
2219
2220 assert(src_object != VM_OBJECT_NULL);
2221
2222 vm_object_lock(src_object);
2223
2224 /*
2225 * The copy strategy is only valid if the memory manager
2226 * is "ready". Internal objects are always ready.
2227 */
2228
2229 while (!src_object->internal && !src_object->pager_ready) {
9bccf70c 2230 wait_result_t wait_result;
1c79356b 2231
9bccf70c
A
2232 wait_result = vm_object_sleep( src_object,
2233 VM_OBJECT_EVENT_PAGER_READY,
2234 interruptible);
2235 if (wait_result != THREAD_AWAKENED) {
2236 vm_object_unlock(src_object);
1c79356b
A
2237 *dst_object = VM_OBJECT_NULL;
2238 *dst_offset = 0;
2239 *dst_needs_copy = FALSE;
2240 return(MACH_SEND_INTERRUPTED);
2241 }
1c79356b
A
2242 }
2243
2244 copy_strategy = src_object->copy_strategy;
2245
2246 /*
2247 * Use the appropriate copy strategy.
2248 */
2249
2250 switch (copy_strategy) {
55e303ae
A
2251 case MEMORY_OBJECT_COPY_DELAY:
2252 *dst_object = vm_object_copy_delayed(src_object,
2253 src_offset, size);
2254 if (*dst_object != VM_OBJECT_NULL) {
2255 *dst_offset = src_offset;
2256 *dst_needs_copy = TRUE;
2257 result = KERN_SUCCESS;
2258 break;
2259 }
2260 vm_object_lock(src_object);
2261 /* fall thru when delayed copy not allowed */
2262
1c79356b
A
2263 case MEMORY_OBJECT_COPY_NONE:
2264 result = vm_object_copy_slowly(src_object, src_offset, size,
2265 interruptible, dst_object);
2266 if (result == KERN_SUCCESS) {
2267 *dst_offset = 0;
2268 *dst_needs_copy = FALSE;
2269 }
2270 break;
2271
2272 case MEMORY_OBJECT_COPY_CALL:
2273 result = vm_object_copy_call(src_object, src_offset, size,
2274 dst_object);
2275 if (result == KERN_SUCCESS) {
2276 *dst_offset = src_offset;
2277 *dst_needs_copy = TRUE;
2278 }
2279 break;
2280
1c79356b
A
2281 case MEMORY_OBJECT_COPY_SYMMETRIC:
2282 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2283 vm_object_unlock(src_object);
2284 result = KERN_MEMORY_RESTART_COPY;
2285 break;
2286
2287 default:
2288 panic("copy_strategically: bad strategy");
2289 result = KERN_INVALID_ARGUMENT;
2290 }
2291 return(result);
2292}
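/*
 * Illustrative sketch (not part of the original source): a caller that
 * needs a private copy of a range might invoke this as
 *
 *	kr = vm_object_copy_strategically(src_object, offset, size,
 *					  &dst_object, &dst_offset,
 *					  &dst_needs_copy);
 *	if (kr == KERN_MEMORY_RESTART_COPY) {
 *		... the source uses the symmetric strategy, so the
 *		    caller must redo the copy along a symmetric-style
 *		    path such as vm_object_copy_quickly() ...
 *	}
 *
 * The exact recovery from KERN_MEMORY_RESTART_COPY depends on the
 * caller; the fallback named here is only a plausible example.
 */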
2293
2294/*
2295 * vm_object_shadow:
2296 *
2297 * Create a new object which is backed by the
2298 * specified existing object range. The source
2299 * object reference is deallocated.
2300 *
2301 * The new object and offset into that object
2302 * are returned in the source parameters.
2303 */
2304boolean_t vm_object_shadow_check = FALSE;
2305
0b4e3aa0 2306__private_extern__ boolean_t
1c79356b
A
2307vm_object_shadow(
2308 vm_object_t *object, /* IN/OUT */
2309 vm_object_offset_t *offset, /* IN/OUT */
2310 vm_object_size_t length)
2311{
2312 register vm_object_t source;
2313 register vm_object_t result;
2314
2315 source = *object;
2316 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2317
2318 /*
2319 * Determine if we really need a shadow.
2320 */
2321
2322 if (vm_object_shadow_check && source->ref_count == 1 &&
2323 (source->shadow == VM_OBJECT_NULL ||
2324 source->shadow->copy == VM_OBJECT_NULL))
2325 {
2326 source->shadowed = FALSE;
2327 return FALSE;
2328 }
2329
2330 /*
2331 * Allocate a new object with the given length
2332 */
2333
2334 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2335 panic("vm_object_shadow: no object for shadowing");
2336
2337 /*
2338 * The new object shadows the source object, adding
2339 * a reference to it. Our caller changes his reference
2340 * to point to the new object, removing a reference to
2341 * the source object. Net result: no change of reference
2342 * count.
2343 */
2344 result->shadow = source;
2345
2346 /*
2347 * Store the offset into the source object,
2348 * and fix up the offset into the new object.
2349 */
2350
2351 result->shadow_offset = *offset;
2352
2353 /*
 2354 * Return the new object and offset.
2355 */
2356
2357 *offset = 0;
2358 *object = result;
2359 return TRUE;
2360}
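/*
 * Illustrative sketch (not part of the original source): a caller that
 * holds a reference to "object" shadows it in place, for example
 *
 *	if (vm_object_shadow(&object, &offset, length)) {
 *		... object/offset now name the new shadow object,
 *		    which is backed by the old object at the old
 *		    offset ...
 *	}
 */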
2361
2362/*
2363 * The relationship between vm_object structures and
0b4e3aa0 2364 * the memory_object requires careful synchronization.
1c79356b 2365 *
0b4e3aa0
A
2366 * All associations are created by memory_object_create_named
2367 * for external pagers and vm_object_pager_create for internal
2368 * objects as follows:
2369 *
2370 * pager: the memory_object itself, supplied by
1c79356b
A
2371 * the user requesting a mapping (or the kernel,
2372 * when initializing internal objects); the
2373 * kernel simulates holding send rights by keeping
2374 * a port reference;
0b4e3aa0 2375 *
1c79356b
A
2376 * pager_request:
2377 * the memory object control port,
2378 * created by the kernel; the kernel holds
2379 * receive (and ownership) rights to this
2380 * port, but no other references.
1c79356b
A
2381 *
2382 * When initialization is complete, the "initialized" field
2383 * is asserted. Other mappings using a particular memory object,
2384 * and any references to the vm_object gained through the
2385 * port association must wait for this initialization to occur.
2386 *
2387 * In order to allow the memory manager to set attributes before
2388 * requests (notably virtual copy operations, but also data or
2389 * unlock requests) are made, a "ready" attribute is made available.
2390 * Only the memory manager may affect the value of this attribute.
2391 * Its value does not affect critical kernel functions, such as
2392 * internal object initialization or destruction. [Furthermore,
2393 * memory objects created by the kernel are assumed to be ready
2394 * immediately; the default memory manager need not explicitly
2395 * set the "ready" attribute.]
2396 *
2397 * [Both the "initialized" and "ready" attribute wait conditions
2398 * use the "pager" field as the wait event.]
2399 *
2400 * The port associations can be broken down by any of the
2401 * following routines:
2402 * vm_object_terminate:
2403 * No references to the vm_object remain, and
2404 * the object cannot (or will not) be cached.
2405 * This is the normal case, and is done even
2406 * though one of the other cases has already been
2407 * done.
1c79356b
A
2408 * memory_object_destroy:
2409 * The memory manager has requested that the
0b4e3aa0
A
2410 * kernel relinquish references to the memory
2411 * object. [The memory manager may not want to
2412 * destroy the memory object, but may wish to
2413 * refuse or tear down existing memory mappings.]
2414 *
1c79356b
A
2415 * Each routine that breaks an association must break all of
2416 * them at once. At some later time, that routine must clear
0b4e3aa0 2417 * the pager field and release the memory object references.
1c79356b
A
2418 * [Furthermore, each routine must cope with the simultaneous
2419 * or previous operations of the others.]
2420 *
2421 * In addition to the lock on the object, the vm_object_cache_lock
0b4e3aa0
A
2422 * governs the associations. References gained through the
2423 * association require use of the cache lock.
1c79356b 2424 *
0b4e3aa0 2425 * Because the pager field may be cleared spontaneously, it
1c79356b
A
2426 * cannot be used to determine whether a memory object has
2427 * ever been associated with a particular vm_object. [This
2428 * knowledge is important to the shadow object mechanism.]
2429 * For this reason, an additional "created" attribute is
2430 * provided.
2431 *
0b4e3aa0
A
2432 * During various paging operations, the pager reference found in the
 2433 * vm_object must be valid. To prevent this from being released
1c79356b
A
2434 * (other than being removed, i.e., made null), routines may use
2435 * the vm_object_paging_begin/end routines [actually, macros].
2436 * The implementation uses the "paging_in_progress" and "wanted" fields.
0b4e3aa0 2437 * [Operations that alter the validity of the pager values include the
1c79356b
A
2438 * termination routines and vm_object_collapse.]
2439 */
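/*
 * Illustrative sketch (not part of the original source): the
 * "initialized" wait condition described above is used later in this
 * file (see vm_object_enter) as
 *
 *	vm_object_lock(object);
 *	while (!object->pager_initialized)
 *		vm_object_sleep(object,
 *				VM_OBJECT_EVENT_INITIALIZED,
 *				THREAD_UNINT);
 *	vm_object_unlock(object);
 */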
2440
0b4e3aa0 2441#if 0
91447636
A
2442static void vm_object_abort_activity(
2443 vm_object_t object);
2444
2445/*
2446 * Routine: vm_object_abort_activity [internal use only]
2447 * Purpose:
2448 * Abort paging requests pending on this object.
2449 * In/out conditions:
2450 * The object is locked on entry and exit.
2451 */
2452static void
2453vm_object_abort_activity(
2454 vm_object_t object)
2455{
2456 register
2457 vm_page_t p;
2458 vm_page_t next;
2459
2460 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
2461 (integer_t)object, 0, 0, 0, 0);
2462
2463 /*
2464 * Abort all activity that would be waiting
2465 * for a result on this memory object.
2466 *
2467 * We could also choose to destroy all pages
2468 * that we have in memory for this object, but
2469 * we don't.
2470 */
2471
2472 p = (vm_page_t) queue_first(&object->memq);
2473 while (!queue_end(&object->memq, (queue_entry_t) p)) {
2474 next = (vm_page_t) queue_next(&p->listq);
2475
2476 /*
2477 * If it's being paged in, destroy it.
2478 * If an unlock has been requested, start it again.
2479 */
2480
2481 if (p->busy && p->absent) {
2482 VM_PAGE_FREE(p);
2483 }
2484 else {
2485 if (p->unlock_request != VM_PROT_NONE)
2486 p->unlock_request = VM_PROT_NONE;
2487 PAGE_WAKEUP(p);
2488 }
2489
2490 p = next;
2491 }
2492
2493 /*
2494 * Wake up threads waiting for the memory object to
2495 * become ready.
2496 */
2497
2498 object->pager_ready = TRUE;
2499 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2500}
2501
0b4e3aa0
A
2502/*
2503 * Routine: vm_object_pager_dead
2504 *
2505 * Purpose:
 2506 * A port is being destroyed, and the IPC kobject code
2507 * can't tell if it represents a pager port or not.
2508 * So this function is called each time it sees a port
2509 * die.
2510 * THIS IS HORRIBLY INEFFICIENT. We should only call
2511 * this routine if we had requested a notification on
2512 * the port.
2513 */
1c79356b 2514
0b4e3aa0
A
2515__private_extern__ void
2516vm_object_pager_dead(
1c79356b
A
2517 ipc_port_t pager)
2518{
2519 vm_object_t object;
2520 vm_object_hash_entry_t entry;
1c79356b
A
2521
2522 /*
2523 * Perform essentially the same operations as in vm_object_lookup,
2524 * except that this time we look up based on the memory_object
2525 * port, not the control port.
2526 */
2527 vm_object_cache_lock();
2528 entry = vm_object_hash_lookup(pager, FALSE);
2529 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2530 entry->object == VM_OBJECT_NULL) {
2531 vm_object_cache_unlock();
2532 return;
2533 }
2534
2535 object = entry->object;
2536 entry->object = VM_OBJECT_NULL;
2537
2538 vm_object_lock(object);
2539 if (object->ref_count == 0) {
2540 XPR(XPR_VM_OBJECT_CACHE,
2541 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2542 (integer_t)object,
2543 (integer_t)vm_object_cached_list.next,
2544 (integer_t)vm_object_cached_list.prev, 0,0);
2545
2546 queue_remove(&vm_object_cached_list, object,
2547 vm_object_t, cached_list);
2548 vm_object_cached_count--;
2549 }
2550 object->ref_count++;
2551 vm_object_res_reference(object);
2552
2553 object->can_persist = FALSE;
2554
2555 assert(object->pager == pager);
2556
2557 /*
0b4e3aa0 2558 * Remove the pager association.
1c79356b
A
2559 *
2560 * Note that the memory_object itself is dead, so
2561 * we don't bother with it.
2562 */
2563
0b4e3aa0 2564 object->pager = MEMORY_OBJECT_NULL;
1c79356b
A
2565
2566 vm_object_unlock(object);
2567 vm_object_cache_unlock();
2568
2569 vm_object_pager_wakeup(pager);
2570
2571 /*
0b4e3aa0 2572 * Release the pager reference. Note that there's no
1c79356b 2573 * point in trying the memory_object_terminate call
0b4e3aa0
A
2574 * because the memory_object itself is dead. Also
2575 * release the memory_object_control reference, since
2576 * the pager didn't do that either.
1c79356b
A
2577 */
2578
0b4e3aa0
A
2579 memory_object_deallocate(pager);
2580 memory_object_control_deallocate(object->pager_request);
2581
1c79356b
A
2582
2583 /*
2584 * Restart pending page requests
2585 */
2586 vm_object_lock(object);
1c79356b 2587 vm_object_abort_activity(object);
1c79356b
A
2588 vm_object_unlock(object);
2589
2590 /*
2591 * Lose the object reference.
2592 */
2593
2594 vm_object_deallocate(object);
2595}
0b4e3aa0 2596#endif
1c79356b
A
2597
2598/*
2599 * Routine: vm_object_enter
2600 * Purpose:
2601 * Find a VM object corresponding to the given
2602 * pager; if no such object exists, create one,
2603 * and initialize the pager.
2604 */
2605vm_object_t
2606vm_object_enter(
0b4e3aa0 2607 memory_object_t pager,
1c79356b
A
2608 vm_object_size_t size,
2609 boolean_t internal,
2610 boolean_t init,
0b4e3aa0 2611 boolean_t named)
1c79356b
A
2612{
2613 register vm_object_t object;
2614 vm_object_t new_object;
2615 boolean_t must_init;
1c79356b 2616 vm_object_hash_entry_t entry, new_entry;
1c79356b 2617
0b4e3aa0 2618 if (pager == MEMORY_OBJECT_NULL)
1c79356b
A
2619 return(vm_object_allocate(size));
2620
2621 new_object = VM_OBJECT_NULL;
2622 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2623 must_init = init;
2624
2625 /*
2626 * Look for an object associated with this port.
2627 */
2628
1c79356b 2629 vm_object_cache_lock();
55e303ae 2630 do {
1c79356b
A
2631 entry = vm_object_hash_lookup(pager, FALSE);
2632
55e303ae
A
2633 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2634 if (new_object == VM_OBJECT_NULL) {
2635 /*
2636 * We must unlock to create a new object;
2637 * if we do so, we must try the lookup again.
2638 */
2639 vm_object_cache_unlock();
2640 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2641 new_entry = vm_object_hash_entry_alloc(pager);
2642 new_object = vm_object_allocate(size);
2643 vm_object_cache_lock();
2644 } else {
2645 /*
2646 * Lookup failed twice, and we have something
2647 * to insert; set the object.
2648 */
2649 vm_object_hash_insert(new_entry);
2650 entry = new_entry;
2651 entry->object = new_object;
2652 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2653 new_object = VM_OBJECT_NULL;
2654 must_init = TRUE;
2655 }
2656 } else if (entry->object == VM_OBJECT_NULL) {
2657 /*
2658 * If a previous object is being terminated,
2659 * we must wait for the termination message
2660 * to be queued (and lookup the entry again).
2661 */
1c79356b 2662 entry->waiting = TRUE;
55e303ae 2663 entry = VM_OBJECT_HASH_ENTRY_NULL;
1c79356b
A
2664 assert_wait((event_t) pager, THREAD_UNINT);
2665 vm_object_cache_unlock();
91447636 2666 thread_block(THREAD_CONTINUE_NULL);
1c79356b 2667 vm_object_cache_lock();
1c79356b 2668 }
55e303ae 2669 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
1c79356b
A
2670
2671 object = entry->object;
2672 assert(object != VM_OBJECT_NULL);
2673
2674 if (!must_init) {
2675 vm_object_lock(object);
1c79356b 2676 assert(!internal || object->internal);
0b4e3aa0
A
2677 if (named) {
2678 assert(!object->named);
1c79356b 2679 object->named = TRUE;
0b4e3aa0 2680 }
1c79356b
A
2681 if (object->ref_count == 0) {
2682 XPR(XPR_VM_OBJECT_CACHE,
2683 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2684 (integer_t)object,
2685 (integer_t)vm_object_cached_list.next,
2686 (integer_t)vm_object_cached_list.prev, 0,0);
2687 queue_remove(&vm_object_cached_list, object,
2688 vm_object_t, cached_list);
2689 vm_object_cached_count--;
2690 }
2691 object->ref_count++;
2692 vm_object_res_reference(object);
2693 vm_object_unlock(object);
2694
2695 VM_STAT(hits++);
2696 }
2697 assert(object->ref_count > 0);
2698
2699 VM_STAT(lookups++);
2700
2701 vm_object_cache_unlock();
2702
2703 XPR(XPR_VM_OBJECT,
2704 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2705 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2706
2707 /*
2708 * If we raced to create a vm_object but lost, let's
2709 * throw away ours.
2710 */
2711
2712 if (new_object != VM_OBJECT_NULL)
2713 vm_object_deallocate(new_object);
2714
2715 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2716 vm_object_hash_entry_free(new_entry);
2717
2718 if (must_init) {
91447636 2719 memory_object_control_t control;
1c79356b
A
2720
2721 /*
2722 * Allocate request port.
2723 */
2724
91447636
A
2725 control = memory_object_control_allocate(object);
2726 assert (control != MEMORY_OBJECT_CONTROL_NULL);
1c79356b
A
2727
2728 vm_object_lock(object);
91447636 2729 assert(object != kernel_object);
1c79356b
A
2730
2731 /*
0b4e3aa0 2732 * Copy the reference we were given.
1c79356b
A
2733 */
2734
0b4e3aa0 2735 memory_object_reference(pager);
1c79356b
A
2736 object->pager_created = TRUE;
2737 object->pager = pager;
2738 object->internal = internal;
2739 object->pager_trusted = internal;
2740 if (!internal) {
2741 /* copy strategy invalid until set by memory manager */
2742 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2743 }
91447636 2744 object->pager_control = control;
1c79356b
A
2745 object->pager_ready = FALSE;
2746
1c79356b
A
2747 vm_object_unlock(object);
2748
2749 /*
2750 * Let the pager know we're using it.
2751 */
2752
0b4e3aa0 2753 (void) memory_object_init(pager,
91447636 2754 object->pager_control,
0b4e3aa0 2755 PAGE_SIZE);
1c79356b
A
2756
2757 vm_object_lock(object);
0b4e3aa0
A
2758 if (named)
2759 object->named = TRUE;
1c79356b
A
2760 if (internal) {
2761 object->pager_ready = TRUE;
2762 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2763 }
2764
2765 object->pager_initialized = TRUE;
2766 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2767 } else {
2768 vm_object_lock(object);
2769 }
2770
2771 /*
2772 * [At this point, the object must be locked]
2773 */
2774
2775 /*
2776 * Wait for the work above to be done by the first
2777 * thread to map this object.
2778 */
2779
2780 while (!object->pager_initialized) {
9bccf70c 2781 vm_object_sleep(object,
1c79356b
A
2782 VM_OBJECT_EVENT_INITIALIZED,
2783 THREAD_UNINT);
1c79356b
A
2784 }
2785 vm_object_unlock(object);
2786
2787 XPR(XPR_VM_OBJECT,
2788 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2789 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2790 return(object);
2791}
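/*
 * Descriptive note: the internal-object path later in this file,
 * vm_object_pager_create(), enters its newly created pager with
 *
 *	vm_object_enter(pager, object->size, TRUE, TRUE, FALSE);
 *
 * i.e. internal, initializing, and unnamed.  External pagers reach
 * this routine through memory_object_create_named(), as noted in the
 * association comment above.
 */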
2792
2793/*
2794 * Routine: vm_object_pager_create
2795 * Purpose:
2796 * Create a memory object for an internal object.
2797 * In/out conditions:
2798 * The object is locked on entry and exit;
2799 * it may be unlocked within this call.
2800 * Limitations:
2801 * Only one thread may be performing a
2802 * vm_object_pager_create on an object at
2803 * a time. Presumably, only the pageout
2804 * daemon will be using this routine.
2805 */
2806
2807void
2808vm_object_pager_create(
2809 register vm_object_t object)
2810{
0b4e3aa0 2811 memory_object_t pager;
1c79356b
A
2812 vm_object_hash_entry_t entry;
2813#if MACH_PAGEMAP
2814 vm_object_size_t size;
2815 vm_external_map_t map;
2816#endif /* MACH_PAGEMAP */
2817
2818 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2819 (integer_t)object, 0,0,0,0);
2820
91447636
A
2821 assert(object != kernel_object);
2822
1c79356b
A
2823 if (memory_manager_default_check() != KERN_SUCCESS)
2824 return;
2825
2826 /*
2827 * Prevent collapse or termination by holding a paging reference
2828 */
2829
2830 vm_object_paging_begin(object);
2831 if (object->pager_created) {
2832 /*
2833 * Someone else got to it first...
2834 * wait for them to finish initializing the ports
2835 */
2836 while (!object->pager_initialized) {
9bccf70c
A
2837 vm_object_sleep(object,
2838 VM_OBJECT_EVENT_INITIALIZED,
2839 THREAD_UNINT);
1c79356b
A
2840 }
2841 vm_object_paging_end(object);
2842 return;
2843 }
2844
2845 /*
2846 * Indicate that a memory object has been assigned
2847 * before dropping the lock, to prevent a race.
2848 */
2849
2850 object->pager_created = TRUE;
2851 object->paging_offset = 0;
2852
2853#if MACH_PAGEMAP
2854 size = object->size;
2855#endif /* MACH_PAGEMAP */
2856 vm_object_unlock(object);
2857
2858#if MACH_PAGEMAP
2859 map = vm_external_create(size);
2860 vm_object_lock(object);
2861 assert(object->size == size);
2862 object->existence_map = map;
2863 vm_object_unlock(object);
2864#endif /* MACH_PAGEMAP */
2865
2866 /*
0b4e3aa0 2867 * Create the [internal] pager, and associate it with this object.
1c79356b 2868 *
0b4e3aa0 2869 * We make the association here so that vm_object_enter()
1c79356b
A
2870 * can look up the object to complete initializing it. No
2871 * user will ever map this object.
2872 */
2873 {
0b4e3aa0 2874 memory_object_default_t dmm;
1c79356b
A
2875 vm_size_t cluster_size;
2876
0b4e3aa0
A
2877 /* acquire a reference for the default memory manager */
2878 dmm = memory_manager_default_reference(&cluster_size);
1c79356b
A
2879 assert(cluster_size >= PAGE_SIZE);
2880
2881 object->cluster_size = cluster_size; /* XXX ??? */
2882 assert(object->temporary);
2883
0b4e3aa0
A
2884 /* create our new memory object */
2885 (void) memory_object_create(dmm, object->size, &pager);
2886
2887 memory_object_default_deallocate(dmm);
1c79356b
A
2888 }
2889
2890 entry = vm_object_hash_entry_alloc(pager);
2891
2892 vm_object_cache_lock();
2893 vm_object_hash_insert(entry);
2894
2895 entry->object = object;
2896 vm_object_cache_unlock();
2897
2898 /*
0b4e3aa0 2899 * A reference was returned by
1c79356b
A
2900 * memory_object_create(), and it is
2901 * copied by vm_object_enter().
2902 */
2903
2904 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
2905 panic("vm_object_pager_create: mismatch");
2906
2907 /*
0b4e3aa0 2908 * Drop the reference we were passed.
1c79356b 2909 */
0b4e3aa0 2910 memory_object_deallocate(pager);
1c79356b
A
2911
2912 vm_object_lock(object);
2913
2914 /*
2915 * Release the paging reference
2916 */
2917 vm_object_paging_end(object);
2918}
2919
2920/*
2921 * Routine: vm_object_remove
2922 * Purpose:
2923 * Eliminate the pager/object association
2924 * for this pager.
2925 * Conditions:
2926 * The object cache must be locked.
2927 */
0b4e3aa0 2928__private_extern__ void
1c79356b
A
2929vm_object_remove(
2930 vm_object_t object)
2931{
0b4e3aa0 2932 memory_object_t pager;
1c79356b 2933
0b4e3aa0 2934 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
1c79356b
A
2935 vm_object_hash_entry_t entry;
2936
0b4e3aa0 2937 entry = vm_object_hash_lookup(pager, FALSE);
1c79356b
A
2938 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
2939 entry->object = VM_OBJECT_NULL;
2940 }
2941
1c79356b
A
2942}
2943
2944/*
2945 * Global variables for vm_object_collapse():
2946 *
2947 * Counts for normal collapses and bypasses.
2948 * Debugging variables, to watch or disable collapse.
2949 */
0b4e3aa0
A
2950static long object_collapses = 0;
2951static long object_bypasses = 0;
1c79356b 2952
0b4e3aa0
A
2953static boolean_t vm_object_collapse_allowed = TRUE;
2954static boolean_t vm_object_bypass_allowed = TRUE;
2955
2956static int vm_external_discarded;
2957static int vm_external_collapsed;
1c79356b 2958
91447636
A
2959unsigned long vm_object_collapse_encrypted = 0;
2960
1c79356b 2961/*
0b4e3aa0
A
2962 * Routine: vm_object_do_collapse
2963 * Purpose:
2964 * Collapse an object with the object backing it.
2965 * Pages in the backing object are moved into the
2966 * parent, and the backing object is deallocated.
2967 * Conditions:
2968 * Both objects and the cache are locked; the page
2969 * queues are unlocked.
1c79356b
A
2970 *
2971 */
0b4e3aa0 2972static void
1c79356b
A
2973vm_object_do_collapse(
2974 vm_object_t object,
2975 vm_object_t backing_object)
2976{
2977 vm_page_t p, pp;
2978 vm_object_offset_t new_offset, backing_offset;
2979 vm_object_size_t size;
2980
2981 backing_offset = object->shadow_offset;
2982 size = object->size;
2983
1c79356b
A
2984 /*
2985 * Move all in-memory pages from backing_object
2986 * to the parent. Pages that have been paged out
2987 * will be overwritten by any of the parent's
2988 * pages that shadow them.
2989 */
2990
2991 while (!queue_empty(&backing_object->memq)) {
2992
2993 p = (vm_page_t) queue_first(&backing_object->memq);
2994
2995 new_offset = (p->offset - backing_offset);
2996
2997 assert(!p->busy || p->absent);
91447636 2998
1c79356b
A
2999 /*
3000 * If the parent has a page here, or if
3001 * this page falls outside the parent,
3002 * dispose of it.
3003 *
3004 * Otherwise, move it as planned.
3005 */
3006
3007 if (p->offset < backing_offset || new_offset >= size) {
3008 VM_PAGE_FREE(p);
3009 } else {
91447636
A
3010 /*
3011 * ENCRYPTED SWAP:
3012 * The encryption key includes the "pager" and the
3013 * "paging_offset". These might not be the same in
3014 * the new object, so we can't just move an encrypted
3015 * page from one object to the other. We can't just
3016 * decrypt the page here either, because that would drop
3017 * the object lock.
3018 * The caller should check for encrypted pages before
3019 * attempting to collapse.
3020 */
3021 ASSERT_PAGE_DECRYPTED(p);
3022
1c79356b
A
3023 pp = vm_page_lookup(object, new_offset);
3024 if (pp == VM_PAGE_NULL) {
3025
3026 /*
3027 * Parent now has no page.
3028 * Move the backing object's page up.
3029 */
3030
3031 vm_page_rename(p, object, new_offset);
3032#if MACH_PAGEMAP
3033 } else if (pp->absent) {
3034
3035 /*
3036 * Parent has an absent page...
3037 * it's not being paged in, so
3038 * it must really be missing from
3039 * the parent.
3040 *
3041 * Throw out the absent page...
3042 * any faults looking for that
3043 * page will restart with the new
3044 * one.
3045 */
3046
3047 VM_PAGE_FREE(pp);
3048 vm_page_rename(p, object, new_offset);
3049#endif /* MACH_PAGEMAP */
3050 } else {
3051 assert(! pp->absent);
3052
3053 /*
3054 * Parent object has a real page.
3055 * Throw away the backing object's
3056 * page.
3057 */
3058 VM_PAGE_FREE(p);
3059 }
3060 }
3061 }
3062
55e303ae
A
3063#if !MACH_PAGEMAP
3064 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL
3065 || (!backing_object->pager_created
3066 && backing_object->pager == MEMORY_OBJECT_NULL));
3067#else
3068 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
3069#endif /* !MACH_PAGEMAP */
1c79356b 3070
0b4e3aa0 3071 if (backing_object->pager != MEMORY_OBJECT_NULL) {
1c79356b
A
3072 vm_object_hash_entry_t entry;
3073
3074 /*
3075 * Move the pager from backing_object to object.
3076 *
3077 * XXX We're only using part of the paging space
3078 * for keeps now... we ought to discard the
3079 * unused portion.
3080 */
3081
55e303ae 3082 assert(!object->paging_in_progress);
1c79356b
A
3083 object->pager = backing_object->pager;
3084 entry = vm_object_hash_lookup(object->pager, FALSE);
3085 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3086 entry->object = object;
3087 object->pager_created = backing_object->pager_created;
91447636 3088 object->pager_control = backing_object->pager_control;
1c79356b
A
3089 object->pager_ready = backing_object->pager_ready;
3090 object->pager_initialized = backing_object->pager_initialized;
3091 object->cluster_size = backing_object->cluster_size;
3092 object->paging_offset =
3093 backing_object->paging_offset + backing_offset;
91447636
A
3094 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
3095 memory_object_control_collapse(object->pager_control,
0b4e3aa0 3096 object);
1c79356b
A
3097 }
3098 }
3099
3100 vm_object_cache_unlock();
3101
1c79356b
A
3102#if MACH_PAGEMAP
3103 /*
 3104 * If the shadow offset is 0, use the existence map from
3105 * the backing object if there is one. If the shadow offset is
3106 * not zero, toss it.
3107 *
3108 * XXX - If the shadow offset is not 0 then a bit copy is needed
 3109 * if the map is to be salvaged. For now, we just toss the
3110 * old map, giving the collapsed object no map. This means that
3111 * the pager is invoked for zero fill pages. If analysis shows
3112 * that this happens frequently and is a performance hit, then
3113 * this code should be fixed to salvage the map.
3114 */
3115 assert(object->existence_map == VM_EXTERNAL_NULL);
3116 if (backing_offset || (size != backing_object->size)) {
3117 vm_external_discarded++;
3118 vm_external_destroy(backing_object->existence_map,
3119 backing_object->size);
3120 }
3121 else {
3122 vm_external_collapsed++;
3123 object->existence_map = backing_object->existence_map;
3124 }
3125 backing_object->existence_map = VM_EXTERNAL_NULL;
3126#endif /* MACH_PAGEMAP */
3127
3128 /*
3129 * Object now shadows whatever backing_object did.
3130 * Note that the reference to backing_object->shadow
3131 * moves from within backing_object to within object.
3132 */
3133
91447636
A
3134 assert(!object->phys_contiguous);
3135 assert(!backing_object->phys_contiguous);
1c79356b 3136 object->shadow = backing_object->shadow;
91447636
A
3137 if (object->shadow) {
3138 object->shadow_offset += backing_object->shadow_offset;
3139 } else {
3140 /* no shadow, therefore no shadow offset... */
3141 object->shadow_offset = 0;
3142 }
1c79356b 3143 assert((object->shadow == VM_OBJECT_NULL) ||
55e303ae 3144 (object->shadow->copy != backing_object));
1c79356b
A
3145
3146 /*
3147 * Discard backing_object.
3148 *
3149 * Since the backing object has no pages, no
3150 * pager left, and no object references within it,
3151 * all that is necessary is to dispose of it.
3152 */
3153
3154 assert((backing_object->ref_count == 1) &&
3155 (backing_object->resident_page_count == 0) &&
3156 (backing_object->paging_in_progress == 0));
3157
1c79356b
A
3158 backing_object->alive = FALSE;
3159 vm_object_unlock(backing_object);
3160
3161 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3162 (integer_t)backing_object, 0,0,0,0);
3163
91447636 3164 zfree(vm_object_zone, backing_object);
1c79356b
A
3165
3166 object_collapses++;
3167}
3168
0b4e3aa0 3169static void
1c79356b
A
3170vm_object_do_bypass(
3171 vm_object_t object,
3172 vm_object_t backing_object)
3173{
3174 /*
3175 * Make the parent shadow the next object
3176 * in the chain.
3177 */
3178
3179#if TASK_SWAPPER
3180 /*
3181 * Do object reference in-line to
3182 * conditionally increment shadow's
3183 * residence count. If object is not
3184 * resident, leave residence count
3185 * on shadow alone.
3186 */
3187 if (backing_object->shadow != VM_OBJECT_NULL) {
3188 vm_object_lock(backing_object->shadow);
3189 backing_object->shadow->ref_count++;
3190 if (object->res_count != 0)
3191 vm_object_res_reference(backing_object->shadow);
3192 vm_object_unlock(backing_object->shadow);
3193 }
3194#else /* TASK_SWAPPER */
3195 vm_object_reference(backing_object->shadow);
3196#endif /* TASK_SWAPPER */
3197
91447636
A
3198 assert(!object->phys_contiguous);
3199 assert(!backing_object->phys_contiguous);
1c79356b 3200 object->shadow = backing_object->shadow;
91447636
A
3201 if (object->shadow) {
3202 object->shadow_offset += backing_object->shadow_offset;
3203 } else {
3204 /* no shadow, therefore no shadow offset... */
3205 object->shadow_offset = 0;
3206 }
1c79356b
A
3207
3208 /*
3209 * Backing object might have had a copy pointer
3210 * to us. If it did, clear it.
3211 */
3212 if (backing_object->copy == object) {
3213 backing_object->copy = VM_OBJECT_NULL;
3214 }
3215
3216 /*
3217 * Drop the reference count on backing_object.
3218#if TASK_SWAPPER
3219 * Since its ref_count was at least 2, it
3220 * will not vanish; so we don't need to call
3221 * vm_object_deallocate.
3222 * [FBDP: that doesn't seem to be true any more]
3223 *
3224 * The res_count on the backing object is
3225 * conditionally decremented. It's possible
3226 * (via vm_pageout_scan) to get here with
3227 * a "swapped" object, which has a 0 res_count,
3228 * in which case, the backing object res_count
3229 * is already down by one.
3230#else
3231 * Don't call vm_object_deallocate unless
3232 * ref_count drops to zero.
3233 *
3234 * The ref_count can drop to zero here if the
3235 * backing object could be bypassed but not
3236 * collapsed, such as when the backing object
 3237 * is temporary and cacheable.
3238#endif
3239 */
3240 if (backing_object->ref_count > 1) {
3241 backing_object->ref_count--;
3242#if TASK_SWAPPER
3243 if (object->res_count != 0)
3244 vm_object_res_deallocate(backing_object);
3245 assert(backing_object->ref_count > 0);
3246#endif /* TASK_SWAPPER */
3247 vm_object_unlock(backing_object);
3248 } else {
3249
3250 /*
3251 * Drop locks so that we can deallocate
3252 * the backing object.
3253 */
3254
3255#if TASK_SWAPPER
3256 if (object->res_count == 0) {
3257 /* XXX get a reference for the deallocate below */
3258 vm_object_res_reference(backing_object);
3259 }
3260#endif /* TASK_SWAPPER */
3261 vm_object_unlock(object);
3262 vm_object_unlock(backing_object);
3263 vm_object_deallocate(backing_object);
3264
3265 /*
3266 * Relock object. We don't have to reverify
3267 * its state since vm_object_collapse will
3268 * do that for us as it starts at the
3269 * top of its loop.
3270 */
3271
3272 vm_object_lock(object);
3273 }
3274
3275 object_bypasses++;
3276}
0b4e3aa0 3277
1c79356b
A
3278
3279/*
3280 * vm_object_collapse:
3281 *
3282 * Perform an object collapse or an object bypass if appropriate.
3283 * The real work of collapsing and bypassing is performed in
3284 * the routines vm_object_do_collapse and vm_object_do_bypass.
3285 *
3286 * Requires that the object be locked and the page queues be unlocked.
3287 *
3288 */
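/*
 * Descriptive note: a "collapse" moves the backing object's pages and
 * pager up into the parent and frees the backing object, and so
 * requires that the parent hold the only reference to it.  A "bypass"
 * merely re-points the parent's shadow link past the backing object,
 * which is safe only when every interesting backing page is already
 * shadowed by the parent or its pager.
 */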
91447636
A
3289static unsigned long vm_object_collapse_calls = 0;
3290static unsigned long vm_object_collapse_objects = 0;
3291static unsigned long vm_object_collapse_do_collapse = 0;
3292static unsigned long vm_object_collapse_do_bypass = 0;
0b4e3aa0 3293__private_extern__ void
1c79356b 3294vm_object_collapse(
55e303ae 3295 register vm_object_t object,
c0fea474
A
3296 register vm_object_offset_t hint_offset,
3297 boolean_t can_bypass)
1c79356b
A
3298{
3299 register vm_object_t backing_object;
55e303ae
A
3300 register unsigned int rcount;
3301 register unsigned int size;
91447636
A
3302 vm_object_offset_t collapse_min_offset;
3303 vm_object_offset_t collapse_max_offset;
3304 vm_page_t page;
3305 vm_object_t original_object;
3306
3307 vm_object_collapse_calls++;
0b4e3aa0 3308
c0fea474
A
3309 if (! vm_object_collapse_allowed &&
3310 ! (can_bypass && vm_object_bypass_allowed)) {
1c79356b
A
3311 return;
3312 }
3313
3314 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3315 (integer_t)object, 0,0,0,0);
3316
91447636
A
3317 if (object == VM_OBJECT_NULL)
3318 return;
3319
3320 original_object = object;
3321
1c79356b 3322 while (TRUE) {
91447636 3323 vm_object_collapse_objects++;
1c79356b
A
3324 /*
3325 * Verify that the conditions are right for either
3326 * collapse or bypass:
1c79356b 3327 */
1c79356b
A
3328
3329 /*
3330 * There is a backing object, and
3331 */
3332
91447636
A
3333 backing_object = object->shadow;
3334 if (backing_object == VM_OBJECT_NULL) {
3335 if (object != original_object) {
3336 vm_object_unlock(object);
3337 }
1c79356b 3338 return;
91447636 3339 }
1c79356b 3340
91447636
A
3341 /*
3342 * No pages in the object are currently
3343 * being paged out, and
3344 */
3345 if (object->paging_in_progress != 0 ||
3346 object->absent_count != 0) {
3347 /* try and collapse the rest of the shadow chain */
3348 vm_object_lock(backing_object);
3349 if (object != original_object) {
3350 vm_object_unlock(object);
3351 }
3352 object = backing_object;
3353 continue;
3354 }
3355
1c79356b
A
3356 vm_object_lock(backing_object);
3357
3358 /*
3359 * ...
3360 * The backing object is not read_only,
3361 * and no pages in the backing object are
3362 * currently being paged out.
3363 * The backing object is internal.
3364 *
3365 */
3366
3367 if (!backing_object->internal ||
3368 backing_object->paging_in_progress != 0) {
91447636
A
3369 /* try and collapse the rest of the shadow chain */
3370 if (object != original_object) {
3371 vm_object_unlock(object);
3372 }
3373 object = backing_object;
3374 continue;
1c79356b
A
3375 }
3376
3377 /*
3378 * The backing object can't be a copy-object:
3379 * the shadow_offset for the copy-object must stay
3380 * as 0. Furthermore (for the 'we have all the
3381 * pages' case), if we bypass backing_object and
3382 * just shadow the next object in the chain, old
3383 * pages from that object would then have to be copied
3384 * BOTH into the (former) backing_object and into the
3385 * parent object.
3386 */
3387 if (backing_object->shadow != VM_OBJECT_NULL &&
55e303ae 3388 backing_object->shadow->copy == backing_object) {
91447636
A
3389 /* try and collapse the rest of the shadow chain */
3390 if (object != original_object) {
3391 vm_object_unlock(object);
3392 }
3393 object = backing_object;
3394 continue;
1c79356b
A
3395 }
3396
3397 /*
3398 * We can now try to either collapse the backing
3399 * object (if the parent is the only reference to
3400 * it) or (perhaps) remove the parent's reference
3401 * to it.
1c79356b 3402 *
0b4e3aa0
A
3403 * If there is exactly one reference to the backing
3404 * object, we may be able to collapse it into the
3405 * parent.
1c79356b 3406 *
55e303ae
A
3407 * If MACH_PAGEMAP is defined:
3408 * The parent must not have a pager created for it,
3409 * since collapsing a backing_object dumps new pages
3410 * into the parent that its pager doesn't know about
3411 * (and the collapse code can't merge the existence
3412 * maps).
3413 * Otherwise:
3414 * As long as one of the objects is still not known
3415 * to the pager, we can collapse them.
1c79356b 3416 */
1c79356b 3417 if (backing_object->ref_count == 1 &&
55e303ae
A
3418 (!object->pager_created
3419#if !MACH_PAGEMAP
91447636 3420 || !backing_object->pager_created
55e303ae
A
3421#endif /*!MACH_PAGEMAP */
3422 ) && vm_object_collapse_allowed) {
1c79356b
A
3423
3424 XPR(XPR_VM_OBJECT,
91447636 3425 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
1c79356b
A
3426 (integer_t)backing_object, (integer_t)object,
3427 (integer_t)backing_object->pager,
91447636 3428 (integer_t)backing_object->pager_control, 0);
1c79356b
A
3429
3430 /*
3431 * We need the cache lock for collapsing,
3432 * but we must not deadlock.
3433 */
3434
3435 if (! vm_object_cache_lock_try()) {
91447636
A
3436 if (object != original_object) {
3437 vm_object_unlock(object);
3438 }
1c79356b
A
3439 vm_object_unlock(backing_object);
3440 return;
3441 }
3442
91447636
A
3443 /*
3444 * ENCRYPTED SWAP
3445 * We can't collapse the object if it contains
 3446 * any encrypted page, because the encryption key
3447 * includes the <object,offset> info. We can't
3448 * drop the object lock in vm_object_do_collapse()
3449 * so we can't decrypt the page there either.
3450 */
3451 if (vm_pages_encrypted) {
3452 collapse_min_offset = object->shadow_offset;
3453 collapse_max_offset =
3454 object->shadow_offset + object->size;
3455 queue_iterate(&backing_object->memq,
3456 page, vm_page_t, listq) {
3457 if (page->encrypted &&
3458 (page->offset >=
3459 collapse_min_offset) &&
3460 (page->offset <
3461 collapse_max_offset)) {
3462 /*
3463 * We found an encrypted page
3464 * in the backing object,
3465 * within the range covered
3466 * by the parent object: we can
3467 * not collapse them.
3468 */
3469 vm_object_collapse_encrypted++;
3470 vm_object_cache_unlock();
3471 goto try_bypass;
3472 }
3473 }
3474 }
3475
1c79356b
A
3476 /*
3477 * Collapse the object with its backing
3478 * object, and try again with the object's
3479 * new backing object.
3480 */
3481
3482 vm_object_do_collapse(object, backing_object);
91447636 3483 vm_object_collapse_do_collapse++;
1c79356b
A
3484 continue;
3485 }
3486
91447636 3487 try_bypass:
1c79356b
A
3488 /*
3489 * Collapsing the backing object was not possible
3490 * or permitted, so let's try bypassing it.
3491 */
3492
c0fea474 3493 if (! (can_bypass && vm_object_bypass_allowed)) {
91447636
A
3494 /* try and collapse the rest of the shadow chain */
3495 if (object != original_object) {
3496 vm_object_unlock(object);
3497 }
3498 object = backing_object;
3499 continue;
1c79356b
A
3500 }
3501
0b4e3aa0 3502
1c79356b 3503 /*
55e303ae
A
3504 * If the object doesn't have all its pages present,
3505 * we have to make sure no pages in the backing object
3506 * "show through" before bypassing it.
1c79356b 3507 */
55e303ae
A
3508 size = atop(object->size);
3509 rcount = object->resident_page_count;
3510 if (rcount != size) {
55e303ae
A
3511 vm_object_offset_t offset;
3512 vm_object_offset_t backing_offset;
3513 unsigned int backing_rcount;
3514 unsigned int lookups = 0;
3515
3516 /*
3517 * If the backing object has a pager but no pagemap,
3518 * then we cannot bypass it, because we don't know
3519 * what pages it has.
3520 */
3521 if (backing_object->pager_created
1c79356b 3522#if MACH_PAGEMAP
55e303ae 3523 && (backing_object->existence_map == VM_EXTERNAL_NULL)
1c79356b 3524#endif /* MACH_PAGEMAP */
55e303ae 3525 ) {
91447636
A
3526 /* try and collapse the rest of the shadow chain */
3527 if (object != original_object) {
3528 vm_object_unlock(object);
3529 }
3530 object = backing_object;
3531 continue;
55e303ae 3532 }
1c79356b 3533
55e303ae
A
3534 /*
3535 * If the object has a pager but no pagemap,
3536 * then we cannot bypass it, because we don't know
3537 * what pages it has.
3538 */
3539 if (object->pager_created
0b4e3aa0 3540#if MACH_PAGEMAP
55e303ae 3541 && (object->existence_map == VM_EXTERNAL_NULL)
0b4e3aa0 3542#endif /* MACH_PAGEMAP */
55e303ae 3543 ) {
91447636
A
3544 /* try and collapse the rest of the shadow chain */
3545 if (object != original_object) {
3546 vm_object_unlock(object);
3547 }
3548 object = backing_object;
3549 continue;
55e303ae 3550 }
0b4e3aa0 3551
55e303ae
A
3552 /*
3553 * If all of the pages in the backing object are
3554 * shadowed by the parent object, the parent
3555 * object no longer has to shadow the backing
3556 * object; it can shadow the next one in the
3557 * chain.
3558 *
3559 * If the backing object has existence info,
 3560 * we must also examine its existence info
3561 * as well.
3562 *
3563 */
1c79356b 3564
55e303ae
A
3565 backing_offset = object->shadow_offset;
3566 backing_rcount = backing_object->resident_page_count;
1c79356b 3567
55e303ae
A
3568#define EXISTS_IN_OBJECT(obj, off, rc) \
3569 (vm_external_state_get((obj)->existence_map, \
3570 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
3571 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
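		/*
		 * Descriptive note: EXISTS_IN_OBJECT is true when "obj"
		 * has page "off" either in its existence map or resident
		 * in memory.  The resident-page lookup is attempted only
		 * while (rc) is non-zero; it bumps the "lookups" throttle
		 * counter and decrements (rc) on a hit, so the scans
		 * below can stop consulting vm_page_lookup() once every
		 * resident page has been accounted for.
		 */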
3572
3573 /*
3574 * Check the hint location first
3575 * (since it is often the quickest way out of here).
3576 */
3577 if (object->cow_hint != ~(vm_offset_t)0)
3578 hint_offset = (vm_object_offset_t)object->cow_hint;
3579 else
3580 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
3581 (hint_offset - 8 * PAGE_SIZE_64) : 0;
3582
3583 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
3584 backing_offset, backing_rcount) &&
3585 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
3586 /* dependency right at the hint */
3587 object->cow_hint = (vm_offset_t)hint_offset;
91447636
A
3588 /* try and collapse the rest of the shadow chain */
3589 if (object != original_object) {
3590 vm_object_unlock(object);
3591 }
3592 object = backing_object;
3593 continue;
0b4e3aa0 3594 }
55e303ae
A
3595
3596 /*
3597 * If the object's window onto the backing_object
3598 * is large compared to the number of resident
3599 * pages in the backing object, it makes sense to
3600 * walk the backing_object's resident pages first.
3601 *
3602 * NOTE: Pages may be in both the existence map and
3603 * resident. So, we can't permanently decrement
3604 * the rcount here because the second loop may
 3605 * find the same pages in the backing object's
3606 * existence map that we found here and we would
3607 * double-decrement the rcount. We also may or
3608 * may not have found the
3609 */
3610 if (backing_rcount && size >
3611 ((backing_object->existence_map) ?
3612 backing_rcount : (backing_rcount >> 1))) {
3613 unsigned int rc = rcount;
3614 vm_page_t p;
3615
3616 backing_rcount = backing_object->resident_page_count;
3617 p = (vm_page_t)queue_first(&backing_object->memq);
3618 do {
3619 /* Until we get more than one lookup lock */
3620 if (lookups > 256) {
3621 lookups = 0;
3622 delay(1);
3623 }
3624
3625 offset = (p->offset - backing_offset);
3626 if (offset < object->size &&
3627 offset != hint_offset &&
3628 !EXISTS_IN_OBJECT(object, offset, rc)) {
3629 /* found a dependency */
3630 object->cow_hint = (vm_offset_t)offset;
91447636 3631 break;
55e303ae 3632 }
91447636 3633 p = (vm_page_t) queue_next(&p->listq);
55e303ae
A
3634
3635 } while (--backing_rcount);
91447636
A
3636 if (backing_rcount != 0 ) {
3637 /* try and collapse the rest of the shadow chain */
3638 if (object != original_object) {
3639 vm_object_unlock(object);
3640 }
3641 object = backing_object;
3642 continue;
3643 }
0b4e3aa0 3644 }
55e303ae
A
3645
3646 /*
3647 * Walk through the offsets looking for pages in the
3648 * backing object that show through to the object.
3649 */
3650 if (backing_rcount || backing_object->existence_map) {
3651 offset = hint_offset;
3652
3653 while((offset =
3654 (offset + PAGE_SIZE_64 < object->size) ?
3655 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
3656
3657 /* Until we get more than one lookup lock */
3658 if (lookups > 256) {
3659 lookups = 0;
3660 delay(1);
3661 }
3662
3663 if (EXISTS_IN_OBJECT(backing_object, offset +
3664 backing_offset, backing_rcount) &&
3665 !EXISTS_IN_OBJECT(object, offset, rcount)) {
3666 /* found a dependency */
3667 object->cow_hint = (vm_offset_t)offset;
91447636 3668 break;
55e303ae
A
3669 }
3670 }
91447636
A
3671 if (offset != hint_offset) {
3672 /* try and collapse the rest of the shadow chain */
3673 if (object != original_object) {
3674 vm_object_unlock(object);
3675 }
3676 object = backing_object;
3677 continue;
3678 }
0b4e3aa0
A
3679 }
3680 }
1c79356b 3681
55e303ae
A
3682 /* reset the offset hint for any objects deeper in the chain */
3683 object->cow_hint = (vm_offset_t)0;
1c79356b
A
3684
3685 /*
3686 * All interesting pages in the backing object
3687 * already live in the parent or its pager.
3688 * Thus we can bypass the backing object.
3689 */
3690
3691 vm_object_do_bypass(object, backing_object);
91447636 3692 vm_object_collapse_do_bypass++;
1c79356b
A
3693
3694 /*
3695 * Try again with this object's new backing object.
3696 */
3697
3698 continue;
3699 }
91447636
A
3700
3701 if (object != original_object) {
3702 vm_object_unlock(object);
3703 }
1c79356b
A
3704}
3705
3706/*
3707 * Routine: vm_object_page_remove: [internal]
3708 * Purpose:
3709 * Removes all physical pages in the specified
3710 * object range from the object's list of pages.
3711 *
3712 * In/out conditions:
3713 * The object must be locked.
3714 * The object must not have paging_in_progress, usually
3715 * guaranteed by not having a pager.
3716 */
3717unsigned int vm_object_page_remove_lookup = 0;
3718unsigned int vm_object_page_remove_iterate = 0;
3719
0b4e3aa0 3720__private_extern__ void
1c79356b
A
3721vm_object_page_remove(
3722 register vm_object_t object,
3723 register vm_object_offset_t start,
3724 register vm_object_offset_t end)
3725{
3726 register vm_page_t p, next;
3727
3728 /*
3729 * One and two page removals are most popular.
3730 * The factor of 16 here is somewhat arbitrary.
3731 * It balances vm_object_lookup vs iteration.
3732 */
3733
55e303ae 3734 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
1c79356b
A
3735 vm_object_page_remove_lookup++;
3736
3737 for (; start < end; start += PAGE_SIZE_64) {
3738 p = vm_page_lookup(object, start);
3739 if (p != VM_PAGE_NULL) {
3740 assert(!p->cleaning && !p->pageout);
3741 if (!p->fictitious)
91447636 3742 pmap_disconnect(p->phys_page);
1c79356b
A
3743 VM_PAGE_FREE(p);
3744 }
3745 }
3746 } else {
3747 vm_object_page_remove_iterate++;
3748
3749 p = (vm_page_t) queue_first(&object->memq);
3750 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3751 next = (vm_page_t) queue_next(&p->listq);
3752 if ((start <= p->offset) && (p->offset < end)) {
3753 assert(!p->cleaning && !p->pageout);
3754 if (!p->fictitious)
91447636 3755 pmap_disconnect(p->phys_page);
1c79356b
A
3756 VM_PAGE_FREE(p);
3757 }
3758 p = next;
3759 }
3760 }
3761}
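/*
 *	Illustrative sketch: how a caller might discard the resident pages
 *	backing a page-aligned run.  The "example_discard_range" wrapper is
 *	hypothetical; the locking follows the In/out conditions above
 *	(object locked, no paging_in_progress).
 */
static void
example_discard_range(
	vm_object_t		object,
	vm_object_offset_t	start,
	vm_object_offset_t	end)
{
	vm_object_lock(object);
	assert(object->paging_in_progress == 0);
	vm_object_page_remove(object, start, end);
	vm_object_unlock(object);
}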
3762
0b4e3aa0 3763
1c79356b
A
3764/*
3765 * Routine: vm_object_coalesce
3766 * Function: Coalesces two objects backing up adjoining
3767 * regions of memory into a single object.
3768 *
3769 * returns TRUE if objects were combined.
3770 *
3771 * NOTE: Only works at the moment if the second object is NULL -
3772 * if it's not, which object do we lock first?
3773 *
3774 * Parameters:
3775 * prev_object First object to coalesce
3776 * prev_offset Offset into prev_object
3777 * next_object Second object to coalesce
3778 * next_offset Offset into next_object
3779 *
3780 * prev_size Size of reference to prev_object
3781 * next_size Size of reference to next_object
3782 *
3783 * Conditions:
3784 * The object(s) must *not* be locked. The map must be locked
3785 * to preserve the reference to the object(s).
3786 */
0b4e3aa0 3787static int vm_object_coalesce_count = 0;
1c79356b 3788
0b4e3aa0 3789__private_extern__ boolean_t
1c79356b
A
3790vm_object_coalesce(
3791 register vm_object_t prev_object,
3792 vm_object_t next_object,
3793 vm_object_offset_t prev_offset,
91447636 3794 __unused vm_object_offset_t next_offset,
1c79356b
A
3795 vm_object_size_t prev_size,
3796 vm_object_size_t next_size)
3797{
3798 vm_object_size_t newsize;
3799
3800#ifdef lint
3801 next_offset++;
3802#endif /* lint */
3803
3804 if (next_object != VM_OBJECT_NULL) {
3805 return(FALSE);
3806 }
3807
3808 if (prev_object == VM_OBJECT_NULL) {
3809 return(TRUE);
3810 }
3811
3812 XPR(XPR_VM_OBJECT,
3813 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3814 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3815
3816 vm_object_lock(prev_object);
3817
3818 /*
3819 * Try to collapse the object first
3820 */
c0fea474 3821 vm_object_collapse(prev_object, prev_offset, TRUE);
1c79356b
A
3822
3823 /*
3824 * Can't coalesce if pages not mapped to
3825 * prev_entry may be in use in any way:
3826 * . more than one reference
3827 * . paged out
3828 * . shadows another object
3829 * . has a copy elsewhere
91447636 3830 * . is purgable
1c79356b
A
3831 * . paging references (pages might be in page-list)
3832 */
3833
3834 if ((prev_object->ref_count > 1) ||
3835 prev_object->pager_created ||
3836 (prev_object->shadow != VM_OBJECT_NULL) ||
3837 (prev_object->copy != VM_OBJECT_NULL) ||
3838 (prev_object->true_share != FALSE) ||
91447636 3839 (prev_object->purgable != VM_OBJECT_NONPURGABLE) ||
1c79356b
A
3840 (prev_object->paging_in_progress != 0)) {
3841 vm_object_unlock(prev_object);
3842 return(FALSE);
3843 }
3844
3845 vm_object_coalesce_count++;
3846
3847 /*
3848 * Remove any pages that may still be in the object from
3849 * a previous deallocation.
3850 */
3851 vm_object_page_remove(prev_object,
3852 prev_offset + prev_size,
3853 prev_offset + prev_size + next_size);
3854
3855 /*
3856 * Extend the object if necessary.
3857 */
3858 newsize = prev_offset + prev_size + next_size;
3859 if (newsize > prev_object->size) {
3860#if MACH_PAGEMAP
3861 /*
3862 * We cannot extend an object that has existence info,
3863 * since the existence info might then fail to cover
3864 * the entire object.
3865 *
3866 * This assertion must be true because the object
3867 * has no pager, and we only create existence info
3868 * for objects with pagers.
3869 */
3870 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3871#endif /* MACH_PAGEMAP */
3872 prev_object->size = newsize;
3873 }
3874
3875 vm_object_unlock(prev_object);
3876 return(TRUE);
3877}
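/*
 *	Illustrative sketch: vm_object_coalesce() is typically driven from
 *	map-entry extension code when a new anonymous region lands right
 *	after an existing one.  Per the NOTE above, the second object must
 *	be VM_OBJECT_NULL.  The wrapper name below is hypothetical.
 */
static boolean_t
example_try_extend(
	vm_object_t		prev_object,
	vm_object_offset_t	prev_offset,
	vm_object_size_t	prev_size,
	vm_object_size_t	next_size)
{
	/* map locked by caller, objects unlocked (see Conditions above) */
	return vm_object_coalesce(prev_object,
				  VM_OBJECT_NULL,		/* next_object */
				  prev_offset,
				  (vm_object_offset_t) 0,	/* next_offset unused */
				  prev_size,
				  next_size);
}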
3878
3879/*
3880 * Attach a set of physical pages to an object, so that they can
3881 * be mapped by mapping the object. Typically used to map IO memory.
3882 *
3883 * The mapping function and its private data are used to obtain the
3884 * physical addresses for each page to be mapped.
3885 */
3886void
3887vm_object_page_map(
3888 vm_object_t object,
3889 vm_object_offset_t offset,
3890 vm_object_size_t size,
3891 vm_object_offset_t (*map_fn)(void *map_fn_data,
3892 vm_object_offset_t offset),
3893 void *map_fn_data) /* private to map_fn */
3894{
3895 int num_pages;
3896 int i;
3897 vm_page_t m;
3898 vm_page_t old_page;
3899 vm_object_offset_t addr;
3900
55e303ae 3901 num_pages = atop_64(size);
1c79356b
A
3902
3903 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3904
3905 addr = (*map_fn)(map_fn_data, offset);
3906
3907 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3908 vm_page_more_fictitious();
3909
3910 vm_object_lock(object);
3911 if ((old_page = vm_page_lookup(object, offset))
3912 != VM_PAGE_NULL)
3913 {
3914 vm_page_lock_queues();
3915 vm_page_free(old_page);
3916 vm_page_unlock_queues();
3917 }
3918
3919 vm_page_init(m, addr);
0b4e3aa0
A
3920 /* private normally requires lock_queues but since we */
3921 /* are initializing the page, it's not necessary here */
1c79356b
A
3922 m->private = TRUE; /* don't free page */
3923 m->wire_count = 1;
3924 vm_page_insert(m, object, offset);
3925
3926 PAGE_WAKEUP_DONE(m);
3927 vm_object_unlock(object);
3928 }
3929}
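/*
 *	Illustrative sketch: a map_fn callback for vm_object_page_map()
 *	that exposes a physically contiguous window.  The physical base is
 *	passed through map_fn_data; the names here are hypothetical.
 */
static vm_object_offset_t
example_linear_map_fn(
	void			*map_fn_data,
	vm_object_offset_t	offset)
{
	vm_object_offset_t	base = *(vm_object_offset_t *) map_fn_data;

	/* the page at "offset" in the object backs physical base + offset */
	return base + offset;
}
/*
 *	A driver could then call, for some hypothetical vm_object_offset_t
 *	io_base:
 *		vm_object_page_map(object, 0, size,
 *				   example_linear_map_fn, &io_base);
 */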
3930
3931#include <mach_kdb.h>
3932
3933#if MACH_KDB
3934#include <ddb/db_output.h>
3935#include <vm/vm_print.h>
3936
3937#define printf kdbprintf
3938
3939extern boolean_t vm_object_cached(
3940 vm_object_t object);
3941
3942extern void print_bitstring(
3943 char byte);
3944
3945boolean_t vm_object_print_pages = FALSE;
3946
3947void
3948print_bitstring(
3949 char byte)
3950{
3951 printf("%c%c%c%c%c%c%c%c",
3952 ((byte & (1 << 0)) ? '1' : '0'),
3953 ((byte & (1 << 1)) ? '1' : '0'),
3954 ((byte & (1 << 2)) ? '1' : '0'),
3955 ((byte & (1 << 3)) ? '1' : '0'),
3956 ((byte & (1 << 4)) ? '1' : '0'),
3957 ((byte & (1 << 5)) ? '1' : '0'),
3958 ((byte & (1 << 6)) ? '1' : '0'),
3959 ((byte & (1 << 7)) ? '1' : '0'));
3960}
3961
3962boolean_t
3963vm_object_cached(
3964 register vm_object_t object)
3965{
3966 register vm_object_t o;
3967
3968 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3969 if (object == o) {
3970 return TRUE;
3971 }
3972 }
3973 return FALSE;
3974}
3975
3976#if MACH_PAGEMAP
3977/*
3978 * vm_external_print: [ debug ]
3979 */
3980void
3981vm_external_print(
91447636
A
3982 vm_external_map_t emap,
3983 vm_size_t size)
1c79356b 3984{
91447636 3985 if (emap == VM_EXTERNAL_NULL) {
1c79356b
A
3986 printf("0 ");
3987 } else {
3988 vm_size_t existence_size = stob(size);
3989 printf("{ size=%d, map=[", existence_size);
3990 if (existence_size > 0) {
91447636 3991 print_bitstring(emap[0]);
1c79356b
A
3992 }
3993 if (existence_size > 1) {
91447636 3994 print_bitstring(emap[1]);
1c79356b
A
3995 }
3996 if (existence_size > 2) {
3997 printf("...");
91447636 3998 print_bitstring(emap[existence_size-1]);
1c79356b
A
3999 }
4000 printf("] }\n");
4001 }
4002 return;
4003}
4004#endif /* MACH_PAGEMAP */
4005
4006int
4007vm_follow_object(
4008 vm_object_t object)
4009{
0b4e3aa0
A
4010 int count = 0;
4011 int orig_db_indent = db_indent;
1c79356b 4012
0b4e3aa0
A
4013 while (TRUE) {
4014 if (object == VM_OBJECT_NULL) {
4015 db_indent = orig_db_indent;
4016 return count;
4017 }
1c79356b 4018
0b4e3aa0 4019 count += 1;
1c79356b 4020
0b4e3aa0
A
4021 iprintf("object 0x%x", object);
4022 printf(", shadow=0x%x", object->shadow);
4023 printf(", copy=0x%x", object->copy);
4024 printf(", pager=0x%x", object->pager);
4025 printf(", ref=%d\n", object->ref_count);
4026
4027 db_indent += 2;
4028 object = object->shadow;
4029 }
1c79356b 4030
1c79356b
A
4031}
4032
4033/*
4034 * vm_object_print: [ debug ]
4035 */
4036void
4037vm_object_print(
91447636
A
4038 db_addr_t db_addr,
4039 __unused boolean_t have_addr,
4040 __unused int arg_count,
4041 __unused char *modif)
1c79356b 4042{
91447636 4043 vm_object_t object;
1c79356b 4044 register vm_page_t p;
91447636 4045 const char *s;
1c79356b
A
4046
4047 register int count;
4048
91447636 4049 object = (vm_object_t) (long) db_addr;
1c79356b
A
4050 if (object == VM_OBJECT_NULL)
4051 return;
4052
4053 iprintf("object 0x%x\n", object);
4054
4055 db_indent += 2;
4056
4057 iprintf("size=0x%x", object->size);
4058 printf(", cluster=0x%x", object->cluster_size);
91447636 4059 printf(", memq_hint=%p", object->memq_hint);
1c79356b
A
4060 printf(", ref_count=%d\n", object->ref_count);
4061 iprintf("");
4062#if TASK_SWAPPER
4063 printf("res_count=%d, ", object->res_count);
4064#endif /* TASK_SWAPPER */
4065 printf("resident_page_count=%d\n", object->resident_page_count);
4066
4067 iprintf("shadow=0x%x", object->shadow);
4068 if (object->shadow) {
4069 register int i = 0;
4070 vm_object_t shadow = object;
91447636 4071 while((shadow = shadow->shadow))
1c79356b
A
4072 i++;
4073 printf(" (depth %d)", i);
4074 }
4075 printf(", copy=0x%x", object->copy);
4076 printf(", shadow_offset=0x%x", object->shadow_offset);
4077 printf(", last_alloc=0x%x\n", object->last_alloc);
4078
4079 iprintf("pager=0x%x", object->pager);
4080 printf(", paging_offset=0x%x", object->paging_offset);
91447636 4081 printf(", pager_control=0x%x\n", object->pager_control);
1c79356b
A
4082
4083 iprintf("copy_strategy=%d[", object->copy_strategy);
4084 switch (object->copy_strategy) {
4085 case MEMORY_OBJECT_COPY_NONE:
4086 printf("copy_none");
4087 break;
4088
4089 case MEMORY_OBJECT_COPY_CALL:
4090 printf("copy_call");
4091 break;
4092
4093 case MEMORY_OBJECT_COPY_DELAY:
4094 printf("copy_delay");
4095 break;
4096
4097 case MEMORY_OBJECT_COPY_SYMMETRIC:
4098 printf("copy_symmetric");
4099 break;
4100
4101 case MEMORY_OBJECT_COPY_INVALID:
4102 printf("copy_invalid");
4103 break;
4104
4105 default:
4106 printf("?");
4107 }
4108 printf("]");
4109 printf(", absent_count=%d\n", object->absent_count);
4110
4111 iprintf("all_wanted=0x%x<", object->all_wanted);
4112 s = "";
4113 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
4114 printf("%sinit", s);
4115 s = ",";
4116 }
4117 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
4118 printf("%sready", s);
4119 s = ",";
4120 }
4121 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
4122 printf("%spaging", s);
4123 s = ",";
4124 }
4125 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
4126 printf("%sabsent", s);
4127 s = ",";
4128 }
4129 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
4130 printf("%slock", s);
4131 s = ",";
4132 }
4133 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
4134 printf("%suncaching", s);
4135 s = ",";
4136 }
4137 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
4138 printf("%scopy_call", s);
4139 s = ",";
4140 }
4141 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
4142 printf("%scaching", s);
4143 s = ",";
4144 }
4145 printf(">");
4146 printf(", paging_in_progress=%d\n", object->paging_in_progress);
4147
4148 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
4149 (object->pager_created ? "" : "!"),
4150 (object->pager_initialized ? "" : "!"),
4151 (object->pager_ready ? "" : "!"),
4152 (object->can_persist ? "" : "!"),
4153 (object->pager_trusted ? "" : "!"),
4154 (object->pageout ? "" : "!"),
4155 (object->internal ? "internal" : "external"),
4156 (object->temporary ? "temporary" : "permanent"));
91447636 4157 iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n",
1c79356b 4158 (object->alive ? "" : "!"),
91447636
A
4159 ((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"),
4160 ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"),
4161 ((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"),
1c79356b
A
4162 (object->shadowed ? "" : "!"),
4163 (vm_object_cached(object) ? "" : "!"),
4164 (object->private ? "" : "!"));
4165 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
4166 (object->advisory_pageout ? "" : "!"),
4167 (object->silent_overwrite ? "" : "!"));
4168
4169#if MACH_PAGEMAP
4170 iprintf("existence_map=");
4171 vm_external_print(object->existence_map, object->size);
4172#endif /* MACH_PAGEMAP */
4173#if MACH_ASSERT
4174 iprintf("paging_object=0x%x\n", object->paging_object);
4175#endif /* MACH_ASSERT */
4176
4177 if (vm_object_print_pages) {
4178 count = 0;
4179 p = (vm_page_t) queue_first(&object->memq);
4180 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4181 if (count == 0) {
4182 iprintf("memory:=");
4183 } else if (count == 2) {
4184 printf("\n");
4185 iprintf(" ...");
4186 count = 0;
4187 } else {
4188 printf(",");
4189 }
4190 count++;
4191
91447636 4192 printf("(off=0x%llX,page=%p)", p->offset, p);
1c79356b
A
4193 p = (vm_page_t) queue_next(&p->listq);
4194 }
4195 if (count != 0) {
4196 printf("\n");
4197 }
4198 }
4199 db_indent -= 2;
4200}
4201
4202
4203/*
4204 * vm_object_find [ debug ]
4205 *
4206 * Find all tasks which reference the given vm_object.
4207 */
4208
4209boolean_t vm_object_find(vm_object_t object);
4210boolean_t vm_object_print_verbose = FALSE;
4211
4212boolean_t
4213vm_object_find(
4214 vm_object_t object)
4215{
4216 task_t task;
4217 vm_map_t map;
4218 vm_map_entry_t entry;
4219 processor_set_t pset = &default_pset;
4220 boolean_t found = FALSE;
4221
4222 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
4223 map = task->map;
4224 for (entry = vm_map_first_entry(map);
4225 entry && entry != vm_map_to_entry(map);
4226 entry = entry->vme_next) {
4227
4228 vm_object_t obj;
4229
4230 /*
4231 * For the time being skip submaps,
4232 * only the kernel can have submaps,
4233 * and unless we are interested in
4234 * kernel objects, we can simply skip
4235 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
4236 * for a full solution.
4237 */
4238 if (entry->is_sub_map)
4239 continue;
4240 if (entry)
4241 obj = entry->object.vm_object;
4242 else
4243 continue;
4244
4245 while (obj != VM_OBJECT_NULL) {
4246 if (obj == object) {
4247 if (!found) {
4248 printf("TASK\t\tMAP\t\tENTRY\n");
4249 found = TRUE;
4250 }
4251 printf("0x%x\t0x%x\t0x%x\n",
4252 task, map, entry);
4253 }
4254 obj = obj->shadow;
4255 }
4256 }
4257 }
4258
4259 return(found);
4260}
4261
4262#endif /* MACH_KDB */
4263
0b4e3aa0
A
4264kern_return_t
4265vm_object_populate_with_private(
55e303ae 4266 vm_object_t object,
0b4e3aa0 4267 vm_object_offset_t offset,
55e303ae
A
4268 ppnum_t phys_page,
4269 vm_size_t size)
0b4e3aa0 4270{
55e303ae 4271 ppnum_t base_page;
0b4e3aa0
A
4272 vm_object_offset_t base_offset;
4273
4274
4275 if(!object->private)
4276 return KERN_FAILURE;
4277
55e303ae 4278 base_page = phys_page;
0b4e3aa0
A
4279
4280 vm_object_lock(object);
4281 if(!object->phys_contiguous) {
4282 vm_page_t m;
55e303ae 4283 if((base_offset = trunc_page_64(offset)) != offset) {
0b4e3aa0
A
4284 vm_object_unlock(object);
4285 return KERN_FAILURE;
4286 }
4287 base_offset += object->paging_offset;
4288 while(size) {
4289 m = vm_page_lookup(object, base_offset);
4290 if(m != VM_PAGE_NULL) {
4291 if(m->fictitious) {
4292 vm_page_lock_queues();
4293 m->fictitious = FALSE;
4294 m->private = TRUE;
55e303ae 4295 m->phys_page = base_page;
0b4e3aa0
A
4296 if(!m->busy) {
4297 m->busy = TRUE;
4298 }
4299 if(!m->absent) {
4300 m->absent = TRUE;
4301 object->absent_count++;
4302 }
4303 m->list_req_pending = TRUE;
4304 vm_page_unlock_queues();
55e303ae 4305 } else if (m->phys_page != base_page) {
0b4e3aa0 4306 /* pmap call to clear old mapping */
91447636 4307 pmap_disconnect(m->phys_page);
55e303ae 4308 m->phys_page = base_page;
0b4e3aa0 4309 }
91447636
A
4310
4311 /*
4312 * ENCRYPTED SWAP:
4313 * We're not pointing to the same
4314 * physical page any longer and the
4315 * contents of the new one are not
4316 * supposed to be encrypted.
4317 * XXX What happens to the original
4318 * physical page? Is it lost?
4319 */
4320 m->encrypted = FALSE;
4321
0b4e3aa0
A
4322 } else {
4323 while ((m = vm_page_grab_fictitious())
4324 == VM_PAGE_NULL)
4325 vm_page_more_fictitious();
4326 vm_page_lock_queues();
4327 m->fictitious = FALSE;
4328 m->private = TRUE;
55e303ae 4329 m->phys_page = base_page;
0b4e3aa0
A
4330 m->list_req_pending = TRUE;
4331 m->absent = TRUE;
4332 m->unusual = TRUE;
4333 object->absent_count++;
4334 vm_page_unlock_queues();
4335 vm_page_insert(m, object, base_offset);
4336 }
55e303ae 4337 base_page++; /* Go to the next physical page */
0b4e3aa0
A
4338 base_offset += PAGE_SIZE;
4339 size -= PAGE_SIZE;
4340 }
4341 } else {
4342 /* NOTE: we should check the original settings here */
4343 /* if we have a size > zero a pmap call should be made */
4344 /* to disable the range */
4345
4346 /* pmap_? */
4347
4348 /* shadows on contiguous memory are not allowed */
4349 /* we therefore can use the offset field */
55e303ae 4350 object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
0b4e3aa0
A
4351 object->size = size;
4352 }
4353 vm_object_unlock(object);
4354 return KERN_SUCCESS;
4355}
4356
1c79356b
A
4357/*
4358 * memory_object_free_from_cache:
4359 *
4360 * Walk the vm_object cache list, removing and freeing vm_objects
c0fea474 4361 * which are backed by the pager identified by the caller, (pager_ops).
1c79356b
A
4362 * Remove up to "count" objects, if there are that may available
4363 * in the cache.
0b4e3aa0 4364 *
1c79356b
A
4365 * Walk the list at most once, return the number of vm_objects
4366 * actually freed.
1c79356b
A
4367 */
4368
0b4e3aa0 4369__private_extern__ kern_return_t
1c79356b 4370memory_object_free_from_cache(
91447636 4371 __unused host_t host,
c0fea474 4372 memory_object_pager_ops_t pager_ops,
1c79356b
A
4373 int *count)
4374{
4375
4376 int object_released = 0;
1c79356b
A
4377
4378 register vm_object_t object = VM_OBJECT_NULL;
4379 vm_object_t shadow;
4380
4381/*
4382 if(host == HOST_NULL)
4383 return(KERN_INVALID_ARGUMENT);
4384*/
4385
4386 try_again:
4387 vm_object_cache_lock();
4388
4389 queue_iterate(&vm_object_cached_list, object,
4390 vm_object_t, cached_list) {
c0fea474
A
4391 if (object->pager &&
4392 (pager_ops == object->pager->mo_pager_ops)) {
1c79356b
A
4393 vm_object_lock(object);
4394 queue_remove(&vm_object_cached_list, object,
4395 vm_object_t, cached_list);
4396 vm_object_cached_count--;
4397
4398 /*
4399 * Since this object is in the cache, we know
0b4e3aa0
A
4400 * that it is initialized and has only a pager's
4401 * (implicit) reference. Take a reference to avoid
4402 * recursive deallocations.
1c79356b
A
4403 */
4404
4405 assert(object->pager_initialized);
4406 assert(object->ref_count == 0);
4407 object->ref_count++;
4408
4409 /*
4410 * Terminate the object.
4411 * If the object had a shadow, we let
4412 * vm_object_deallocate deallocate it.
4413 * "pageout" objects have a shadow, but
4414 * maintain a "paging reference" rather
4415 * than a normal reference.
4416 * (We are careful here to limit recursion.)
4417 */
4418 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4419 if ((vm_object_terminate(object) == KERN_SUCCESS)
4420 && (shadow != VM_OBJECT_NULL)) {
4421 vm_object_deallocate(shadow);
4422 }
4423
4424 if(object_released++ == *count)
4425 return KERN_SUCCESS;
4426 goto try_again;
4427 }
4428 }
4429 vm_object_cache_unlock();
4430 *count = object_released;
4431 return KERN_SUCCESS;
4432}
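/*
 *	Illustrative sketch: a pager being torn down could evict any of its
 *	cached objects this way.  "example_pager_ops" is a hypothetical
 *	pager_ops pointer; the host argument is unused above.
 */
static void
example_evict_cached(memory_object_pager_ops_t example_pager_ops)
{
	int	count = 16;	/* upper bound on objects to free */

	(void) memory_object_free_from_cache(HOST_NULL,
					     example_pager_ops,
					     &count);
	/* the routine reports the number of objects actually freed via "count" */
}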
4433
0b4e3aa0 4434
1c79356b
A
4435
4436kern_return_t
0b4e3aa0
A
4437memory_object_create_named(
4438 memory_object_t pager,
4439 memory_object_offset_t size,
4440 memory_object_control_t *control)
1c79356b 4441{
0b4e3aa0
A
4442 vm_object_t object;
4443 vm_object_hash_entry_t entry;
1c79356b 4444
0b4e3aa0
A
4445 *control = MEMORY_OBJECT_CONTROL_NULL;
4446 if (pager == MEMORY_OBJECT_NULL)
4447 return KERN_INVALID_ARGUMENT;
1c79356b 4448
0b4e3aa0
A
4449 vm_object_cache_lock();
4450 entry = vm_object_hash_lookup(pager, FALSE);
4451 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4452 (entry->object != VM_OBJECT_NULL)) {
4453 if (entry->object->named == TRUE)
4454 panic("memory_object_create_named: caller already holds the right"); }
1c79356b 4455
0b4e3aa0
A
4456 vm_object_cache_unlock();
4457 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4458 == VM_OBJECT_NULL) {
4459 return(KERN_INVALID_OBJECT);
4460 }
4461
4462 /* wait for object (if any) to be ready */
4463 if (object != VM_OBJECT_NULL) {
4464 vm_object_lock(object);
4465 object->named = TRUE;
4466 while (!object->pager_ready) {
9bccf70c
A
4467 vm_object_sleep(object,
4468 VM_OBJECT_EVENT_PAGER_READY,
4469 THREAD_UNINT);
0b4e3aa0 4470 }
91447636 4471 *control = object->pager_control;
0b4e3aa0
A
4472 vm_object_unlock(object);
4473 }
4474 return (KERN_SUCCESS);
4475}
1c79356b 4476
1c79356b 4477
0b4e3aa0
A
4478/*
4479 * Routine: memory_object_recover_named [user interface]
4480 * Purpose:
4481 * Attempt to recover a named reference for a VM object.
4482 * VM will verify that the object has not already started
4483 * down the termination path, and if it has, will optionally
4484 * wait for that to finish.
4485 * Returns:
4486 * KERN_SUCCESS - we recovered a named reference on the object
4487 * KERN_FAILURE - we could not recover a reference (object dead)
4488 * KERN_INVALID_ARGUMENT - bad memory object control
4489 */
4490kern_return_t
4491memory_object_recover_named(
4492 memory_object_control_t control,
4493 boolean_t wait_on_terminating)
4494{
4495 vm_object_t object;
1c79356b 4496
0b4e3aa0
A
4497 vm_object_cache_lock();
4498 object = memory_object_control_to_vm_object(control);
4499 if (object == VM_OBJECT_NULL) {
4500 vm_object_cache_unlock();
4501 return (KERN_INVALID_ARGUMENT);
4502 }
1c79356b 4503
0b4e3aa0
A
4504restart:
4505 vm_object_lock(object);
1c79356b 4506
0b4e3aa0
A
4507 if (object->terminating && wait_on_terminating) {
4508 vm_object_cache_unlock();
4509 vm_object_wait(object,
4510 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4511 THREAD_UNINT);
4512 vm_object_cache_lock();
4513 goto restart;
4514 }
4515
4516 if (!object->alive) {
4517 vm_object_cache_unlock();
4518 vm_object_unlock(object);
4519 return KERN_FAILURE;
1c79356b
A
4520 }
4521
0b4e3aa0
A
4522 if (object->named == TRUE) {
4523 vm_object_cache_unlock();
4524 vm_object_unlock(object);
4525 return KERN_SUCCESS;
4526 }
1c79356b 4527
0b4e3aa0
A
4528 if((object->ref_count == 0) && (!object->terminating)){
4529 queue_remove(&vm_object_cached_list, object,
4530 vm_object_t, cached_list);
4531 vm_object_cached_count--;
4532 XPR(XPR_VM_OBJECT_CACHE,
4533 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4534 (integer_t)object,
4535 (integer_t)vm_object_cached_list.next,
4536 (integer_t)vm_object_cached_list.prev, 0,0);
4537 }
4538
4539 vm_object_cache_unlock();
4540
4541 object->named = TRUE;
4542 object->ref_count++;
4543 vm_object_res_reference(object);
4544 while (!object->pager_ready) {
9bccf70c
A
4545 vm_object_sleep(object,
4546 VM_OBJECT_EVENT_PAGER_READY,
4547 THREAD_UNINT);
0b4e3aa0
A
4548 }
4549 vm_object_unlock(object);
4550 return (KERN_SUCCESS);
1c79356b
A
4551}
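/*
 *	Illustrative sketch: recovering a named reference and noticing when
 *	the object is already dead.  The wrapper name is hypothetical; the
 *	return codes are the ones documented above.
 */
static kern_return_t
example_recover(memory_object_control_t control)
{
	kern_return_t	kr;

	kr = memory_object_recover_named(control, TRUE);  /* wait if terminating */
	if (kr == KERN_FAILURE) {
		/* object already died; the caller must re-create its mapping */
	}
	return kr;
}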
4552
0b4e3aa0
A
4553
4554/*
4555 * vm_object_release_name:
4556 *
4557 * Enforces name semantic on memory_object reference count decrement
4558 * This routine should not be called unless the caller holds a name
4559 * reference gained through the memory_object_create_named.
4560 *
4561 * If the TERMINATE_IDLE flag is set, the call will return if the
4562 * reference count is not 1, i.e. idle with the only remaining reference
4563 * being the name.
4564 * If the decision is made to proceed the name field flag is set to
4565 * false and the reference count is decremented. If the RESPECT_CACHE
4566 * flag is set and the reference count has gone to zero, the
4567 * memory_object is checked to see if it is cacheable; otherwise, when
4568 * the reference count is zero, it is simply terminated.
4569 */
4570
4571__private_extern__ kern_return_t
4572vm_object_release_name(
4573 vm_object_t object,
4574 int flags)
1c79356b 4575{
0b4e3aa0
A
4576 vm_object_t shadow;
4577 boolean_t original_object = TRUE;
1c79356b 4578
0b4e3aa0 4579 while (object != VM_OBJECT_NULL) {
1c79356b 4580
0b4e3aa0
A
4581 /*
4582 * The cache holds a reference (uncounted) to
4583 * the object. We must lock it before removing
4584 * the object.
4585 *
4586 */
4587
1c79356b 4588 vm_object_cache_lock();
0b4e3aa0
A
4589 vm_object_lock(object);
4590 assert(object->alive);
4591 if(original_object)
4592 assert(object->named);
4593 assert(object->ref_count > 0);
4594
4595 /*
4596 * We have to wait for initialization before
4597 * destroying or caching the object.
4598 */
4599
4600 if (object->pager_created && !object->pager_initialized) {
4601 assert(!object->can_persist);
4602 vm_object_assert_wait(object,
4603 VM_OBJECT_EVENT_INITIALIZED,
4604 THREAD_UNINT);
4605 vm_object_unlock(object);
4606 vm_object_cache_unlock();
9bccf70c 4607 thread_block(THREAD_CONTINUE_NULL);
0b4e3aa0 4608 continue;
1c79356b
A
4609 }
4610
0b4e3aa0
A
4611 if (((object->ref_count > 1)
4612 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4613 || (object->terminating)) {
4614 vm_object_unlock(object);
4615 vm_object_cache_unlock();
4616 return KERN_FAILURE;
4617 } else {
4618 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4619 vm_object_unlock(object);
4620 vm_object_cache_unlock();
4621 return KERN_SUCCESS;
1c79356b 4622 }
0b4e3aa0
A
4623 }
4624
4625 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4626 (object->ref_count == 1)) {
4627 if(original_object)
4628 object->named = FALSE;
1c79356b 4629 vm_object_unlock(object);
0b4e3aa0
A
4630 vm_object_cache_unlock();
4631 /* let vm_object_deallocate push this thing into */
4632 /* the cache, if that is where it is bound */
4633 vm_object_deallocate(object);
4634 return KERN_SUCCESS;
4635 }
4636 VM_OBJ_RES_DECR(object);
4637 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4638 if(object->ref_count == 1) {
4639 if(vm_object_terminate(object) != KERN_SUCCESS) {
4640 if(original_object) {
4641 return KERN_FAILURE;
4642 } else {
4643 return KERN_SUCCESS;
4644 }
4645 }
4646 if (shadow != VM_OBJECT_NULL) {
4647 original_object = FALSE;
4648 object = shadow;
4649 continue;
4650 }
4651 return KERN_SUCCESS;
4652 } else {
4653 object->ref_count--;
4654 assert(object->ref_count > 0);
4655 if(original_object)
4656 object->named = FALSE;
4657 vm_object_unlock(object);
4658 vm_object_cache_unlock();
4659 return KERN_SUCCESS;
1c79356b 4660 }
1c79356b 4661 }
91447636
A
4662 /*NOTREACHED*/
4663 assert(0);
4664 return KERN_FAILURE;
1c79356b
A
4665}
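/*
 *	Illustrative sketch: dropping a name reference only if the object is
 *	otherwise idle, and respecting the cache when it is.  The wrapper
 *	name is hypothetical; the flags are the ones handled above.
 */
static kern_return_t
example_drop_name_if_idle(vm_object_t object)
{
	return vm_object_release_name(object,
				      MEMORY_OBJECT_TERMINATE_IDLE |
				      MEMORY_OBJECT_RESPECT_CACHE);
}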
4666
0b4e3aa0
A
4667
4668__private_extern__ kern_return_t
4669vm_object_lock_request(
4670 vm_object_t object,
4671 vm_object_offset_t offset,
4672 vm_object_size_t size,
4673 memory_object_return_t should_return,
4674 int flags,
4675 vm_prot_t prot)
1c79356b 4676{
91447636
A
4677 __unused boolean_t should_flush;
4678
4679 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
1c79356b 4680
0b4e3aa0
A
4681 XPR(XPR_MEMORY_OBJECT,
4682 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4683 (integer_t)object, offset, size,
4684 (((should_return&1)<<1)|should_flush), prot);
1c79356b 4685
0b4e3aa0
A
4686 /*
4687 * Check for bogus arguments.
4688 */
4689 if (object == VM_OBJECT_NULL)
4690 return (KERN_INVALID_ARGUMENT);
1c79356b 4691
0b4e3aa0
A
4692 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4693 return (KERN_INVALID_ARGUMENT);
1c79356b 4694
55e303ae 4695 size = round_page_64(size);
0b4e3aa0
A
4696
4697 /*
4698 * Lock the object, and acquire a paging reference to
4699 * prevent the memory_object reference from being released.
4700 */
4701 vm_object_lock(object);
4702 vm_object_paging_begin(object);
0b4e3aa0
A
4703
4704 (void)vm_object_update(object,
91447636 4705 offset, size, NULL, NULL, should_return, flags, prot);
0b4e3aa0
A
4706
4707 vm_object_paging_end(object);
4708 vm_object_unlock(object);
4709
4710 return (KERN_SUCCESS);
4711}
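/*
 *	Illustrative sketch: flushing a range of an object back to its pager
 *	without changing protections.  The wrapper name and range are
 *	hypothetical; the return policy and flush flag are the standard
 *	memory_object constants used above.
 */
static kern_return_t
example_flush_range(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size)
{
	return vm_object_lock_request(object, offset, size,
				      MEMORY_OBJECT_RETURN_ALL,
				      MEMORY_OBJECT_DATA_FLUSH,
				      VM_PROT_NO_CHANGE);
}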
4712
91447636
A
4713/*
4714 * Empty a purgable object by grabbing the physical pages assigned to it and
4715 * putting them on the free queue without writing them to backing store, etc.
4716 * When the pages are next touched they will be demand zero-fill pages. We
4717 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
4718 * skip referenced/dirty pages, pages on the active queue, etc. We're more
4719 * than happy to grab these since this is a purgable object. We mark the
4720 * object as "empty" after reaping its pages.
4721 *
4722 * On entry the object and page queues are locked, the object must be a
4723 * purgable object with no delayed copies pending.
4724 */
4725unsigned int
4726vm_object_purge(vm_object_t object)
4727{
4728 vm_page_t p, next;
4729 unsigned int num_purged_pages;
4730 vm_page_t local_freeq;
4731 unsigned long local_freed;
4732 int purge_loop_quota;
4733/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
4734#define PURGE_BATCH_FREE_LIMIT 50
4735/* release page queues lock every PURGE_LOOP_QUOTA iterations */
4736#define PURGE_LOOP_QUOTA 100
4737
4738 num_purged_pages = 0;
4739 if (object->purgable == VM_OBJECT_NONPURGABLE)
4740 return num_purged_pages;
0b4e3aa0 4741
91447636
A
4742 object->purgable = VM_OBJECT_PURGABLE_EMPTY;
4743
4744 assert(object->copy == VM_OBJECT_NULL);
4745 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4746 purge_loop_quota = PURGE_LOOP_QUOTA;
4747
4748 local_freeq = VM_PAGE_NULL;
4749 local_freed = 0;
4750
4751 /*
4752 * Go through the object's resident pages and try and discard them.
4753 */
4754 next = (vm_page_t)queue_first(&object->memq);
4755 while (!queue_end(&object->memq, (queue_entry_t)next)) {
4756 p = next;
4757 next = (vm_page_t)queue_next(&next->listq);
4758
4759 if (purge_loop_quota-- == 0) {
4760 /*
4761 * Avoid holding the page queues lock for too long.
4762 * Let someone else take it for a while if needed.
4763 * Keep holding the object's lock to guarantee that
4764 * the object's page list doesn't change under us
4765 * while we yield.
4766 */
4767 if (local_freeq != VM_PAGE_NULL) {
4768 /*
4769 * Flush our queue of pages to free.
4770 */
4771 vm_page_free_list(local_freeq);
4772 local_freeq = VM_PAGE_NULL;
4773 local_freed = 0;
4774 }
4775 vm_page_unlock_queues();
4776 mutex_pause();
4777 vm_page_lock_queues();
4778
4779 /* resume with the current page and a new quota */
4780 purge_loop_quota = PURGE_LOOP_QUOTA;
4781 }
4782
4783
4784 if (p->busy || p->cleaning || p->laundry ||
4785 p->list_req_pending) {
4786 /* page is being acted upon, so don't mess with it */
4787 continue;
4788 }
4789 if (p->wire_count) {
4790 /* don't discard a wired page */
4791 continue;
4792 }
4793
4794 if (p->tabled) {
4795 /* clean up the object/offset table */
4796 vm_page_remove(p);
4797 }
4798 if (p->absent) {
4799 /* update the object's count of absent pages */
4800 vm_object_absent_release(object);
4801 }
4802
4803 /* we can discard this page */
4804
4805 /* advertise that this page is in a transition state */
4806 p->busy = TRUE;
4807
4808 if (p->no_isync == TRUE) {
4809 /* the page hasn't been mapped yet */
4810 /* (optimization to delay the i-cache sync) */
4811 } else {
4812 /* unmap the page */
4813 int refmod_state;
4814
4815 refmod_state = pmap_disconnect(p->phys_page);
4816 if (refmod_state & VM_MEM_MODIFIED) {
4817 p->dirty = TRUE;
4818 }
4819 }
4820
4821 if (p->dirty || p->precious) {
4822 /* we saved the cost of cleaning this page ! */
4823 num_purged_pages++;
4824 vm_page_purged_count++;
4825 }
4826
4827 /* remove page from active or inactive queue... */
4828 VM_PAGE_QUEUES_REMOVE(p);
4829
4830 /* ... and put it on our queue of pages to free */
4831 assert(!p->laundry);
4832 assert(p->object != kernel_object);
4833 assert(p->pageq.next == NULL &&
4834 p->pageq.prev == NULL);
4835 p->pageq.next = (queue_entry_t) local_freeq;
4836 local_freeq = p;
4837 if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
4838 /* flush our queue of pages to free */
4839 vm_page_free_list(local_freeq);
4840 local_freeq = VM_PAGE_NULL;
4841 local_freed = 0;
4842 }
4843 }
4844
4845 /* flush our local queue of pages to free one last time */
4846 if (local_freeq != VM_PAGE_NULL) {
4847 vm_page_free_list(local_freeq);
4848 local_freeq = VM_PAGE_NULL;
4849 local_freed = 0;
4850 }
4851
4852 return num_purged_pages;
4853}
4854
4855/*
4856 * vm_object_purgable_control() allows the caller to control and investigate the
4857 * state of a purgable object. A purgable object is created via a call to
4858 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgable object will
4859 * never be coalesced with any other object -- even other purgable objects --
4860 * and will thus always remain a distinct object. A purgable object has
4861 * special semantics when its reference count is exactly 1. If its reference
4862 * count is greater than 1, then a purgable object will behave like a normal
4863 * object and attempts to use this interface will result in an error return
4864 * of KERN_INVALID_ARGUMENT.
4865 *
4866 * A purgable object may be put into a "volatile" state which will make the
4867 * object's pages eligible for being reclaimed without paging to backing
4868 * store if the system runs low on memory. If the pages in a volatile
4869 * purgable object are reclaimed, the purgable object is said to have been
4870 * "emptied." When a purgable object is emptied the system will reclaim as
4871 * many pages from the object as it can in a convenient manner (pages already
4872 * en route to backing store or busy for other reasons are left as is). When
4873 * a purgable object is made volatile, its pages will generally be reclaimed
4874 * before other pages in the application's working set. This semantic is
4875 * generally used by applications which can recreate the data in the object
4876 * faster than it can be paged in. One such example might be media assets
4877 * which can be reread from a much faster RAID volume.
4878 *
4879 * A purgable object may be designated as "non-volatile" which means it will
4880 * behave like all other objects in the system with pages being written to and
4881 * read from backing store as needed to satisfy system memory needs. If the
4882 * object was emptied before the object was made non-volatile, that fact will
4883 * be returned as the old state of the purgable object (see
4884 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
4885 * were reclaimed as part of emptying the object will be refaulted in as
4886 * zero-fill on demand. It is up to the application to note that an object
4887 * was emptied and recreate the object's contents if necessary. When a
4888 * purgable object is made non-volatile, its pages will generally not be paged
4889 * out to backing store in the immediate future. A purgable object may also
4890 * be manually emptied.
4891 *
4892 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
4893 * volatile purgable object may be queried at any time. This information may
4894 * be used as a control input to let the application know when the system is
4895 * experiencing memory pressure and is reclaiming memory.
4896 *
4897 * The specified address may be any address within the purgable object. If
4898 * the specified address does not represent any object in the target task's
4899 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
4900 * object containing the specified address is not a purgable object, then
4901 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
4902 * returned.
4903 *
4904 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
4905 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
4906 * state is used to set the new state of the purgable object and return its
4907 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgable
4908 * object is returned in the parameter state.
4909 *
4910 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
4911 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
4912 * the non-volatile, volatile and volatile/empty states described above.
4913 * Setting the state of a purgable object to VM_PURGABLE_EMPTY will
4914 * immediately reclaim as many pages in the object as can be conveniently
4915 * collected (some may have already been written to backing store or be
4916 * otherwise busy).
4917 *
4918 * The process of making a purgable object non-volatile and determining its
4919 * previous state is atomic. Thus, if a purgable object is made
4920 * VM_PURGABLE_NONVOLATILE and the old state is returned as
4921 * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are
4922 * completely intact and will remain so until the object is made volatile
4923 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
4924 * was reclaimed while it was in a volatile state and its previous contents
4925 * have been lost.
4926 */
4927/*
4928 * The object must be locked.
4929 */
4930kern_return_t
4931vm_object_purgable_control(
4932 vm_object_t object,
4933 vm_purgable_t control,
4934 int *state)
4935{
4936 int old_state;
4937 vm_page_t p;
4938
4939 if (object == VM_OBJECT_NULL) {
4940 /*
4941 * Object must already be present or it can't be purgable.
4942 */
4943 return KERN_INVALID_ARGUMENT;
4944 }
4945
4946 /*
4947 * Get current state of the purgable object.
4948 */
4949 switch (object->purgable) {
4950 case VM_OBJECT_NONPURGABLE:
4951 return KERN_INVALID_ARGUMENT;
4952
4953 case VM_OBJECT_PURGABLE_NONVOLATILE:
4954 old_state = VM_PURGABLE_NONVOLATILE;
4955 break;
4956
4957 case VM_OBJECT_PURGABLE_VOLATILE:
4958 old_state = VM_PURGABLE_VOLATILE;
4959 break;
4960
4961 case VM_OBJECT_PURGABLE_EMPTY:
4962 old_state = VM_PURGABLE_EMPTY;
4963 break;
4964
4965 default:
4966 old_state = VM_PURGABLE_NONVOLATILE;
4967 panic("Bad state (%d) for purgable object!\n",
4968 object->purgable);
4969 /*NOTREACHED*/
4970 }
4971
4972 /* purgable can't have delayed copies - now or in the future */
4973 assert(object->copy == VM_OBJECT_NULL);
4974 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4975
4976 /*
4977 * Execute the desired operation.
4978 */
4979 if (control == VM_PURGABLE_GET_STATE) {
4980 *state = old_state;
4981 return KERN_SUCCESS;
4982 }
4983
4984 switch (*state) {
4985 case VM_PURGABLE_NONVOLATILE:
4986 vm_page_lock_queues();
4987 if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) {
4988 assert(vm_page_purgeable_count >=
4989 object->resident_page_count);
4990 vm_page_purgeable_count -= object->resident_page_count;
4991 }
4992
4993 object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;
4994
4995 /*
4996 * If the object wasn't emptied, then mark all pages of the
4997 * object as referenced in order to give them a complete turn
4998 * of the virtual memory "clock" before becoming candidates
4999 * for paging out (if the system is suffering from memory
5000 * pressure). We don't really need to set the pmap reference
5001 * bits (which would be expensive) since the software copies
5002 * are believed if they're set to true ...
5003 */
5004 if (old_state != VM_PURGABLE_EMPTY) {
5005 for (p = (vm_page_t)queue_first(&object->memq);
5006 !queue_end(&object->memq, (queue_entry_t)p);
5007 p = (vm_page_t)queue_next(&p->listq))
5008 p->reference = TRUE;
5009 }
5010
5011 vm_page_unlock_queues();
5012
5013 break;
5014
5015 case VM_PURGABLE_VOLATILE:
5016 vm_page_lock_queues();
5017
5018 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5019 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5020 vm_page_purgeable_count += object->resident_page_count;
5021 }
5022
5023 object->purgable = VM_OBJECT_PURGABLE_VOLATILE;
5024
5025 /*
5026 * We want the newly volatile purgable object to be a
5027 * candidate for the pageout scan before other pages in the
5028 * application if the system is suffering from memory
5029 * pressure. To do this, we move a page of the object from
5030 * the active queue onto the inactive queue in order to
5031 * promote the object for early reclaim. We only need to move
5032 * a single page since the pageout scan will reap the entire
5033 * purgable object if it finds a single page in a volatile
5034 * state. Obviously we don't do this if there are no pages
5035 * associated with the object or we find a page of the object
5036 * already on the inactive queue.
5037 */
5038 for (p = (vm_page_t)queue_first(&object->memq);
5039 !queue_end(&object->memq, (queue_entry_t)p);
5040 p = (vm_page_t)queue_next(&p->listq)) {
5041 if (p->inactive) {
5042 /* already a page on the inactive queue */
5043 break;
5044 }
5045 if (p->active && !p->busy) {
5046 /* found one we can move */
5047 vm_page_deactivate(p);
5048 break;
5049 }
5050 }
5051 vm_page_unlock_queues();
5052
5053 break;
5054
5055
5056 case VM_PURGABLE_EMPTY:
5057 vm_page_lock_queues();
5058 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5059 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5060 vm_page_purgeable_count += object->resident_page_count;
5061 }
5062 (void) vm_object_purge(object);
5063 vm_page_unlock_queues();
5064 break;
5065
5066 }
5067 *state = old_state;
5068
5069 return KERN_SUCCESS;
5070}
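/*
 *	Illustrative sketch: marking a purgable object volatile and
 *	remembering its previous state.  The wrapper name is hypothetical;
 *	per the comment above, the object must be locked by the caller and
 *	must already be a purgable object with a single reference.
 */
static kern_return_t
example_make_volatile(vm_object_t object, int *old_state)
{
	kern_return_t	kr;
	int		state;

	vm_object_lock(object);

	state = VM_PURGABLE_VOLATILE;
	kr = vm_object_purgable_control(object, VM_PURGABLE_SET_STATE, &state);
	if (kr == KERN_SUCCESS)
		*old_state = state;	/* previous state is returned in "state" */

	vm_object_unlock(object);
	return kr;
}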
0b4e3aa0
A
5071
5072#if TASK_SWAPPER
5073/*
5074 * vm_object_res_deallocate
5075 *
5076 * (recursively) decrement residence counts on vm objects and their shadows.
5077 * Called from vm_object_deallocate and when swapping out an object.
5078 *
5079 * The object is locked, and remains locked throughout the function,
5080 * even as we iterate down the shadow chain. Locks on intermediate objects
5081 * will be dropped, but not the original object.
5082 *
5083 * NOTE: this function used to use recursion, rather than iteration.
5084 */
5085
5086__private_extern__ void
5087vm_object_res_deallocate(
5088 vm_object_t object)
5089{
5090 vm_object_t orig_object = object;
5091 /*
5092 * Object is locked so it can be called directly
5093 * from vm_object_deallocate. Original object is never
5094 * unlocked.
5095 */
5096 assert(object->res_count > 0);
5097 while (--object->res_count == 0) {
5098 assert(object->ref_count >= object->res_count);
5099 vm_object_deactivate_all_pages(object);
5100 /* iterate on shadow, if present */
5101 if (object->shadow != VM_OBJECT_NULL) {
5102 vm_object_t tmp_object = object->shadow;
5103 vm_object_lock(tmp_object);
5104 if (object != orig_object)
5105 vm_object_unlock(object);
5106 object = tmp_object;
5107 assert(object->res_count > 0);
5108 } else
5109 break;
5110 }
5111 if (object != orig_object)
1c79356b 5112 vm_object_unlock(object);
0b4e3aa0
A
5113}
5114
5115/*
5116 * vm_object_res_reference
5117 *
5118 * Internal function to increment residence count on a vm object
5119 * and its shadows. It is called only from vm_object_reference, and
5120 * when swapping in a vm object, via vm_map_swap.
5121 *
5122 * The object is locked, and remains locked throughout the function,
5123 * even as we iterate down the shadow chain. Locks on intermediate objects
5124 * will be dropped, but not the original object.
5125 *
5126 * NOTE: this function used to use recursion, rather than iteration.
5127 */
5128
5129__private_extern__ void
5130vm_object_res_reference(
5131 vm_object_t object)
5132{
5133 vm_object_t orig_object = object;
5134 /*
5135 * Object is locked, so this can be called directly
5136 * from vm_object_reference. This lock is never released.
5137 */
5138 while ((++object->res_count == 1) &&
5139 (object->shadow != VM_OBJECT_NULL)) {
5140 vm_object_t tmp_object = object->shadow;
5141
5142 assert(object->ref_count >= object->res_count);
5143 vm_object_lock(tmp_object);
5144 if (object != orig_object)
5145 vm_object_unlock(object);
5146 object = tmp_object;
1c79356b 5147 }
0b4e3aa0
A
5148 if (object != orig_object)
5149 vm_object_unlock(object);
5150 assert(orig_object->ref_count >= orig_object->res_count);
1c79356b 5151}
0b4e3aa0
A
5152#endif /* TASK_SWAPPER */
5153
5154/*
5155 * vm_object_reference:
5156 *
5157 * Gets another reference to the given object.
5158 */
5159#ifdef vm_object_reference
5160#undef vm_object_reference
5161#endif
5162__private_extern__ void
5163vm_object_reference(
5164 register vm_object_t object)
5165{
5166 if (object == VM_OBJECT_NULL)
5167 return;
5168
5169 vm_object_lock(object);
5170 assert(object->ref_count > 0);
5171 vm_object_reference_locked(object);
5172 vm_object_unlock(object);
5173}
5174
1c79356b
A
5175#ifdef MACH_BSD
5176/*
5177 * Scale the vm_object_cache
5178 * This is required to make sure that the vm_object_cache is big
5179 * enough to effectively cache the mapped file.
5180 * This is really important with UBC as all the regular file vnodes
5181 * have a memory object associated with them. Having this cache too
5182 * small results in rapid reclaim of vnodes and hurts performance a LOT!
5183 *
5184 * This is also needed as the number of vnodes can be dynamically scaled.
5185 */
5186kern_return_t
91447636
A
5187adjust_vm_object_cache(
5188 __unused vm_size_t oval,
5189 vm_size_t nval)
1c79356b
A
5190{
5191 vm_object_cached_max = nval;
5192 vm_object_cache_trim(FALSE);
5193 return (KERN_SUCCESS);
5194}
5195#endif /* MACH_BSD */
5196
91447636
A
5197
5198/*
5199 * vm_object_transpose
5200 *
5201 * This routine takes two VM objects of the same size and exchanges
5202 * their backing store.
5203 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
5204 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
5205 *
5206 * The VM objects must not be locked by caller.
5207 */
5208kern_return_t
5209vm_object_transpose(
5210 vm_object_t object1,
5211 vm_object_t object2,
5212 vm_object_size_t transpose_size)
5213{
5214 vm_object_t tmp_object;
5215 kern_return_t retval;
5216 boolean_t object1_locked, object2_locked;
5217 boolean_t object1_paging, object2_paging;
5218 vm_page_t page;
5219 vm_object_offset_t page_offset;
5220
5221 tmp_object = VM_OBJECT_NULL;
5222 object1_locked = FALSE; object2_locked = FALSE;
5223 object1_paging = FALSE; object2_paging = FALSE;
5224
5225 if (object1 == object2 ||
5226 object1 == VM_OBJECT_NULL ||
5227 object2 == VM_OBJECT_NULL) {
5228 /*
5229 * If the 2 VM objects are the same, there's
5230 * no point in exchanging their backing store.
5231 */
5232 retval = KERN_INVALID_VALUE;
5233 goto done;
5234 }
5235
5236 vm_object_lock(object1);
5237 object1_locked = TRUE;
5238 if (object1->copy || object1->shadow || object1->shadowed ||
5239 object1->purgable != VM_OBJECT_NONPURGABLE) {
5240 /*
5241 * We don't deal with copy or shadow objects (yet).
5242 */
5243 retval = KERN_INVALID_VALUE;
5244 goto done;
5245 }
5246 /*
5247 * Since we're about to mess with the object's backing store,
5248 * mark it as "paging_in_progress". Note that this is not enough
5249 * to prevent any paging activity on this object, so the caller should
5250 * have "quiesced" the objects beforehand, via a UPL operation with
5251 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
5252 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
5253 */
5254 vm_object_paging_begin(object1);
5255 object1_paging = TRUE;
5256 vm_object_unlock(object1);
5257 object1_locked = FALSE;
5258
5259 /*
5260 * Same as above for the 2nd object...
5261 */
5262 vm_object_lock(object2);
5263 object2_locked = TRUE;
5264 if (object2->copy || object2->shadow || object2->shadowed ||
5265 object2->purgable != VM_OBJECT_NONPURGABLE) {
5266 retval = KERN_INVALID_VALUE;
5267 goto done;
5268 }
5269 vm_object_paging_begin(object2);
5270 object2_paging = TRUE;
5271 vm_object_unlock(object2);
5272 object2_locked = FALSE;
5273
5274 /*
5275 * Allocate a temporary VM object to hold object1's contents
5276 * while we copy object2 to object1.
5277 */
5278 tmp_object = vm_object_allocate(transpose_size);
5279 vm_object_lock(tmp_object);
5280 vm_object_paging_begin(tmp_object);
5281 tmp_object->can_persist = FALSE;
5282
5283 /*
5284 * Since we need to lock both objects at the same time,
5285 * make sure we always lock them in the same order to
5286 * avoid deadlocks.
5287 */
5288 if (object1 < object2) {
5289 vm_object_lock(object1);
5290 vm_object_lock(object2);
5291 } else {
5292 vm_object_lock(object2);
5293 vm_object_lock(object1);
5294 }
5295 object1_locked = TRUE;
5296 object2_locked = TRUE;
5297
5298 if (object1->size != object2->size ||
5299 object1->size != transpose_size) {
5300 /*
5301 * If the 2 objects don't have the same size, we can't
5302 * exchange their backing stores or one would overflow.
5303 * If their size doesn't match the caller's
5304 * "transpose_size", we can't do it either because the
5305 * transpose operation will affect the entire span of
5306 * the objects.
5307 */
5308 retval = KERN_INVALID_VALUE;
5309 goto done;
5310 }
5311
5312
5313 /*
5314 * Transpose the lists of resident pages.
5315 */
5316 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
5317 /*
5318 * No pages in object1, just transfer pages
5319 * from object2 to object1. No need to go through
5320 * an intermediate object.
5321 */
5322 while (!queue_empty(&object2->memq)) {
5323 page = (vm_page_t) queue_first(&object2->memq);
5324 vm_page_rename(page, object1, page->offset);
5325 }
5326 assert(queue_empty(&object2->memq));
5327 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
5328 /*
5329 * No pages in object2, just transfer pages
5330 * from object1 to object2. No need to go through
5331 * an intermediate object.
5332 */
5333 while (!queue_empty(&object1->memq)) {
5334 page = (vm_page_t) queue_first(&object1->memq);
5335 vm_page_rename(page, object2, page->offset);
5336 }
5337 assert(queue_empty(&object1->memq));
5338 } else {
5339 /* transfer object1's pages to tmp_object */
5340 vm_page_lock_queues();
5341 while (!queue_empty(&object1->memq)) {
5342 page = (vm_page_t) queue_first(&object1->memq);
5343 page_offset = page->offset;
5344 vm_page_remove(page);
5345 page->offset = page_offset;
5346 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
5347 }
5348 vm_page_unlock_queues();
5349 assert(queue_empty(&object1->memq));
5350 /* transfer object2's pages to object1 */
5351 while (!queue_empty(&object2->memq)) {
5352 page = (vm_page_t) queue_first(&object2->memq);
5353 vm_page_rename(page, object1, page->offset);
5354 }
5355 assert(queue_empty(&object2->memq));
5356 /* transfer tmp_object's pages to object1 */
5357 while (!queue_empty(&tmp_object->memq)) {
5358 page = (vm_page_t) queue_first(&tmp_object->memq);
5359 queue_remove(&tmp_object->memq, page,
5360 vm_page_t, listq);
5361 vm_page_insert(page, object2, page->offset);
5362 }
5363 assert(queue_empty(&tmp_object->memq));
5364 }
5365
5366 /* no need to transpose the size: they should be identical */
5367 assert(object1->size == object2->size);
5368
5369#define __TRANSPOSE_FIELD(field) \
5370MACRO_BEGIN \
5371 tmp_object->field = object1->field; \
5372 object1->field = object2->field; \
5373 object2->field = tmp_object->field; \
5374MACRO_END
5375
5376 assert(!object1->copy);
5377 assert(!object2->copy);
5378
5379 assert(!object1->shadow);
5380 assert(!object2->shadow);
5381
5382 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
5383 __TRANSPOSE_FIELD(pager);
5384 __TRANSPOSE_FIELD(paging_offset);
5385
5386 __TRANSPOSE_FIELD(pager_control);
5387 /* update the memory_objects' pointers back to the VM objects */
5388 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5389 memory_object_control_collapse(object1->pager_control,
5390 object1);
5391 }
5392 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5393 memory_object_control_collapse(object2->pager_control,
5394 object2);
5395 }
5396
5397 __TRANSPOSE_FIELD(absent_count);
5398
5399 assert(object1->paging_in_progress);
5400 assert(object2->paging_in_progress);
5401
5402 __TRANSPOSE_FIELD(pager_created);
5403 __TRANSPOSE_FIELD(pager_initialized);
5404 __TRANSPOSE_FIELD(pager_ready);
5405 __TRANSPOSE_FIELD(pager_trusted);
5406 __TRANSPOSE_FIELD(internal);
5407 __TRANSPOSE_FIELD(temporary);
5408 __TRANSPOSE_FIELD(private);
5409 __TRANSPOSE_FIELD(pageout);
5410 __TRANSPOSE_FIELD(true_share);
5411 __TRANSPOSE_FIELD(phys_contiguous);
5412 __TRANSPOSE_FIELD(nophyscache);
5413 __TRANSPOSE_FIELD(last_alloc);
5414 __TRANSPOSE_FIELD(sequential);
5415 __TRANSPOSE_FIELD(cluster_size);
5416 __TRANSPOSE_FIELD(existence_map);
5417 __TRANSPOSE_FIELD(cow_hint);
5418 __TRANSPOSE_FIELD(wimg_bits);
5419
5420#undef __TRANSPOSE_FIELD
5421
5422 retval = KERN_SUCCESS;
5423
5424done:
5425 /*
5426 * Cleanup.
5427 */
5428 if (tmp_object != VM_OBJECT_NULL) {
5429 vm_object_paging_end(tmp_object);
5430 vm_object_unlock(tmp_object);
5431 /*
5432 * Re-initialize the temporary object to avoid
5433 * deallocating a real pager.
5434 */
5435 _vm_object_allocate(transpose_size, tmp_object);
5436 vm_object_deallocate(tmp_object);
5437 tmp_object = VM_OBJECT_NULL;
5438 }
5439
5440 if (object1_locked) {
5441 vm_object_unlock(object1);
5442 object1_locked = FALSE;
5443 }
5444 if (object2_locked) {
5445 vm_object_unlock(object2);
5446 object2_locked = FALSE;
5447 }
5448 if (object1_paging) {
5449 vm_object_lock(object1);
5450 vm_object_paging_end(object1);
5451 vm_object_unlock(object1);
5452 object1_paging = FALSE;
5453 }
5454 if (object2_paging) {
5455 vm_object_lock(object2);
5456 vm_object_paging_end(object2);
5457 vm_object_unlock(object2);
5458 object2_paging = FALSE;
5459 }
5460
5461 return retval;
5462}
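/*
 *	Illustrative sketch: swapping the backing store of two same-sized
 *	objects.  The wrapper name is hypothetical; per the comment above,
 *	both objects should first be quiesced via a UPL created with
 *	UPL_SET_IO_WIRE and UPL_BLOCK_ACCESS, and neither may be locked here.
 */
static kern_return_t
example_swap_backing(
	vm_object_t		object1,
	vm_object_t		object2,
	vm_object_size_t	size)
{
	/* fails with KERN_INVALID_VALUE unless both objects are "size" big */
	return vm_object_transpose(object1, object2, size);
}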
c0fea474
A
5463
5464
5465/* Allow manipulation of individual page state. This is actually part of */
5466/* the UPL regimen but takes place on the VM object rather than on a UPL */
5467
5468kern_return_t
5469vm_object_page_op(
5470 vm_object_t object,
5471 vm_object_offset_t offset,
5472 int ops,
5473 ppnum_t *phys_entry,
5474 int *flags)
5475{
5476 vm_page_t dst_page;
5477
5478 vm_object_lock(object);
5479
5480 if(ops & UPL_POP_PHYSICAL) {
5481 if(object->phys_contiguous) {
5482 if (phys_entry) {
5483 *phys_entry = (ppnum_t)
5484 (object->shadow_offset >> 12);
5485 }
5486 vm_object_unlock(object);
5487 return KERN_SUCCESS;
5488 } else {
5489 vm_object_unlock(object);
5490 return KERN_INVALID_OBJECT;
5491 }
5492 }
5493 if(object->phys_contiguous) {
5494 vm_object_unlock(object);
5495 return KERN_INVALID_OBJECT;
5496 }
5497
5498 while(TRUE) {
5499 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
5500 vm_object_unlock(object);
5501 return KERN_FAILURE;
5502 }
5503
5504 /* Sync up on getting the busy bit */
5505 if((dst_page->busy || dst_page->cleaning) &&
5506 (((ops & UPL_POP_SET) &&
5507 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
5508 /* someone else is playing with the page, we will */
5509 /* have to wait */
5510 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5511 continue;
5512 }
5513
5514 if (ops & UPL_POP_DUMP) {
5515 vm_page_lock_queues();
5516
5517 if (dst_page->no_isync == FALSE)
5518 pmap_disconnect(dst_page->phys_page);
5519 vm_page_free(dst_page);
5520
5521 vm_page_unlock_queues();
5522 break;
5523 }
5524
5525 if (flags) {
5526 *flags = 0;
5527
5528 /* Record the state of these flags before the requested */
5529 /* ops are applied */
5530
5531 if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
5532 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
5533 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
5534 if(dst_page->absent) *flags |= UPL_POP_ABSENT;
5535 if(dst_page->busy) *flags |= UPL_POP_BUSY;
5536 }
5537
5538 /* The caller should set UPL_POP_BUSY in this call, or should */
5539 /* have set the page busy in a prior call */
5540 if(ops & UPL_POP_SET) {
5541 /* The protection granted by this assert is not */
5542 /* complete.  If the caller violates the convention */
5543 /* and attempts to change page state without first */
5544 /* setting busy, we may not catch it here because */
5545 /* the page may already be busy.  Repeated */
5546 /* violations will, however, trip the assert sooner */
5547 /* or later. */
5548 assert(dst_page->busy || (ops & UPL_POP_BUSY));
5549 if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
5550 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
5551 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
5552 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
5553 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
5554 }
5555
5556 if(ops & UPL_POP_CLR) {
5557 assert(dst_page->busy);
5558 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
5559 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
5560 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
5561 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
5562 if (ops & UPL_POP_BUSY) {
5563 dst_page->busy = FALSE;
5564 PAGE_WAKEUP(dst_page);
5565 }
5566 }
5567
5568 if (dst_page->encrypted) {
5569 /*
5570 * ENCRYPTED SWAP:
5571 * We need to decrypt this encrypted page before the
5572 * caller can access its contents.
5573 * But if the caller really wants to access the page's
5574 * contents, they have to keep the page "busy".
5575 * Otherwise, the page could get recycled or re-encrypted
5576 * at any time.
5577 */
5578 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
5579 dst_page->busy) {
5580 /*
5581 * The page is stable enough to be accessed by
5582 * the caller, so make sure its contents are
5583 * not encrypted.
5584 */
5585 vm_page_decrypt(dst_page, 0);
5586 } else {
5587 /*
5588 * The page is not busy, so don't bother
5589 * decrypting it, since anything could
5590 * happen to it between now and when the
5591 * caller wants to access it.
5592 * We should not give the caller access
5593 * to this page.
5594 */
5595 assert(!phys_entry);
5596 }
5597 }
5598
5599 if (phys_entry) {
5600 /*
5601 * The physical page number will remain valid
5602 * only if the page is kept busy.
5603 * ENCRYPTED SWAP: make sure we don't let the
5604 * caller access an encrypted page.
5605 */
5606 assert(dst_page->busy);
5607 assert(!dst_page->encrypted);
5608 *phys_entry = dst_page->phys_page;
5609 }
5610
5611 break;
5612 }
5613
5614 vm_object_unlock(object);
5615 return KERN_SUCCESS;
5616
5617}
5618
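/*
 * Illustrative sketch (hedged, not compiled): one way a kernel-internal
 * caller might use vm_object_page_op() to pin a resident page, read its
 * physical page number, and then release it, following the UPL_POP_BUSY
 * convention noted above.  The helper name "vm_object_page_op_example"
 * and its parameters are hypothetical; the caller is assumed to hold a
 * reference on "object" and must not hold the object lock.
 */
#if 0	/* example only */
static kern_return_t
vm_object_page_op_example(
	vm_object_t		object,
	vm_object_offset_t	offset,
	ppnum_t			*phys)
{
	kern_return_t		kr;
	int			flags;

	/* set the busy bit so the physical page number stays valid */
	kr = vm_object_page_op(object, offset,
			       UPL_POP_SET | UPL_POP_BUSY,
			       phys, &flags);
	if (kr != KERN_SUCCESS) {
		/* page not resident, or object is phys_contiguous */
		return kr;
	}

	/* ... use *phys while the page remains busy ... */

	/* clear the busy bit and wake up anyone waiting on the page */
	return vm_object_page_op(object, offset,
				 UPL_POP_CLR | UPL_POP_BUSY,
				 NULL, NULL);
}
#endif
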
5619/*
5620 * vm_object_range_op offers a performance enhancement over
5621 * vm_object_page_op for operations that do not require per-page
5622 * state to be returned to the caller. vm_object_page_op was created as
5623 * a low-cost alternative to page manipulation via UPLs when only a
5624 * single page is involved; vm_object_range_op extends the _op family
5625 * to multiple pages, and because no per-page state is handled the
5626 * caller avoids the overhead of the upl structures.
5627 */
5628
5629kern_return_t
5630vm_object_range_op(
5631 vm_object_t object,
5632 vm_object_offset_t offset_beg,
5633 vm_object_offset_t offset_end,
5634 int ops,
5635 int *range)
5636{
5637 vm_object_offset_t offset;
5638 vm_page_t dst_page;
5639
5640 if (object->resident_page_count == 0) {
5641 if (range) {
5642 if (ops & UPL_ROP_PRESENT)
5643 *range = 0;
5644 else
5645 *range = offset_end - offset_beg;
5646 }
5647 return KERN_SUCCESS;
5648 }
5649 vm_object_lock(object);
5650
5651 if (object->phys_contiguous) {
5652 vm_object_unlock(object);
5653 return KERN_INVALID_OBJECT;
5654 }
5655
5656 offset = offset_beg;
5657
5658 while (offset < offset_end) {
5659 dst_page = vm_page_lookup(object, offset);
5660 if (dst_page != VM_PAGE_NULL) {
5661 if (ops & UPL_ROP_DUMP) {
5662 if (dst_page->busy || dst_page->cleaning) {
5663 /*
5664 * someone else is playing with the
5665 * page; we will have to wait
5666 */
5667 PAGE_SLEEP(object,
5668 dst_page, THREAD_UNINT);
5669 /*
5670 * need to look the page up again since its
5671 * state may have changed while we slept;
5672 * it might even belong to a different object
5673 * at this point
5674 */
5675 continue;
5676 }
5677 vm_page_lock_queues();
5678
5679 if (dst_page->no_isync == FALSE)
5680 pmap_disconnect(dst_page->phys_page);
5681 vm_page_free(dst_page);
5682
5683 vm_page_unlock_queues();
5684 } else if (ops & UPL_ROP_ABSENT)
5685 break;
5686 } else if (ops & UPL_ROP_PRESENT)
5687 break;
5688
5689 offset += PAGE_SIZE;
5690 }
5691 vm_object_unlock(object);
5692
5693 if (range)
5694 *range = offset - offset_beg;
5695
5696 return KERN_SUCCESS;
5697}
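
/*
 * Illustrative sketch (hedged, not compiled): using vm_object_range_op()
 * with UPL_ROP_PRESENT to measure, in bytes, how much of a range is
 * backed by resident pages starting at "start".  The helper name and its
 * parameters are hypothetical; the caller is assumed to hold a reference
 * on "object" and must not hold the object lock.
 */
#if 0	/* example only */
static int
vm_object_resident_run_example(
	vm_object_t		object,
	vm_object_offset_t	start,
	vm_object_offset_t	end)
{
	int	range = 0;

	/*
	 * With UPL_ROP_PRESENT the scan stops at the first non-resident
	 * page, so "range" comes back as the byte length of the leading
	 * run of resident pages (0 if the first page is absent).
	 */
	(void) vm_object_range_op(object, start, end,
				  UPL_ROP_PRESENT, &range);
	return range;
}
#endif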