]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_object.c
xnu-1228.9.59.tar.gz
[apple/xnu.git] / osfmk / vm / vm_object.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_object.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Virtual memory object module.
63 */
64
2d21ac55 65#include <debug.h>
1c79356b
A
66#include <mach_pagemap.h>
67#include <task_swapper.h>
68
0b4e3aa0 69#include <mach/mach_types.h>
1c79356b
A
70#include <mach/memory_object.h>
71#include <mach/memory_object_default.h>
72#include <mach/memory_object_control_server.h>
73#include <mach/vm_param.h>
91447636
A
74
75#include <ipc/ipc_types.h>
1c79356b 76#include <ipc/ipc_port.h>
91447636
A
77
78#include <kern/kern_types.h>
1c79356b
A
79#include <kern/assert.h>
80#include <kern/lock.h>
81#include <kern/queue.h>
82#include <kern/xpr.h>
83#include <kern/zalloc.h>
84#include <kern/host.h>
85#include <kern/host_statistics.h>
86#include <kern/processor.h>
91447636
A
87#include <kern/misc_protos.h>
88
1c79356b
A
89#include <vm/memory_object.h>
90#include <vm/vm_fault.h>
91#include <vm/vm_map.h>
92#include <vm/vm_object.h>
93#include <vm/vm_page.h>
94#include <vm/vm_pageout.h>
91447636 95#include <vm/vm_protos.h>
2d21ac55 96#include <vm/vm_purgeable_internal.h>
1c79356b 97
1c79356b
A
98/*
99 * Virtual memory objects maintain the actual data
100 * associated with allocated virtual memory. A given
101 * page of memory exists within exactly one object.
102 *
103 * An object is only deallocated when all "references"
0b4e3aa0 104 * are given up.
1c79356b
A
105 *
106 * Associated with each object is a list of all resident
107 * memory pages belonging to that object; this list is
108 * maintained by the "vm_page" module, but locked by the object's
109 * lock.
110 *
0b4e3aa0 111 * Each object also records the memory object reference
1c79356b 112 * that is used by the kernel to request and write
0b4e3aa0 113 * back data (the memory object, field "pager"), etc...
1c79356b
A
114 *
115 * Virtual memory objects are allocated to provide
116 * zero-filled memory (vm_allocate) or map a user-defined
117 * memory object into a virtual address space (vm_map).
118 *
119 * Virtual memory objects that refer to a user-defined
120 * memory object are called "permanent", because all changes
121 * made in virtual memory are reflected back to the
122 * memory manager, which may then store it permanently.
123 * Other virtual memory objects are called "temporary",
124 * meaning that changes need be written back only when
125 * necessary to reclaim pages, and that storage associated
126 * with the object can be discarded once it is no longer
127 * mapped.
128 *
129 * A permanent memory object may be mapped into more
130 * than one virtual address space. Moreover, two threads
131 * may attempt to make the first mapping of a memory
132 * object concurrently. Only one thread is allowed to
133 * complete this mapping; all others wait for the
134 * "pager_initialized" field is asserted, indicating
135 * that the first thread has initialized all of the
136 * necessary fields in the virtual memory object structure.
137 *
138 * The kernel relies on a *default memory manager* to
139 * provide backing storage for the zero-filled virtual
0b4e3aa0 140 * memory objects. The pager memory objects associated
1c79356b 141 * with these temporary virtual memory objects are only
0b4e3aa0
A
142 * requested from the default memory manager when it
143 * becomes necessary. Virtual memory objects
1c79356b
A
144 * that depend on the default memory manager are called
145 * "internal". The "pager_created" field is provided to
146 * indicate whether these ports have ever been allocated.
147 *
148 * The kernel may also create virtual memory objects to
149 * hold changed pages after a copy-on-write operation.
150 * In this case, the virtual memory object (and its
151 * backing storage -- its memory object) only contain
152 * those pages that have been changed. The "shadow"
153 * field refers to the virtual memory object that contains
154 * the remainder of the contents. The "shadow_offset"
155 * field indicates where in the "shadow" these contents begin.
156 * The "copy" field refers to a virtual memory object
157 * to which changed pages must be copied before changing
158 * this object, in order to implement another form
159 * of copy-on-write optimization.
160 *
161 * The virtual memory object structure also records
162 * the attributes associated with its memory object.
163 * The "pager_ready", "can_persist" and "copy_strategy"
164 * fields represent those attributes. The "cached_list"
165 * field is used in the implementation of the persistence
166 * attribute.
167 *
168 * ZZZ Continue this comment.
169 */
170
171/* Forward declarations for internal functions. */
0b4e3aa0 172static kern_return_t vm_object_terminate(
1c79356b
A
173 vm_object_t object);
174
175extern void vm_object_remove(
176 vm_object_t object);
177
0b4e3aa0 178static vm_object_t vm_object_cache_trim(
1c79356b
A
179 boolean_t called_from_vm_object_deallocate);
180
0b4e3aa0 181static void vm_object_deactivate_all_pages(
1c79356b
A
182 vm_object_t object);
183
0b4e3aa0 184static kern_return_t vm_object_copy_call(
1c79356b
A
185 vm_object_t src_object,
186 vm_object_offset_t src_offset,
187 vm_object_size_t size,
188 vm_object_t *_result_object);
189
0b4e3aa0 190static void vm_object_do_collapse(
1c79356b
A
191 vm_object_t object,
192 vm_object_t backing_object);
193
0b4e3aa0 194static void vm_object_do_bypass(
1c79356b
A
195 vm_object_t object,
196 vm_object_t backing_object);
197
0b4e3aa0
A
198static void vm_object_release_pager(
199 memory_object_t pager);
1c79356b 200
0b4e3aa0 201static zone_t vm_object_zone; /* vm backing store zone */
1c79356b
A
202
203/*
204 * All wired-down kernel memory belongs to a single virtual
205 * memory object (kernel_object) to avoid wasting data structures.
206 */
0b4e3aa0 207static struct vm_object kernel_object_store;
0c530ab8 208vm_object_t kernel_object;
1c79356b 209
2d21ac55 210
1c79356b
A
211/*
212 * The submap object is used as a placeholder for vm_map_submap
213 * operations. The object is declared in vm_map.c because it
214 * is exported by the vm_map module. The storage is declared
215 * here because it must be initialized here.
216 */
0b4e3aa0 217static struct vm_object vm_submap_object_store;
1c79356b
A
218
219/*
220 * Virtual memory objects are initialized from
221 * a template (see vm_object_allocate).
222 *
223 * When adding a new field to the virtual memory
224 * object structure, be sure to add initialization
0b4e3aa0 225 * (see _vm_object_allocate()).
1c79356b 226 */
0b4e3aa0 227static struct vm_object vm_object_template;
1c79356b
A
228
229/*
230 * Virtual memory objects that are not referenced by
231 * any address maps, but that are allowed to persist
232 * (an attribute specified by the associated memory manager),
233 * are kept in a queue (vm_object_cached_list).
234 *
235 * When an object from this queue is referenced again,
236 * for example to make another address space mapping,
237 * it must be removed from the queue. That is, the
238 * queue contains *only* objects with zero references.
239 *
240 * The kernel may choose to terminate objects from this
241 * queue in order to reclaim storage. The current policy
242 * is to permit a fixed maximum number of unreferenced
243 * objects (vm_object_cached_max).
244 *
245 * A spin lock (accessed by routines
246 * vm_object_cache_{lock,lock_try,unlock}) governs the
247 * object cache. It must be held when objects are
248 * added to or removed from the cache (in vm_object_terminate).
249 * The routines that acquire a reference to a virtual
250 * memory object based on one of the memory object ports
251 * must also lock the cache.
252 *
253 * Ideally, the object cache should be more isolated
254 * from the reference mechanism, so that the lock need
255 * not be held to make simple references.
256 */
0b4e3aa0 257static queue_head_t vm_object_cached_list;
9bccf70c 258static int vm_object_cached_count=0;
0b4e3aa0
A
259static int vm_object_cached_high; /* highest # cached objects */
260static int vm_object_cached_max = 512; /* may be patched*/
1c79356b 261
0b4e3aa0 262static decl_mutex_data(,vm_object_cached_lock_data)
1c79356b
A
263
264#define vm_object_cache_lock() \
265 mutex_lock(&vm_object_cached_lock_data)
266#define vm_object_cache_lock_try() \
267 mutex_try(&vm_object_cached_lock_data)
268#define vm_object_cache_unlock() \
269 mutex_unlock(&vm_object_cached_lock_data)
270
271#define VM_OBJECT_HASH_COUNT 1024
0b4e3aa0
A
272static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
273static struct zone *vm_object_hash_zone;
1c79356b
A
274
275struct vm_object_hash_entry {
276 queue_chain_t hash_link; /* hash chain link */
0b4e3aa0 277 memory_object_t pager; /* pager we represent */
1c79356b
A
278 vm_object_t object; /* corresponding object */
279 boolean_t waiting; /* someone waiting for
280 * termination */
281};
282
283typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
284#define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
285
286#define VM_OBJECT_HASH_SHIFT 8
287#define vm_object_hash(pager) \
288 ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
289
91447636
A
290void vm_object_hash_entry_free(
291 vm_object_hash_entry_t entry);
292
8f6c56a5
A
293static void vm_object_reap(vm_object_t object);
294static void vm_object_reap_async(vm_object_t object);
295static void vm_object_reaper_thread(void);
296static queue_head_t vm_object_reaper_queue; /* protected by vm_object_cache_lock() */
297unsigned int vm_object_reap_count = 0;
298unsigned int vm_object_reap_count_async = 0;
299
1c79356b
A
300/*
301 * vm_object_hash_lookup looks up a pager in the hashtable
302 * and returns the corresponding entry, with optional removal.
303 */
304
0b4e3aa0 305static vm_object_hash_entry_t
1c79356b 306vm_object_hash_lookup(
0b4e3aa0 307 memory_object_t pager,
1c79356b
A
308 boolean_t remove_entry)
309{
310 register queue_t bucket;
311 register vm_object_hash_entry_t entry;
312
313 bucket = &vm_object_hashtable[vm_object_hash(pager)];
314
315 entry = (vm_object_hash_entry_t)queue_first(bucket);
316 while (!queue_end(bucket, (queue_entry_t)entry)) {
317 if (entry->pager == pager && !remove_entry)
318 return(entry);
319 else if (entry->pager == pager) {
320 queue_remove(bucket, entry,
321 vm_object_hash_entry_t, hash_link);
322 return(entry);
323 }
324
325 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
326 }
327
328 return(VM_OBJECT_HASH_ENTRY_NULL);
329}
330
331/*
332 * vm_object_hash_enter enters the specified
333 * pager / cache object association in the hashtable.
334 */
335
0b4e3aa0 336static void
1c79356b
A
337vm_object_hash_insert(
338 vm_object_hash_entry_t entry)
339{
340 register queue_t bucket;
341
342 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
343
344 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
345}
346
0b4e3aa0 347static vm_object_hash_entry_t
1c79356b 348vm_object_hash_entry_alloc(
0b4e3aa0 349 memory_object_t pager)
1c79356b
A
350{
351 vm_object_hash_entry_t entry;
352
353 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
354 entry->pager = pager;
355 entry->object = VM_OBJECT_NULL;
356 entry->waiting = FALSE;
357
358 return(entry);
359}
360
361void
362vm_object_hash_entry_free(
363 vm_object_hash_entry_t entry)
364{
91447636 365 zfree(vm_object_hash_zone, entry);
1c79356b
A
366}
367
368/*
369 * vm_object_allocate:
370 *
371 * Returns a new object with the given size.
372 */
373
91447636 374__private_extern__ void
1c79356b
A
375_vm_object_allocate(
376 vm_object_size_t size,
377 vm_object_t object)
378{
379 XPR(XPR_VM_OBJECT,
380 "vm_object_allocate, object 0x%X size 0x%X\n",
381 (integer_t)object, size, 0,0,0);
382
383 *object = vm_object_template;
384 queue_init(&object->memq);
385 queue_init(&object->msr_q);
91447636 386#ifdef UPL_DEBUG
1c79356b 387 queue_init(&object->uplq);
91447636 388#endif /* UPL_DEBUG */
1c79356b
A
389 vm_object_lock_init(object);
390 object->size = size;
391}
392
0b4e3aa0 393__private_extern__ vm_object_t
1c79356b
A
394vm_object_allocate(
395 vm_object_size_t size)
396{
397 register vm_object_t object;
1c79356b
A
398
399 object = (vm_object_t) zalloc(vm_object_zone);
400
0b4e3aa0
A
401// dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
402
403 if (object != VM_OBJECT_NULL)
404 _vm_object_allocate(size, object);
1c79356b
A
405
406 return object;
407}
408
2d21ac55
A
409
410lck_grp_t vm_object_lck_grp;
411lck_grp_attr_t vm_object_lck_grp_attr;
412lck_attr_t vm_object_lck_attr;
413lck_attr_t kernel_object_lck_attr;
414
1c79356b
A
415/*
416 * vm_object_bootstrap:
417 *
418 * Initialize the VM objects module.
419 */
0b4e3aa0 420__private_extern__ void
1c79356b
A
421vm_object_bootstrap(void)
422{
91447636 423 register int i;
1c79356b
A
424
425 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
55e303ae
A
426 round_page_32(512*1024),
427 round_page_32(12*1024),
1c79356b
A
428 "vm objects");
429
6601e61a 430 queue_init(&vm_object_reaper_queue);
1c79356b 431 queue_init(&vm_object_cached_list);
91447636 432 mutex_init(&vm_object_cached_lock_data, 0);
1c79356b
A
433
434 vm_object_hash_zone =
435 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
55e303ae
A
436 round_page_32(512*1024),
437 round_page_32(12*1024),
1c79356b
A
438 "vm object hash entries");
439
440 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
441 queue_init(&vm_object_hashtable[i]);
442
2d21ac55
A
443 vm_object_init_lck_grp();
444
1c79356b
A
445 /*
446 * Fill in a template object, for quick initialization
447 */
448
449 /* memq; Lock; init after allocation */
2d21ac55
A
450 vm_object_template.memq.prev = NULL;
451 vm_object_template.memq.next = NULL;
452#if 0
453 /*
454 * We can't call vm_object_lock_init() here because that will
455 * allocate some memory and VM is not fully initialized yet.
456 * The lock will be initialized for each allocate object in
457 * _vm_object_allocate(), so we don't need to initialize it in
458 * the vm_object_template.
459 */
460 vm_object_lock_init(&vm_object_template);
461#endif
1c79356b 462 vm_object_template.size = 0;
91447636 463 vm_object_template.memq_hint = VM_PAGE_NULL;
1c79356b
A
464 vm_object_template.ref_count = 1;
465#if TASK_SWAPPER
466 vm_object_template.res_count = 1;
467#endif /* TASK_SWAPPER */
468 vm_object_template.resident_page_count = 0;
469 vm_object_template.copy = VM_OBJECT_NULL;
470 vm_object_template.shadow = VM_OBJECT_NULL;
471 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
0b4e3aa0 472 vm_object_template.pager = MEMORY_OBJECT_NULL;
1c79356b 473 vm_object_template.paging_offset = 0;
91447636 474 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
1c79356b 475 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
1c79356b
A
476 vm_object_template.paging_in_progress = 0;
477
478 /* Begin bitfields */
479 vm_object_template.all_wanted = 0; /* all bits FALSE */
480 vm_object_template.pager_created = FALSE;
481 vm_object_template.pager_initialized = FALSE;
482 vm_object_template.pager_ready = FALSE;
483 vm_object_template.pager_trusted = FALSE;
484 vm_object_template.can_persist = FALSE;
485 vm_object_template.internal = TRUE;
486 vm_object_template.temporary = TRUE;
487 vm_object_template.private = FALSE;
488 vm_object_template.pageout = FALSE;
489 vm_object_template.alive = TRUE;
2d21ac55
A
490 vm_object_template.purgable = VM_PURGABLE_DENY;
491 vm_object_template.shadowed = FALSE;
1c79356b
A
492 vm_object_template.silent_overwrite = FALSE;
493 vm_object_template.advisory_pageout = FALSE;
2d21ac55 494 vm_object_template.true_share = FALSE;
1c79356b 495 vm_object_template.terminating = FALSE;
2d21ac55 496 vm_object_template.named = FALSE;
1c79356b
A
497 vm_object_template.shadow_severed = FALSE;
498 vm_object_template.phys_contiguous = FALSE;
0b4e3aa0 499 vm_object_template.nophyscache = FALSE;
1c79356b
A
500 /* End bitfields */
501
2d21ac55
A
502 vm_object_template.cached_list.prev = NULL;
503 vm_object_template.cached_list.next = NULL;
504 vm_object_template.msr_q.prev = NULL;
505 vm_object_template.msr_q.next = NULL;
506
1c79356b 507 vm_object_template.last_alloc = (vm_object_offset_t) 0;
2d21ac55
A
508 vm_object_template.sequential = (vm_object_offset_t) 0;
509 vm_object_template.pages_created = 0;
510 vm_object_template.pages_used = 0;
511
1c79356b
A
512#if MACH_PAGEMAP
513 vm_object_template.existence_map = VM_EXTERNAL_NULL;
514#endif /* MACH_PAGEMAP */
2d21ac55 515 vm_object_template.cow_hint = ~(vm_offset_t)0;
1c79356b
A
516#if MACH_ASSERT
517 vm_object_template.paging_object = VM_OBJECT_NULL;
518#endif /* MACH_ASSERT */
519
2d21ac55
A
520 /* cache bitfields */
521 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
522 vm_object_template.code_signed = FALSE;
593a1d5f 523 vm_object_template.mapping_in_progress = FALSE;
2d21ac55
A
524 vm_object_template.not_in_use = 0;
525#ifdef UPL_DEBUG
526 vm_object_template.uplq.prev = NULL;
527 vm_object_template.uplq.next = NULL;
528#endif /* UPL_DEBUG */
529#ifdef VM_PIP_DEBUG
530 bzero(&vm_object_template.pip_holders,
531 sizeof (vm_object_template.pip_holders));
532#endif /* VM_PIP_DEBUG */
533
534 vm_object_template.objq.next=NULL;
535 vm_object_template.objq.prev=NULL;
536
537
1c79356b
A
538 /*
539 * Initialize the "kernel object"
540 */
541
542 kernel_object = &kernel_object_store;
543
544/*
545 * Note that in the following size specifications, we need to add 1 because
55e303ae 546 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
1c79356b 547 */
55e303ae
A
548
549#ifdef ppc
550 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
551 kernel_object);
552#else
1c79356b
A
553 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
554 kernel_object);
55e303ae
A
555#endif
556 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1c79356b
A
557
558 /*
559 * Initialize the "submap object". Make it as large as the
560 * kernel object so that no limit is imposed on submap sizes.
561 */
562
563 vm_submap_object = &vm_submap_object_store;
55e303ae
A
564#ifdef ppc
565 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
566 vm_submap_object);
567#else
1c79356b
A
568 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
569 vm_submap_object);
55e303ae
A
570#endif
571 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
572
1c79356b
A
573 /*
574 * Create an "extra" reference to this object so that we never
575 * try to deallocate it; zfree doesn't like to be called with
576 * non-zone memory.
577 */
578 vm_object_reference(vm_submap_object);
579
580#if MACH_PAGEMAP
581 vm_external_module_initialize();
582#endif /* MACH_PAGEMAP */
583}
584
8f6c56a5
A
585void
586vm_object_reaper_init(void)
587{
588 kern_return_t kr;
589 thread_t thread;
590
8f6c56a5
A
591 kr = kernel_thread_start_priority(
592 (thread_continue_t) vm_object_reaper_thread,
593 NULL,
594 BASEPRI_PREEMPT - 1,
595 &thread);
596 if (kr != KERN_SUCCESS) {
2d21ac55 597 panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
8f6c56a5
A
598 }
599 thread_deallocate(thread);
600}
601
0b4e3aa0 602__private_extern__ void
1c79356b
A
603vm_object_init(void)
604{
605 /*
606 * Finish initializing the kernel object.
607 */
608}
609
2d21ac55
A
610
611__private_extern__ void
612vm_object_init_lck_grp(void)
613{
614 /*
615 * initialze the vm_object lock world
616 */
617 lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
618 lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
619 lck_attr_setdefault(&vm_object_lck_attr);
620 lck_attr_setdefault(&kernel_object_lck_attr);
621 lck_attr_cleardebug(&kernel_object_lck_attr);
622}
623
1c79356b
A
624
625#define MIGHT_NOT_CACHE_SHADOWS 1
626#if MIGHT_NOT_CACHE_SHADOWS
0b4e3aa0 627static int cache_shadows = TRUE;
1c79356b
A
628#endif /* MIGHT_NOT_CACHE_SHADOWS */
629
630/*
631 * vm_object_deallocate:
632 *
633 * Release a reference to the specified object,
634 * gained either through a vm_object_allocate
635 * or a vm_object_reference call. When all references
636 * are gone, storage associated with this object
637 * may be relinquished.
638 *
639 * No object may be locked.
640 */
2d21ac55
A
641unsigned long vm_object_deallocate_shared_successes = 0;
642unsigned long vm_object_deallocate_shared_failures = 0;
643unsigned long vm_object_deallocate_shared_swap_failures = 0;
0b4e3aa0 644__private_extern__ void
1c79356b
A
645vm_object_deallocate(
646 register vm_object_t object)
647{
2d21ac55
A
648 boolean_t retry_cache_trim = FALSE;
649 vm_object_t shadow = VM_OBJECT_NULL;
650 uint32_t try_failed_count = 0;
1c79356b
A
651
652// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
653// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
654
2d21ac55
A
655 if (object == VM_OBJECT_NULL)
656 return;
657
658 if (object == kernel_object) {
659 vm_object_lock(kernel_object);
660 kernel_object->ref_count--;
661 if (kernel_object->ref_count == 0) {
662 panic("vm_object_deallocate: losing kernel_object\n");
663 }
664 vm_object_unlock(kernel_object);
665 return;
666 }
667
668 if (object->ref_count > 2 ||
669 (!object->named && object->ref_count > 1)) {
670 UInt32 original_ref_count;
671 volatile UInt32 *ref_count_p;
672 Boolean atomic_swap;
673
674 /*
675 * The object currently looks like it is not being
676 * kept alive solely by the reference we're about to release.
677 * Let's try and release our reference without taking
678 * all the locks we would need if we had to terminate the
679 * object (cache lock + exclusive object lock).
680 * Lock the object "shared" to make sure we don't race with
681 * anyone holding it "exclusive".
682 */
683 vm_object_lock_shared(object);
684 ref_count_p = (volatile UInt32 *) &object->ref_count;
685 original_ref_count = object->ref_count;
686 /*
687 * Test again as "ref_count" could have changed.
688 * "named" shouldn't change.
689 */
690 if (original_ref_count > 2 ||
691 (!object->named && original_ref_count > 1)) {
692 atomic_swap = OSCompareAndSwap(
693 original_ref_count,
694 original_ref_count - 1,
695 (UInt32 *) &object->ref_count);
696 if (atomic_swap == FALSE) {
697 vm_object_deallocate_shared_swap_failures++;
698 }
699
700 } else {
701 atomic_swap = FALSE;
702 }
703 vm_object_unlock(object);
704
705 if (atomic_swap) {
706 /* ref_count was updated atomically ! */
707 vm_object_deallocate_shared_successes++;
708 return;
709 }
710
711 /*
712 * Someone else updated the ref_count at the same
713 * time and we lost the race. Fall back to the usual
714 * slow but safe path...
715 */
716 vm_object_deallocate_shared_failures++;
717 }
1c79356b
A
718
719 while (object != VM_OBJECT_NULL) {
720
721 /*
722 * The cache holds a reference (uncounted) to
723 * the object; we must lock it before removing
724 * the object.
725 */
55e303ae
A
726 for (;;) {
727 vm_object_cache_lock();
1c79356b 728
55e303ae
A
729 /*
730 * if we try to take a regular lock here
731 * we risk deadlocking against someone
732 * holding a lock on this object while
733 * trying to vm_object_deallocate a different
734 * object
735 */
736 if (vm_object_lock_try(object))
737 break;
738 vm_object_cache_unlock();
2d21ac55
A
739 try_failed_count++;
740
741 mutex_pause(try_failed_count); /* wait a bit */
55e303ae 742 }
0b4e3aa0
A
743 assert(object->ref_count > 0);
744
745 /*
746 * If the object has a named reference, and only
747 * that reference would remain, inform the pager
748 * about the last "mapping" reference going away.
749 */
750 if ((object->ref_count == 2) && (object->named)) {
751 memory_object_t pager = object->pager;
752
753 /* Notify the Pager that there are no */
754 /* more mappers for this object */
755
756 if (pager != MEMORY_OBJECT_NULL) {
593a1d5f
A
757 vm_object_mapping_wait(object, THREAD_UNINT);
758 vm_object_mapping_begin(object);
0b4e3aa0
A
759 vm_object_unlock(object);
760 vm_object_cache_unlock();
761
593a1d5f 762 memory_object_last_unmap(pager);
0b4e3aa0 763
2d21ac55 764 try_failed_count = 0;
55e303ae
A
765 for (;;) {
766 vm_object_cache_lock();
767
768 /*
769 * if we try to take a regular lock here
770 * we risk deadlocking against someone
771 * holding a lock on this object while
772 * trying to vm_object_deallocate a different
773 * object
774 */
775 if (vm_object_lock_try(object))
776 break;
777 vm_object_cache_unlock();
2d21ac55
A
778 try_failed_count++;
779
780 mutex_pause(try_failed_count); /* wait a bit */
55e303ae 781 }
0b4e3aa0 782 assert(object->ref_count > 0);
593a1d5f
A
783
784 vm_object_mapping_end(object);
0b4e3aa0
A
785 }
786 }
1c79356b
A
787
788 /*
789 * Lose the reference. If other references
790 * remain, then we are done, unless we need
791 * to retry a cache trim.
792 * If it is the last reference, then keep it
793 * until any pending initialization is completed.
794 */
795
0b4e3aa0
A
796 /* if the object is terminating, it cannot go into */
797 /* the cache and we obviously should not call */
798 /* terminate again. */
799
800 if ((object->ref_count > 1) || object->terminating) {
2d21ac55 801 vm_object_lock_assert_exclusive(object);
1c79356b 802 object->ref_count--;
1c79356b 803 vm_object_res_deallocate(object);
1c79356b 804 vm_object_cache_unlock();
91447636
A
805
806 if (object->ref_count == 1 &&
807 object->shadow != VM_OBJECT_NULL) {
808 /*
0c530ab8
A
809 * There's only one reference left on this
810 * VM object. We can't tell if it's a valid
811 * one (from a mapping for example) or if this
812 * object is just part of a possibly stale and
813 * useless shadow chain.
814 * We would like to try and collapse it into
815 * its parent, but we don't have any pointers
816 * back to this parent object.
91447636
A
817 * But we can try and collapse this object with
818 * its own shadows, in case these are useless
819 * too...
0c530ab8
A
820 * We can't bypass this object though, since we
821 * don't know if this last reference on it is
822 * meaningful or not.
91447636 823 */
0c530ab8 824 vm_object_collapse(object, 0, FALSE);
91447636
A
825 }
826
827 vm_object_unlock(object);
1c79356b
A
828 if (retry_cache_trim &&
829 ((object = vm_object_cache_trim(TRUE)) !=
830 VM_OBJECT_NULL)) {
831 continue;
832 }
833 return;
834 }
835
836 /*
837 * We have to wait for initialization
838 * before destroying or caching the object.
839 */
840
841 if (object->pager_created && ! object->pager_initialized) {
842 assert(! object->can_persist);
843 vm_object_assert_wait(object,
844 VM_OBJECT_EVENT_INITIALIZED,
845 THREAD_UNINT);
846 vm_object_unlock(object);
847 vm_object_cache_unlock();
9bccf70c 848 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
849 continue;
850 }
851
852 /*
853 * If this object can persist, then enter it in
854 * the cache. Otherwise, terminate it.
855 *
856 * NOTE: Only permanent objects are cached, and
857 * permanent objects cannot have shadows. This
858 * affects the residence counting logic in a minor
859 * way (can do it in-line, mostly).
860 */
861
0b4e3aa0 862 if ((object->can_persist) && (object->alive)) {
1c79356b
A
863 /*
864 * Now it is safe to decrement reference count,
865 * and to return if reference count is > 0.
866 */
2d21ac55 867 vm_object_lock_assert_exclusive(object);
1c79356b
A
868 if (--object->ref_count > 0) {
869 vm_object_res_deallocate(object);
870 vm_object_unlock(object);
871 vm_object_cache_unlock();
872 if (retry_cache_trim &&
873 ((object = vm_object_cache_trim(TRUE)) !=
874 VM_OBJECT_NULL)) {
875 continue;
876 }
877 return;
878 }
879
880#if MIGHT_NOT_CACHE_SHADOWS
881 /*
882 * Remove shadow now if we don't
883 * want to cache shadows.
884 */
885 if (! cache_shadows) {
886 shadow = object->shadow;
887 object->shadow = VM_OBJECT_NULL;
888 }
889#endif /* MIGHT_NOT_CACHE_SHADOWS */
890
891 /*
892 * Enter the object onto the queue of
893 * cached objects, and deactivate
894 * all of its pages.
895 */
896 assert(object->shadow == VM_OBJECT_NULL);
897 VM_OBJ_RES_DECR(object);
898 XPR(XPR_VM_OBJECT,
899 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
900 (integer_t)object,
901 (integer_t)vm_object_cached_list.next,
902 (integer_t)vm_object_cached_list.prev,0,0);
903
904 vm_object_cached_count++;
905 if (vm_object_cached_count > vm_object_cached_high)
906 vm_object_cached_high = vm_object_cached_count;
907 queue_enter(&vm_object_cached_list, object,
908 vm_object_t, cached_list);
909 vm_object_cache_unlock();
0b4e3aa0 910 vm_object_deactivate_all_pages(object);
1c79356b
A
911 vm_object_unlock(object);
912
913#if MIGHT_NOT_CACHE_SHADOWS
914 /*
915 * If we have a shadow that we need
916 * to deallocate, do so now, remembering
917 * to trim the cache later.
918 */
919 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
920 object = shadow;
921 retry_cache_trim = TRUE;
922 continue;
923 }
924#endif /* MIGHT_NOT_CACHE_SHADOWS */
925
926 /*
927 * Trim the cache. If the cache trim
928 * returns with a shadow for us to deallocate,
929 * then remember to retry the cache trim
930 * when we are done deallocating the shadow.
931 * Otherwise, we are done.
932 */
933
934 object = vm_object_cache_trim(TRUE);
935 if (object == VM_OBJECT_NULL) {
936 return;
937 }
938 retry_cache_trim = TRUE;
939
940 } else {
941 /*
942 * This object is not cachable; terminate it.
943 */
944 XPR(XPR_VM_OBJECT,
91447636
A
945 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
946 (integer_t)object, object->resident_page_count,
947 object->paging_in_progress,
948 (void *)current_thread(),object->ref_count);
1c79356b
A
949
950 VM_OBJ_RES_DECR(object); /* XXX ? */
951 /*
952 * Terminate this object. If it had a shadow,
953 * then deallocate it; otherwise, if we need
954 * to retry a cache trim, do so now; otherwise,
955 * we are done. "pageout" objects have a shadow,
956 * but maintain a "paging reference" rather than
957 * a normal reference.
958 */
959 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
960 if(vm_object_terminate(object) != KERN_SUCCESS) {
961 return;
962 }
963 if (shadow != VM_OBJECT_NULL) {
964 object = shadow;
965 continue;
966 }
967 if (retry_cache_trim &&
968 ((object = vm_object_cache_trim(TRUE)) !=
969 VM_OBJECT_NULL)) {
970 continue;
971 }
972 return;
973 }
974 }
975 assert(! retry_cache_trim);
976}
977
978/*
979 * Check to see whether we really need to trim
980 * down the cache. If so, remove an object from
981 * the cache, terminate it, and repeat.
982 *
983 * Called with, and returns with, cache lock unlocked.
984 */
985vm_object_t
986vm_object_cache_trim(
987 boolean_t called_from_vm_object_deallocate)
988{
989 register vm_object_t object = VM_OBJECT_NULL;
990 vm_object_t shadow;
991
992 for (;;) {
993
994 /*
995 * If we no longer need to trim the cache,
996 * then we are done.
997 */
998
999 vm_object_cache_lock();
1000 if (vm_object_cached_count <= vm_object_cached_max) {
1001 vm_object_cache_unlock();
1002 return VM_OBJECT_NULL;
1003 }
1004
1005 /*
1006 * We must trim down the cache, so remove
1007 * the first object in the cache.
1008 */
1009 XPR(XPR_VM_OBJECT,
1010 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
1011 (integer_t)vm_object_cached_list.next,
1012 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
1013
1014 object = (vm_object_t) queue_first(&vm_object_cached_list);
9bccf70c
A
1015 if(object == (vm_object_t) &vm_object_cached_list) {
1016 /* something's wrong with the calling parameter or */
1017 /* the value of vm_object_cached_count, just fix */
1018 /* and return */
1019 if(vm_object_cached_max < 0)
1020 vm_object_cached_max = 0;
1021 vm_object_cached_count = 0;
1022 vm_object_cache_unlock();
1023 return VM_OBJECT_NULL;
1024 }
1c79356b
A
1025 vm_object_lock(object);
1026 queue_remove(&vm_object_cached_list, object, vm_object_t,
1027 cached_list);
1028 vm_object_cached_count--;
1029
1030 /*
1031 * Since this object is in the cache, we know
1032 * that it is initialized and has no references.
1033 * Take a reference to avoid recursive deallocations.
1034 */
1035
1036 assert(object->pager_initialized);
1037 assert(object->ref_count == 0);
2d21ac55 1038 vm_object_lock_assert_exclusive(object);
1c79356b
A
1039 object->ref_count++;
1040
1041 /*
1042 * Terminate the object.
1043 * If the object had a shadow, we let vm_object_deallocate
1044 * deallocate it. "pageout" objects have a shadow, but
1045 * maintain a "paging reference" rather than a normal
1046 * reference.
1047 * (We are careful here to limit recursion.)
1048 */
1049 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
1050 if(vm_object_terminate(object) != KERN_SUCCESS)
1051 continue;
1052 if (shadow != VM_OBJECT_NULL) {
1053 if (called_from_vm_object_deallocate) {
1054 return shadow;
1055 } else {
1056 vm_object_deallocate(shadow);
1057 }
1058 }
1059 }
1060}
1061
2d21ac55
A
1062#define VM_OBJ_TERM_STATS DEBUG
1063#if VM_OBJ_TERM_STATS
1064uint32_t vm_object_terminate_pages_freed = 0;
1065uint32_t vm_object_terminate_pages_removed = 0;
1066uint32_t vm_object_terminate_batches = 0;
1067uint32_t vm_object_terminate_biggest_batch = 0;
1068#endif /* VM_OBJ_TERM_STATS */
1069
1070#define V_O_T_MAX_BATCH 256
1c79356b
A
1071
1072/*
1073 * Routine: vm_object_terminate
1074 * Purpose:
1075 * Free all resources associated with a vm_object.
1076 * In/out conditions:
0b4e3aa0 1077 * Upon entry, the object must be locked,
1c79356b
A
1078 * and the object must have exactly one reference.
1079 *
1080 * The shadow object reference is left alone.
1081 *
1082 * The object must be unlocked if its found that pages
1083 * must be flushed to a backing object. If someone
1084 * manages to map the object while it is being flushed
1085 * the object is returned unlocked and unchanged. Otherwise,
1086 * upon exit, the cache will be unlocked, and the
1087 * object will cease to exist.
1088 */
0b4e3aa0 1089static kern_return_t
1c79356b
A
1090vm_object_terminate(
1091 register vm_object_t object)
1092{
1093 register vm_page_t p;
1094 vm_object_t shadow_object;
2d21ac55
A
1095 vm_page_t local_free_q;
1096 int loop_count;
1097#if VM_OBJ_TERM_STATS
1098 uint32_t local_free_count;
1099 uint32_t pages_removed;
1100#endif /* VM_OBJ_TERM_STATS */
1101
1102#if VM_OBJ_TERM_STATS
1103#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) \
1104 MACRO_BEGIN \
1105 if (_pages_removed) { \
1106 hw_atomic_add(&vm_object_terminate_batches, 1); \
1107 hw_atomic_add(&vm_object_terminate_pages_removed, \
1108 _pages_removed); \
1109 hw_atomic_add(&vm_object_terminate_pages_freed, \
1110 _local_free_count); \
1111 if (_local_free_count > \
1112 vm_object_terminate_biggest_batch) { \
1113 vm_object_terminate_biggest_batch = \
1114 _local_free_count; \
1115 } \
1116 _local_free_count = 0; \
1117 } \
1118 MACRO_END
1119#else /* VM_OBJ_TERM_STATS */
1120#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count)
1121#endif /* VM_OBJ_TERM_STATS */
1122
1123#define VM_OBJ_TERM_FREELIST(_pages_removed, _local_free_count, _local_free_q) \
1124 MACRO_BEGIN \
1125 VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count); \
1126 if (_local_free_q) { \
1127 vm_page_free_list(_local_free_q); \
1128 _local_free_q = VM_PAGE_NULL; \
1129 } \
1130 MACRO_END
1131
1132
1c79356b
A
1133
1134 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
1135 (integer_t)object, object->ref_count, 0, 0, 0);
1136
2d21ac55
A
1137 local_free_q = VM_PAGE_NULL;
1138#if VM_OBJ_TERM_STATS
1139 local_free_count = 0;
1140 pages_removed = 0;
1141#endif /* VM_OBJ_TERM_STATS */
1142
1c79356b
A
1143 if (!object->pageout && (!object->temporary || object->can_persist)
1144 && (object->pager != NULL || object->shadow_severed)) {
0b4e3aa0 1145 vm_object_cache_unlock();
2d21ac55
A
1146 loop_count = V_O_T_MAX_BATCH;
1147 vm_page_lock_queues();
1c79356b 1148 while (!queue_empty(&object->memq)) {
2d21ac55
A
1149 if (--loop_count == 0) {
1150 /*
1151 * Free the pages we've reclaimed so far and
1152 * take a little break to avoid hogging
1153 * the page queues lock too long.
1154 */
1155 VM_OBJ_TERM_FREELIST(pages_removed,
1156 local_free_count,
1157 local_free_q);
1158 mutex_yield(&vm_page_queue_lock);
1159 loop_count = V_O_T_MAX_BATCH;
1160 }
1c79356b
A
1161 /*
1162 * Clear pager_trusted bit so that the pages get yanked
1163 * out of the object instead of cleaned in place. This
1164 * prevents a deadlock in XMM and makes more sense anyway.
1165 */
1166 object->pager_trusted = FALSE;
1167
1168 p = (vm_page_t) queue_first(&object->memq);
1169
1170 VM_PAGE_CHECK(p);
1171
1172 if (p->busy || p->cleaning) {
1173 if(p->cleaning || p->absent) {
2d21ac55
A
1174 /* free the pages reclaimed so far */
1175 VM_OBJ_TERM_FREELIST(pages_removed,
1176 local_free_count,
1177 local_free_q);
1178 vm_page_unlock_queues();
1c79356b 1179 vm_object_paging_wait(object, THREAD_UNINT);
2d21ac55 1180 vm_page_lock_queues();
1c79356b
A
1181 continue;
1182 } else {
2d21ac55 1183 panic("vm_object_terminate.3 %p %p", object, p);
1c79356b
A
1184 }
1185 }
1186
55e303ae 1187 p->busy = TRUE;
1c79356b 1188 VM_PAGE_QUEUES_REMOVE(p);
2d21ac55
A
1189#if VM_OBJ_TERM_STATS
1190 pages_removed++;
1191#endif /* VM_OBJ_TERM_STATS */
1c79356b
A
1192
1193 if (p->absent || p->private) {
1194
1195 /*
1196 * For private pages, VM_PAGE_FREE just
1197 * leaves the page structure around for
1198 * its owner to clean up. For absent
1199 * pages, the structure is returned to
1200 * the appropriate pool.
1201 */
1202
1203 goto free_page;
1204 }
1205
2d21ac55
A
1206 if (p->fictitious) {
1207 if (p->phys_page == vm_page_guard_addr) {
1208 goto free_page;
1209 }
1210 panic("vm_object_terminate.4 %p %p", object, p);
1211 }
1c79356b 1212
4a3eedf9 1213 if (!p->dirty && p->wpmapped)
55e303ae 1214 p->dirty = pmap_is_modified(p->phys_page);
1c79356b 1215
0b4e3aa0 1216 if ((p->dirty || p->precious) && !p->error && object->alive) {
2d21ac55
A
1217 /* free the pages reclaimed so far */
1218 VM_OBJ_TERM_FREELIST(pages_removed,
1219 local_free_count,
1220 local_free_q);
1221 vm_page_unlock_queues();
1c79356b 1222 vm_pageout_cluster(p); /* flush page */
1c79356b
A
1223 vm_object_paging_wait(object, THREAD_UNINT);
1224 XPR(XPR_VM_OBJECT,
1225 "vm_object_terminate restart, object 0x%X ref %d\n",
1226 (integer_t)object, object->ref_count, 0, 0, 0);
2d21ac55 1227 vm_page_lock_queues();
1c79356b
A
1228 } else {
1229 free_page:
2d21ac55
A
1230 /*
1231 * Add this page to our list of reclaimed pages,
1232 * to be freed later.
1233 */
1234 vm_page_free_prepare(p);
1235 p->pageq.next = (queue_entry_t) local_free_q;
1236 local_free_q = p;
1237#if VM_OBJ_TERM_STATS
1238 local_free_count++;
1239#endif /* VM_OBJ_TERM_STATS */
1c79356b
A
1240 }
1241 }
2d21ac55
A
1242
1243 /*
1244 * Free the remaining reclaimed pages.
1245 */
1246 VM_OBJ_TERM_FREELIST(pages_removed,
1247 local_free_count,
1248 local_free_q);
1249 vm_page_unlock_queues();
0b4e3aa0
A
1250 vm_object_unlock(object);
1251 vm_object_cache_lock();
1252 vm_object_lock(object);
1c79356b 1253 }
0b4e3aa0
A
1254
1255 /*
1256 * Make sure the object isn't already being terminated
1257 */
1258 if(object->terminating) {
2d21ac55
A
1259 vm_object_lock_assert_exclusive(object);
1260 object->ref_count--;
0b4e3aa0
A
1261 assert(object->ref_count > 0);
1262 vm_object_cache_unlock();
1263 vm_object_unlock(object);
1264 return KERN_FAILURE;
1265 }
1266
1267 /*
1268 * Did somebody get a reference to the object while we were
1269 * cleaning it?
1270 */
1c79356b 1271 if(object->ref_count != 1) {
2d21ac55
A
1272 vm_object_lock_assert_exclusive(object);
1273 object->ref_count--;
0b4e3aa0 1274 assert(object->ref_count > 0);
1c79356b 1275 vm_object_res_deallocate(object);
0b4e3aa0 1276 vm_object_cache_unlock();
1c79356b
A
1277 vm_object_unlock(object);
1278 return KERN_FAILURE;
1279 }
1280
1c79356b
A
1281 /*
1282 * Make sure no one can look us up now.
1283 */
1284
0b4e3aa0
A
1285 object->terminating = TRUE;
1286 object->alive = FALSE;
1287 vm_object_remove(object);
1c79356b
A
1288
1289 /*
1290 * Detach the object from its shadow if we are the shadow's
55e303ae
A
1291 * copy. The reference we hold on the shadow must be dropped
1292 * by our caller.
1c79356b
A
1293 */
1294 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1295 !(object->pageout)) {
1296 vm_object_lock(shadow_object);
55e303ae
A
1297 if (shadow_object->copy == object)
1298 shadow_object->copy = VM_OBJECT_NULL;
1c79356b
A
1299 vm_object_unlock(shadow_object);
1300 }
1301
6601e61a 1302 if (object->paging_in_progress != 0) {
8f6c56a5
A
1303 /*
1304 * There are still some paging_in_progress references
1305 * on this object, meaning that there are some paging
1306 * or other I/O operations in progress for this VM object.
1307 * Such operations take some paging_in_progress references
1308 * up front to ensure that the object doesn't go away, but
1309 * they may also need to acquire a reference on the VM object,
1310 * to map it in kernel space, for example. That means that
1311 * they may end up releasing the last reference on the VM
1312 * object, triggering its termination, while still holding
1313 * paging_in_progress references. Waiting for these
1314 * pending paging_in_progress references to go away here would
1315 * deadlock.
1316 *
1317 * To avoid deadlocking, we'll let the vm_object_reaper_thread
1318 * complete the VM object termination if it still holds
1319 * paging_in_progress references at this point.
1320 *
1321 * No new paging_in_progress should appear now that the
1322 * VM object is "terminating" and not "alive".
1323 */
1324 vm_object_reap_async(object);
1325 vm_object_cache_unlock();
1326 vm_object_unlock(object);
6601e61a
A
1327 /*
1328 * Return KERN_FAILURE to let the caller know that we
1329 * haven't completed the termination and it can't drop this
1330 * object's reference on its shadow object yet.
1331 * The reaper thread will take care of that once it has
1332 * completed this object's termination.
1333 */
1334 return KERN_FAILURE;
8f6c56a5 1335 }
4452a7af 1336
8f6c56a5
A
1337 /* complete the VM object termination */
1338 vm_object_reap(object);
1339 object = VM_OBJECT_NULL;
1340 /* cache lock and object lock were released by vm_object_reap() */
1341
2d21ac55
A
1342 /*
1343 * KERN_SUCCESS means that this object has been terminated
1344 * and no longer needs its shadow object but still holds a
1345 * reference on it.
1346 * The caller is responsible for dropping that reference.
1347 * We can't call vm_object_deallocate() here because that
1348 * would create a recursion.
1349 */
8f6c56a5
A
1350 return KERN_SUCCESS;
1351}
1352
1353/*
1354 * vm_object_reap():
1355 *
1356 * Complete the termination of a VM object after it's been marked
1357 * as "terminating" and "!alive" by vm_object_terminate().
1358 *
1359 * The VM object cache and the VM object must be locked by caller.
1360 * The locks will be released on return and the VM object is no longer valid.
1361 */
1362void
1363vm_object_reap(
1364 vm_object_t object)
1365{
1366 memory_object_t pager;
1367 vm_page_t p;
2d21ac55
A
1368 vm_page_t local_free_q;
1369 int loop_count;
1370#if VM_OBJ_TERM_STATS
1371 uint32_t local_free_count;
1372#endif /* VM_OBJ_TERM_STATS */
8f6c56a5
A
1373
1374#if DEBUG
1375 mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
8f6c56a5 1376#endif /* DEBUG */
2d21ac55
A
1377 vm_object_lock_assert_exclusive(object);
1378 assert(object->paging_in_progress == 0);
8f6c56a5
A
1379
1380 vm_object_reap_count++;
1381
2d21ac55
A
1382 local_free_q = VM_PAGE_NULL;
1383#if VM_OBJ_TERM_STATS
1384 local_free_count = 0;
1385#endif /* VM_OBJ_TERM_STATS */
0b4e3aa0
A
1386
1387 pager = object->pager;
1388 object->pager = MEMORY_OBJECT_NULL;
1389
1390 if (pager != MEMORY_OBJECT_NULL)
91447636 1391 memory_object_control_disable(object->pager_control);
0b4e3aa0
A
1392 vm_object_cache_unlock();
1393
2d21ac55 1394 vm_object_lock_assert_exclusive(object);
1c79356b
A
1395 object->ref_count--;
1396#if TASK_SWAPPER
1397 assert(object->res_count == 0);
1398#endif /* TASK_SWAPPER */
1399
1c79356b
A
1400 assert (object->ref_count == 0);
1401
2d21ac55
A
1402 /* remove from purgeable queue if it's on */
1403 if (object->objq.next || object->objq.prev) {
1404 purgeable_q_t queue = vm_purgeable_object_remove(object);
1405 assert(queue);
1406
1407 /* Must take page lock for this - using it to protect token queue */
1408 vm_page_lock_queues();
1409 vm_purgeable_token_delete_first(queue);
1410
1411 assert(queue->debug_count_objects>=0);
1412 vm_page_unlock_queues();
1413 }
1414
1c79356b
A
1415 /*
1416 * Clean or free the pages, as appropriate.
1417 * It is possible for us to find busy/absent pages,
1418 * if some faults on this object were aborted.
1419 */
1420 if (object->pageout) {
8f6c56a5 1421 assert(object->shadow != VM_OBJECT_NULL);
1c79356b
A
1422
1423 vm_pageout_object_terminate(object);
1424
0b4e3aa0
A
1425 } else if ((object->temporary && !object->can_persist) ||
1426 (pager == MEMORY_OBJECT_NULL)) {
2d21ac55
A
1427 loop_count = V_O_T_MAX_BATCH;
1428 vm_page_lock_queues();
1c79356b 1429 while (!queue_empty(&object->memq)) {
2d21ac55
A
1430 if (--loop_count == 0) {
1431 /*
1432 * Free the pages we reclaimed so far
1433 * and take a little break to avoid
1434 * hogging the page queue lock too long
1435 */
1436 VM_OBJ_TERM_FREELIST(local_free_count,
1437 local_free_count,
1438 local_free_q);
1439 mutex_yield(&vm_page_queue_lock);
1440 loop_count = V_O_T_MAX_BATCH;
1441 }
1c79356b
A
1442 p = (vm_page_t) queue_first(&object->memq);
1443
2d21ac55
A
1444 vm_page_free_prepare(p);
1445
1446 assert(p->pageq.next == NULL && p->pageq.prev == NULL);
1447 p->pageq.next = (queue_entry_t) local_free_q;
1448 local_free_q = p;
1449#if VM_OBJ_TERM_STATS
1450 local_free_count++;
1451#endif /* VM_OBJ_TERM_STATS */
1c79356b 1452 }
2d21ac55
A
1453 /*
1454 * Free the remaining reclaimed pages
1455 */
1456 VM_OBJ_TERM_FREELIST(local_free_count,
1457 local_free_count,
1458 local_free_q);
1459 vm_page_unlock_queues();
1c79356b 1460 } else if (!queue_empty(&object->memq)) {
8f6c56a5 1461 panic("vm_object_reap: queue just emptied isn't");
1c79356b
A
1462 }
1463
1464 assert(object->paging_in_progress == 0);
1465 assert(object->ref_count == 0);
1466
1c79356b 1467 /*
0b4e3aa0
A
1468 * If the pager has not already been released by
1469 * vm_object_destroy, we need to terminate it and
1470 * release our reference to it here.
1c79356b 1471 */
0b4e3aa0
A
1472 if (pager != MEMORY_OBJECT_NULL) {
1473 vm_object_unlock(object);
1474 vm_object_release_pager(pager);
1475 vm_object_lock(object);
1c79356b 1476 }
0b4e3aa0 1477
1c79356b 1478 /* kick off anyone waiting on terminating */
0b4e3aa0 1479 object->terminating = FALSE;
1c79356b
A
1480 vm_object_paging_begin(object);
1481 vm_object_paging_end(object);
1482 vm_object_unlock(object);
1483
1484#if MACH_PAGEMAP
1485 vm_external_destroy(object->existence_map, object->size);
1486#endif /* MACH_PAGEMAP */
1487
6601e61a
A
1488 object->shadow = VM_OBJECT_NULL;
1489
2d21ac55 1490 vm_object_lock_destroy(object);
1c79356b
A
1491 /*
1492 * Free the space for the object.
1493 */
91447636 1494 zfree(vm_object_zone, object);
8f6c56a5
A
1495 object = VM_OBJECT_NULL;
1496}
1497
1498void
1499vm_object_reap_async(
1500 vm_object_t object)
1501{
1502#if DEBUG
1503 mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
8f6c56a5 1504#endif /* DEBUG */
2d21ac55 1505 vm_object_lock_assert_exclusive(object);
8f6c56a5
A
1506
1507 vm_object_reap_count_async++;
1508
1509 /* enqueue the VM object... */
1510 queue_enter(&vm_object_reaper_queue, object,
1511 vm_object_t, cached_list);
1512 /* ... and wake up the reaper thread */
1513 thread_wakeup((event_t) &vm_object_reaper_queue);
1514}
1515
1516void
1517vm_object_reaper_thread(void)
1518{
6601e61a 1519 vm_object_t object, shadow_object;
8f6c56a5
A
1520
1521 vm_object_cache_lock();
1522
1523 while (!queue_empty(&vm_object_reaper_queue)) {
1524 queue_remove_first(&vm_object_reaper_queue,
1525 object,
1526 vm_object_t,
1527 cached_list);
1528 vm_object_lock(object);
1529 assert(object->terminating);
1530 assert(!object->alive);
2d21ac55
A
1531
1532 /*
1533 * The pageout daemon might be playing with our pages.
1534 * Now that the object is dead, it won't touch any more
1535 * pages, but some pages might already be on their way out.
1536 * Hence, we wait until the active paging activities have
1537 * ceased before we break the association with the pager
1538 * itself.
1539 */
1540 while (object->paging_in_progress != 0) {
1541 vm_object_cache_unlock();
1542 vm_object_wait(object,
1543 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1544 THREAD_UNINT);
1545 vm_object_cache_lock();
1546 vm_object_lock(object);
1547 }
6601e61a
A
1548
1549 shadow_object =
1550 object->pageout ? VM_OBJECT_NULL : object->shadow;
1551
8f6c56a5
A
1552 vm_object_reap(object);
1553 /* cache is unlocked and object is no longer valid */
1554 object = VM_OBJECT_NULL;
1555
6601e61a
A
1556 if (shadow_object != VM_OBJECT_NULL) {
1557 /*
1558 * Drop the reference "object" was holding on
1559 * its shadow object.
1560 */
1561 vm_object_deallocate(shadow_object);
1562 shadow_object = VM_OBJECT_NULL;
1563 }
1564
8f6c56a5
A
1565 vm_object_cache_lock();
1566 }
1567
1568 /* wait for more work... */
1569 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
1570 vm_object_cache_unlock();
1571 thread_block((thread_continue_t) vm_object_reaper_thread);
1572 /*NOTREACHED*/
1c79356b
A
1573}
1574
1575/*
1576 * Routine: vm_object_pager_wakeup
1577 * Purpose: Wake up anyone waiting for termination of a pager.
1578 */
1579
0b4e3aa0 1580static void
1c79356b 1581vm_object_pager_wakeup(
0b4e3aa0 1582 memory_object_t pager)
1c79356b
A
1583{
1584 vm_object_hash_entry_t entry;
1585 boolean_t waiting = FALSE;
1586
1587 /*
1588 * If anyone was waiting for the memory_object_terminate
1589 * to be queued, wake them up now.
1590 */
1591 vm_object_cache_lock();
1592 entry = vm_object_hash_lookup(pager, TRUE);
1593 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1594 waiting = entry->waiting;
1595 vm_object_cache_unlock();
1596 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1597 if (waiting)
1598 thread_wakeup((event_t) pager);
1599 vm_object_hash_entry_free(entry);
1600 }
1601}
1602
1603/*
0b4e3aa0
A
1604 * Routine: vm_object_release_pager
1605 * Purpose: Terminate the pager and, upon completion,
1606 * release our last reference to it.
1607 * just like memory_object_terminate, except
1608 * that we wake up anyone blocked in vm_object_enter
1609 * waiting for termination message to be queued
1610 * before calling memory_object_init.
1c79356b 1611 */
0b4e3aa0
A
1612static void
1613vm_object_release_pager(
1614 memory_object_t pager)
1c79356b 1615{
1c79356b 1616
0b4e3aa0
A
1617 /*
1618 * Terminate the pager.
1619 */
1c79356b 1620
0b4e3aa0 1621 (void) memory_object_terminate(pager);
1c79356b 1622
0b4e3aa0
A
1623 /*
1624 * Wakeup anyone waiting for this terminate
1625 */
1626 vm_object_pager_wakeup(pager);
1c79356b 1627
0b4e3aa0
A
1628 /*
1629 * Release reference to pager.
1630 */
1631 memory_object_deallocate(pager);
1632}
1c79356b 1633
1c79356b 1634/*
0b4e3aa0 1635 * Routine: vm_object_destroy
1c79356b 1636 * Purpose:
0b4e3aa0 1637 * Shut down a VM object, despite the
1c79356b
A
1638 * presence of address map (or other) references
1639 * to the vm_object.
1640 */
1641kern_return_t
0b4e3aa0
A
1642vm_object_destroy(
1643 vm_object_t object,
91447636 1644 __unused kern_return_t reason)
1c79356b 1645{
0b4e3aa0 1646 memory_object_t old_pager;
1c79356b
A
1647
1648 if (object == VM_OBJECT_NULL)
1649 return(KERN_SUCCESS);
1650
1651 /*
0b4e3aa0 1652 * Remove the pager association immediately.
1c79356b
A
1653 *
1654 * This will prevent the memory manager from further
1655 * meddling. [If it wanted to flush data or make
1656 * other changes, it should have done so before performing
1657 * the destroy call.]
1658 */
1659
1660 vm_object_cache_lock();
1661 vm_object_lock(object);
1c79356b
A
1662 object->can_persist = FALSE;
1663 object->named = FALSE;
0b4e3aa0 1664 object->alive = FALSE;
1c79356b
A
1665
1666 /*
0b4e3aa0 1667 * Rip out the pager from the vm_object now...
1c79356b
A
1668 */
1669
0b4e3aa0
A
1670 vm_object_remove(object);
1671 old_pager = object->pager;
1672 object->pager = MEMORY_OBJECT_NULL;
1673 if (old_pager != MEMORY_OBJECT_NULL)
91447636 1674 memory_object_control_disable(object->pager_control);
0b4e3aa0 1675 vm_object_cache_unlock();
1c79356b
A
1676
1677 /*
0b4e3aa0
A
1678 * Wait for the existing paging activity (that got
1679 * through before we nulled out the pager) to subside.
1c79356b
A
1680 */
1681
1682 vm_object_paging_wait(object, THREAD_UNINT);
1683 vm_object_unlock(object);
1684
1685 /*
0b4e3aa0 1686 * Terminate the object now.
1c79356b 1687 */
0b4e3aa0
A
1688 if (old_pager != MEMORY_OBJECT_NULL) {
1689 vm_object_release_pager(old_pager);
1690
1691 /*
1692 * JMM - Release the caller's reference. This assumes the
1693 * caller had a reference to release, which is a big (but
1694 * currently valid) assumption if this is driven from the
1695 * vnode pager (it is holding a named reference when making
1696 * this call)..
1697 */
1698 vm_object_deallocate(object);
1c79356b 1699
1c79356b 1700 }
1c79356b
A
1701 return(KERN_SUCCESS);
1702}
1703
2d21ac55
A
1704#define VM_OBJ_DEACT_ALL_STATS DEBUG
1705#if VM_OBJ_DEACT_ALL_STATS
1706uint32_t vm_object_deactivate_all_pages_batches = 0;
1707uint32_t vm_object_deactivate_all_pages_pages = 0;
1708#endif /* VM_OBJ_DEACT_ALL_STATS */
1c79356b
A
1709/*
1710 * vm_object_deactivate_pages
1711 *
1712 * Deactivate all pages in the specified object. (Keep its pages
1713 * in memory even though it is no longer referenced.)
1714 *
1715 * The object must be locked.
1716 */
0b4e3aa0
A
1717static void
1718vm_object_deactivate_all_pages(
1c79356b
A
1719 register vm_object_t object)
1720{
1721 register vm_page_t p;
2d21ac55
A
1722 int loop_count;
1723#if VM_OBJ_DEACT_ALL_STATS
1724 int pages_count;
1725#endif /* VM_OBJ_DEACT_ALL_STATS */
1726#define V_O_D_A_P_MAX_BATCH 256
1727
1728 loop_count = V_O_D_A_P_MAX_BATCH;
1729#if VM_OBJ_DEACT_ALL_STATS
1730 pages_count = 0;
1731#endif /* VM_OBJ_DEACT_ALL_STATS */
1732 vm_page_lock_queues();
1c79356b 1733 queue_iterate(&object->memq, p, vm_page_t, listq) {
2d21ac55
A
1734 if (--loop_count == 0) {
1735#if VM_OBJ_DEACT_ALL_STATS
1736 hw_atomic_add(&vm_object_deactivate_all_pages_batches,
1737 1);
1738 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1739 pages_count);
1740 pages_count = 0;
1741#endif /* VM_OBJ_DEACT_ALL_STATS */
1742 mutex_yield(&vm_page_queue_lock);
1743 loop_count = V_O_D_A_P_MAX_BATCH;
1744 }
1745 if (!p->busy && !p->throttled) {
1746#if VM_OBJ_DEACT_ALL_STATS
1747 pages_count++;
1748#endif /* VM_OBJ_DEACT_ALL_STATS */
1c79356b 1749 vm_page_deactivate(p);
2d21ac55
A
1750 }
1751 }
1752#if VM_OBJ_DEACT_ALL_STATS
1753 if (pages_count) {
1754 hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
1755 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1756 pages_count);
1757 pages_count = 0;
1c79356b 1758 }
2d21ac55
A
1759#endif /* VM_OBJ_DEACT_ALL_STATS */
1760 vm_page_unlock_queues();
1c79356b
A
1761}
1762
0b4e3aa0
A
1763__private_extern__ void
1764vm_object_deactivate_pages(
1765 vm_object_t object,
1766 vm_object_offset_t offset,
1767 vm_object_size_t size,
1768 boolean_t kill_page)
1769{
1770 vm_object_t orig_object;
1771 int pages_moved = 0;
1772 int pages_found = 0;
1773
1774 /*
1775 * entered with object lock held, acquire a paging reference to
1776 * prevent the memory_object and control ports from
1777 * being destroyed.
1778 */
1779 orig_object = object;
1780
1781 for (;;) {
1782 register vm_page_t m;
1783 vm_object_offset_t toffset;
1784 vm_object_size_t tsize;
1785
1786 vm_object_paging_begin(object);
1787 vm_page_lock_queues();
1788
1789 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1790
1791 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1792
1793 pages_found++;
1794
1795 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1796
91447636
A
1797 assert(!m->laundry);
1798
0b4e3aa0 1799 m->reference = FALSE;
55e303ae 1800 pmap_clear_reference(m->phys_page);
0b4e3aa0
A
1801
1802 if ((kill_page) && (object->internal)) {
1803 m->precious = FALSE;
1804 m->dirty = FALSE;
55e303ae 1805 pmap_clear_modify(m->phys_page);
2d21ac55 1806#if MACH_PAGEMAP
0b4e3aa0 1807 vm_external_state_clr(object->existence_map, offset);
2d21ac55 1808#endif /* MACH_PAGEMAP */
0b4e3aa0 1809 }
0b4e3aa0 1810
2d21ac55
A
1811 if (!m->throttled) {
1812 VM_PAGE_QUEUES_REMOVE(m);
1813
1814 assert(!m->laundry);
1815 assert(m->object != kernel_object);
1816 assert(m->pageq.next == NULL &&
1817 m->pageq.prev == NULL);
1818
1819 if(m->zero_fill) {
1820 queue_enter_first(
9bccf70c
A
1821 &vm_page_queue_zf,
1822 m, vm_page_t, pageq);
2d21ac55
A
1823 vm_zf_queue_count++;
1824 } else {
1825 queue_enter_first(
1826 &vm_page_queue_inactive,
1827 m, vm_page_t, pageq);
1828 }
0b4e3aa0 1829
2d21ac55
A
1830 m->inactive = TRUE;
1831 if (!m->fictitious) {
1832 vm_page_inactive_count++;
1833 token_new_pagecount++;
1834 } else {
1835 assert(m->phys_page == vm_page_fictitious_addr);
1836 }
0b4e3aa0 1837
2d21ac55
A
1838 pages_moved++;
1839 }
0b4e3aa0
A
1840 }
1841 }
1842 }
1843 vm_page_unlock_queues();
1844 vm_object_paging_end(object);
1845
1846 if (object->shadow) {
1847 vm_object_t tmp_object;
1848
1849 kill_page = 0;
1850
1851 offset += object->shadow_offset;
1852
1853 tmp_object = object->shadow;
1854 vm_object_lock(tmp_object);
1855
1856 if (object != orig_object)
1857 vm_object_unlock(object);
1858 object = tmp_object;
1859 } else
1860 break;
1861 }
1862 if (object != orig_object)
1863 vm_object_unlock(object);
1864}
1c79356b
A
1865
1866/*
1867 * Routine: vm_object_pmap_protect
1868 *
1869 * Purpose:
1870 * Reduces the permission for all physical
1871 * pages in the specified object range.
1872 *
1873 * If removing write permission only, it is
1874 * sufficient to protect only the pages in
1875 * the top-level object; only those pages may
1876 * have write permission.
1877 *
1878 * If removing all access, we must follow the
1879 * shadow chain from the top-level object to
1880 * remove access to all pages in shadowed objects.
1881 *
1882 * The object must *not* be locked. The object must
1883 * be temporary/internal.
1884 *
1885 * If pmap is not NULL, this routine assumes that
1886 * the only mappings for the pages are in that
1887 * pmap.
1888 */
1889
0b4e3aa0 1890__private_extern__ void
1c79356b
A
1891vm_object_pmap_protect(
1892 register vm_object_t object,
1893 register vm_object_offset_t offset,
91447636 1894 vm_object_size_t size,
1c79356b 1895 pmap_t pmap,
91447636 1896 vm_map_offset_t pmap_start,
1c79356b
A
1897 vm_prot_t prot)
1898{
1899 if (object == VM_OBJECT_NULL)
1900 return;
91447636
A
1901 size = vm_object_round_page(size);
1902 offset = vm_object_trunc_page(offset);
1c79356b
A
1903
1904 vm_object_lock(object);
1905
2d21ac55
A
1906 if (object->phys_contiguous) {
1907 if (pmap != NULL) {
1908 vm_object_unlock(object);
1909 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1910 } else {
1911 vm_object_offset_t phys_start, phys_end, phys_addr;
1912
1913 phys_start = object->shadow_offset + offset;
1914 phys_end = phys_start + size;
1915 assert(phys_start <= phys_end);
1916 assert(phys_end <= object->shadow_offset + object->size);
1917 vm_object_unlock(object);
1918
1919 for (phys_addr = phys_start;
1920 phys_addr < phys_end;
1921 phys_addr += PAGE_SIZE_64) {
935ed37a 1922 pmap_page_protect(phys_addr >> PAGE_SHIFT, prot);
2d21ac55
A
1923 }
1924 }
1925 return;
1926 }
1927
55e303ae 1928 assert(object->internal);
de355530 1929
1c79356b 1930 while (TRUE) {
91447636 1931 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
1c79356b
A
1932 vm_object_unlock(object);
1933 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1934 return;
1935 }
1936
9bccf70c
A
1937 /* if we are doing large ranges with respect to resident */
1938 /* page count then we should interate over pages otherwise */
1939 /* inverse page look-up will be faster */
91447636 1940 if (ptoa_64(object->resident_page_count / 4) < size) {
9bccf70c
A
1941 vm_page_t p;
1942 vm_object_offset_t end;
1c79356b
A
1943
1944 end = offset + size;
1945
1946 if (pmap != PMAP_NULL) {
1947 queue_iterate(&object->memq, p, vm_page_t, listq) {
1948 if (!p->fictitious &&
1949 (offset <= p->offset) && (p->offset < end)) {
91447636 1950 vm_map_offset_t start;
1c79356b 1951
91447636
A
1952 start = pmap_start + p->offset - offset;
1953 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
1c79356b
A
1954 }
1955 }
1956 } else {
1957 queue_iterate(&object->memq, p, vm_page_t, listq) {
1958 if (!p->fictitious &&
1959 (offset <= p->offset) && (p->offset < end)) {
1960
2d21ac55 1961 pmap_page_protect(p->phys_page, prot);
1c79356b
A
1962 }
1963 }
1964 }
9bccf70c
A
1965 } else {
1966 vm_page_t p;
1967 vm_object_offset_t end;
1968 vm_object_offset_t target_off;
1969
1970 end = offset + size;
1971
1972 if (pmap != PMAP_NULL) {
1973 for(target_off = offset;
91447636
A
1974 target_off < end;
1975 target_off += PAGE_SIZE) {
1976 p = vm_page_lookup(object, target_off);
1977 if (p != VM_PAGE_NULL) {
1978 vm_offset_t start;
1979 start = pmap_start +
9bccf70c
A
1980 (vm_offset_t)(p->offset - offset);
1981 pmap_protect(pmap, start,
1982 start + PAGE_SIZE, prot);
1983 }
1984 }
1985 } else {
1986 for(target_off = offset;
1987 target_off < end; target_off += PAGE_SIZE) {
91447636
A
1988 p = vm_page_lookup(object, target_off);
1989 if (p != VM_PAGE_NULL) {
2d21ac55 1990 pmap_page_protect(p->phys_page, prot);
9bccf70c
A
1991 }
1992 }
1993 }
1994 }
1c79356b
A
1995
1996 if (prot == VM_PROT_NONE) {
1997 /*
1998 * Must follow shadow chain to remove access
1999 * to pages in shadowed objects.
2000 */
2001 register vm_object_t next_object;
2002
2003 next_object = object->shadow;
2004 if (next_object != VM_OBJECT_NULL) {
2005 offset += object->shadow_offset;
2006 vm_object_lock(next_object);
2007 vm_object_unlock(object);
2008 object = next_object;
2009 }
2010 else {
2011 /*
2012 * End of chain - we are done.
2013 */
2014 break;
2015 }
2016 }
2017 else {
2018 /*
2019 * Pages in shadowed objects may never have
2020 * write permission - we may stop here.
2021 */
2022 break;
2023 }
2024 }
2025
2026 vm_object_unlock(object);
2027}
2028
2029/*
2030 * Routine: vm_object_copy_slowly
2031 *
2032 * Description:
2033 * Copy the specified range of the source
2034 * virtual memory object without using
2035 * protection-based optimizations (such
2036 * as copy-on-write). The pages in the
2037 * region are actually copied.
2038 *
2039 * In/out conditions:
2040 * The caller must hold a reference and a lock
2041 * for the source virtual memory object. The source
2042 * object will be returned *unlocked*.
2043 *
2044 * Results:
2045 * If the copy is completed successfully, KERN_SUCCESS is
2046 * returned. If the caller asserted the interruptible
2047 * argument, and an interruption occurred while waiting
2048 * for a user-generated event, MACH_SEND_INTERRUPTED is
2049 * returned. Other values may be returned to indicate
2050 * hard errors during the copy operation.
2051 *
2052 * A new virtual memory object is returned in a
2053 * parameter (_result_object). The contents of this
2054 * new object, starting at a zero offset, are a copy
2055 * of the source memory region. In the event of
2056 * an error, this parameter will contain the value
2057 * VM_OBJECT_NULL.
2058 */
0b4e3aa0 2059__private_extern__ kern_return_t
1c79356b
A
2060vm_object_copy_slowly(
2061 register vm_object_t src_object,
2062 vm_object_offset_t src_offset,
2063 vm_object_size_t size,
2064 boolean_t interruptible,
2065 vm_object_t *_result_object) /* OUT */
2066{
2067 vm_object_t new_object;
2068 vm_object_offset_t new_offset;
2069
2d21ac55 2070 struct vm_object_fault_info fault_info;
1c79356b
A
2071
2072 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
2073 src_object, src_offset, size, 0, 0);
2074
2075 if (size == 0) {
2076 vm_object_unlock(src_object);
2077 *_result_object = VM_OBJECT_NULL;
2078 return(KERN_INVALID_ARGUMENT);
2079 }
2080
2081 /*
2082 * Prevent destruction of the source object while we copy.
2083 */
2084
2d21ac55 2085 vm_object_reference_locked(src_object);
1c79356b
A
2086 vm_object_unlock(src_object);
2087
2088 /*
2089 * Create a new object to hold the copied pages.
2090 * A few notes:
2091 * We fill the new object starting at offset 0,
2092 * regardless of the input offset.
2093 * We don't bother to lock the new object within
2094 * this routine, since we have the only reference.
2095 */
2096
2097 new_object = vm_object_allocate(size);
2098 new_offset = 0;
2099
2100 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
2101
2d21ac55
A
2102 fault_info.interruptible = interruptible;
2103 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
2104 fault_info.user_tag = 0;
2105 fault_info.lo_offset = src_offset;
2106 fault_info.hi_offset = src_offset + size;
2107 fault_info.no_cache = FALSE;
2108
1c79356b
A
2109 for ( ;
2110 size != 0 ;
2111 src_offset += PAGE_SIZE_64,
2112 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
2113 ) {
2114 vm_page_t new_page;
2115 vm_fault_return_t result;
2116
2d21ac55
A
2117 vm_object_lock(new_object);
2118
1c79356b
A
2119 while ((new_page = vm_page_alloc(new_object, new_offset))
2120 == VM_PAGE_NULL) {
2d21ac55
A
2121
2122 vm_object_unlock(new_object);
2123
1c79356b
A
2124 if (!vm_page_wait(interruptible)) {
2125 vm_object_deallocate(new_object);
91447636 2126 vm_object_deallocate(src_object);
1c79356b
A
2127 *_result_object = VM_OBJECT_NULL;
2128 return(MACH_SEND_INTERRUPTED);
2129 }
2d21ac55 2130 vm_object_lock(new_object);
1c79356b 2131 }
2d21ac55 2132 vm_object_unlock(new_object);
1c79356b
A
2133
2134 do {
2135 vm_prot_t prot = VM_PROT_READ;
2136 vm_page_t _result_page;
2137 vm_page_t top_page;
2138 register
2139 vm_page_t result_page;
2140 kern_return_t error_code;
2141
2142 vm_object_lock(src_object);
2143 vm_object_paging_begin(src_object);
2144
2d21ac55
A
2145 fault_info.cluster_size = size;
2146
1c79356b
A
2147 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
2148 result = vm_fault_page(src_object, src_offset,
2d21ac55 2149 VM_PROT_READ, FALSE,
1c79356b
A
2150 &prot, &_result_page, &top_page,
2151 (int *)0,
2d21ac55 2152 &error_code, FALSE, FALSE, &fault_info);
1c79356b
A
2153
2154 switch(result) {
2155 case VM_FAULT_SUCCESS:
2156 result_page = _result_page;
2157
2158 /*
2159 * We don't need to hold the object
2160 * lock -- the busy page will be enough.
2161 * [We don't care about picking up any
2162 * new modifications.]
2163 *
2164 * Copy the page to the new object.
2165 *
2166 * POLICY DECISION:
2167 * If result_page is clean,
2168 * we could steal it instead
2169 * of copying.
2170 */
2171
2172 vm_object_unlock(result_page->object);
2173 vm_page_copy(result_page, new_page);
2174
2175 /*
2176 * Let go of both pages (make them
2177 * not busy, perform wakeup, activate).
2178 */
2d21ac55 2179 vm_object_lock(new_object);
1c79356b 2180 new_page->dirty = TRUE;
2d21ac55
A
2181 PAGE_WAKEUP_DONE(new_page);
2182 vm_object_unlock(new_object);
2183
1c79356b
A
2184 vm_object_lock(result_page->object);
2185 PAGE_WAKEUP_DONE(result_page);
2186
2d21ac55 2187 vm_page_lockspin_queues();
1c79356b 2188 if (!result_page->active &&
2d21ac55
A
2189 !result_page->inactive &&
2190 !result_page->throttled)
1c79356b
A
2191 vm_page_activate(result_page);
2192 vm_page_activate(new_page);
2193 vm_page_unlock_queues();
2194
2195 /*
2196 * Release paging references and
2197 * top-level placeholder page, if any.
2198 */
2199
2200 vm_fault_cleanup(result_page->object,
2201 top_page);
2202
2203 break;
2204
2205 case VM_FAULT_RETRY:
2206 break;
2207
2208 case VM_FAULT_FICTITIOUS_SHORTAGE:
2209 vm_page_more_fictitious();
2210 break;
2211
2212 case VM_FAULT_MEMORY_SHORTAGE:
2213 if (vm_page_wait(interruptible))
2214 break;
2215 /* fall thru */
2216
2217 case VM_FAULT_INTERRUPTED:
593a1d5f
A
2218 vm_object_lock(new_object);
2219 vm_page_lock_queues();
1c79356b 2220 vm_page_free(new_page);
593a1d5f
A
2221 vm_page_unlock_queues();
2222 vm_object_unlock(new_object);
2223
1c79356b
A
2224 vm_object_deallocate(new_object);
2225 vm_object_deallocate(src_object);
2226 *_result_object = VM_OBJECT_NULL;
2227 return(MACH_SEND_INTERRUPTED);
2228
2229 case VM_FAULT_MEMORY_ERROR:
2230 /*
2231 * A policy choice:
2232 * (a) ignore pages that we can't
2233 * copy
2234 * (b) return the null object if
2235 * any page fails [chosen]
2236 */
2237
593a1d5f 2238 vm_object_lock(new_object);
1c79356b
A
2239 vm_page_lock_queues();
2240 vm_page_free(new_page);
2241 vm_page_unlock_queues();
593a1d5f 2242 vm_object_unlock(new_object);
2d21ac55 2243
1c79356b
A
2244 vm_object_deallocate(new_object);
2245 vm_object_deallocate(src_object);
2246 *_result_object = VM_OBJECT_NULL;
2247 return(error_code ? error_code:
2248 KERN_MEMORY_ERROR);
2249 }
2250 } while (result != VM_FAULT_SUCCESS);
2251 }
2252
2253 /*
2254 * Lose the extra reference, and return our object.
2255 */
1c79356b
A
2256 vm_object_deallocate(src_object);
2257 *_result_object = new_object;
2258 return(KERN_SUCCESS);
2259}
2260
2261/*
2262 * Routine: vm_object_copy_quickly
2263 *
2264 * Purpose:
2265 * Copy the specified range of the source virtual
2266 * memory object, if it can be done without waiting
2267 * for user-generated events.
2268 *
2269 * Results:
2270 * If the copy is successful, the copy is returned in
2271 * the arguments; otherwise, the arguments are not
2272 * affected.
2273 *
2274 * In/out conditions:
2275 * The object should be unlocked on entry and exit.
2276 */
2277
2278/*ARGSUSED*/
0b4e3aa0 2279__private_extern__ boolean_t
1c79356b
A
2280vm_object_copy_quickly(
2281 vm_object_t *_object, /* INOUT */
91447636
A
2282 __unused vm_object_offset_t offset, /* IN */
2283 __unused vm_object_size_t size, /* IN */
1c79356b
A
2284 boolean_t *_src_needs_copy, /* OUT */
2285 boolean_t *_dst_needs_copy) /* OUT */
2286{
2287 vm_object_t object = *_object;
2288 memory_object_copy_strategy_t copy_strategy;
2289
2290 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
2291 *_object, offset, size, 0, 0);
2292 if (object == VM_OBJECT_NULL) {
2293 *_src_needs_copy = FALSE;
2294 *_dst_needs_copy = FALSE;
2295 return(TRUE);
2296 }
2297
2298 vm_object_lock(object);
2299
2300 copy_strategy = object->copy_strategy;
2301
2302 switch (copy_strategy) {
2303 case MEMORY_OBJECT_COPY_SYMMETRIC:
2304
2305 /*
2306 * Symmetric copy strategy.
2307 * Make another reference to the object.
2308 * Leave object/offset unchanged.
2309 */
2310
2d21ac55 2311 vm_object_reference_locked(object);
1c79356b
A
2312 object->shadowed = TRUE;
2313 vm_object_unlock(object);
2314
2315 /*
2316 * Both source and destination must make
2317 * shadows, and the source must be made
2318 * read-only if not already.
2319 */
2320
2321 *_src_needs_copy = TRUE;
2322 *_dst_needs_copy = TRUE;
2323
2324 break;
2325
2326 case MEMORY_OBJECT_COPY_DELAY:
2327 vm_object_unlock(object);
2328 return(FALSE);
2329
2330 default:
2331 vm_object_unlock(object);
2332 return(FALSE);
2333 }
2334 return(TRUE);
2335}
2336
0b4e3aa0
A
2337static int copy_call_count = 0;
2338static int copy_call_sleep_count = 0;
2339static int copy_call_restart_count = 0;
1c79356b
A
2340
2341/*
2342 * Routine: vm_object_copy_call [internal]
2343 *
2344 * Description:
2345 * Copy the source object (src_object), using the
2346 * user-managed copy algorithm.
2347 *
2348 * In/out conditions:
2349 * The source object must be locked on entry. It
2350 * will be *unlocked* on exit.
2351 *
2352 * Results:
2353 * If the copy is successful, KERN_SUCCESS is returned.
2354 * A new object that represents the copied virtual
2355 * memory is returned in a parameter (*_result_object).
2356 * If the return value indicates an error, this parameter
2357 * is not valid.
2358 */
0b4e3aa0 2359static kern_return_t
1c79356b
A
2360vm_object_copy_call(
2361 vm_object_t src_object,
2362 vm_object_offset_t src_offset,
2363 vm_object_size_t size,
2364 vm_object_t *_result_object) /* OUT */
2365{
2366 kern_return_t kr;
2367 vm_object_t copy;
2368 boolean_t check_ready = FALSE;
2d21ac55 2369 uint32_t try_failed_count = 0;
1c79356b
A
2370
2371 /*
2372 * If a copy is already in progress, wait and retry.
2373 *
2374 * XXX
2375 * Consider making this call interruptable, as Mike
2376 * intended it to be.
2377 *
2378 * XXXO
2379 * Need a counter or version or something to allow
2380 * us to use the copy that the currently requesting
2381 * thread is obtaining -- is it worth adding to the
2382 * vm object structure? Depends how common this case it.
2383 */
2384 copy_call_count++;
2385 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
9bccf70c 2386 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1c79356b 2387 THREAD_UNINT);
1c79356b
A
2388 copy_call_restart_count++;
2389 }
2390
2391 /*
2392 * Indicate (for the benefit of memory_object_create_copy)
2393 * that we want a copy for src_object. (Note that we cannot
2394 * do a real assert_wait before calling memory_object_copy,
2395 * so we simply set the flag.)
2396 */
2397
2398 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
2399 vm_object_unlock(src_object);
2400
2401 /*
2402 * Ask the memory manager to give us a memory object
2403 * which represents a copy of the src object.
2404 * The memory manager may give us a memory object
2405 * which we already have, or it may give us a
2406 * new memory object. This memory object will arrive
2407 * via memory_object_create_copy.
2408 */
2409
2410 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
2411 if (kr != KERN_SUCCESS) {
2412 return kr;
2413 }
2414
2415 /*
2416 * Wait for the copy to arrive.
2417 */
2418 vm_object_lock(src_object);
2419 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
9bccf70c 2420 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1c79356b 2421 THREAD_UNINT);
1c79356b
A
2422 copy_call_sleep_count++;
2423 }
2424Retry:
2425 assert(src_object->copy != VM_OBJECT_NULL);
2426 copy = src_object->copy;
2427 if (!vm_object_lock_try(copy)) {
2428 vm_object_unlock(src_object);
2d21ac55
A
2429
2430 try_failed_count++;
2431 mutex_pause(try_failed_count); /* wait a bit */
2432
1c79356b
A
2433 vm_object_lock(src_object);
2434 goto Retry;
2435 }
2436 if (copy->size < src_offset+size)
2437 copy->size = src_offset+size;
2438
2439 if (!copy->pager_ready)
2440 check_ready = TRUE;
2441
2442 /*
2443 * Return the copy.
2444 */
2445 *_result_object = copy;
2446 vm_object_unlock(copy);
2447 vm_object_unlock(src_object);
2448
2449 /* Wait for the copy to be ready. */
2450 if (check_ready == TRUE) {
2451 vm_object_lock(copy);
2452 while (!copy->pager_ready) {
9bccf70c 2453 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1c79356b
A
2454 }
2455 vm_object_unlock(copy);
2456 }
2457
2458 return KERN_SUCCESS;
2459}
2460
0b4e3aa0
A
2461static int copy_delayed_lock_collisions = 0;
2462static int copy_delayed_max_collisions = 0;
2463static int copy_delayed_lock_contention = 0;
2464static int copy_delayed_protect_iterate = 0;
1c79356b
A
2465
2466/*
2467 * Routine: vm_object_copy_delayed [internal]
2468 *
2469 * Description:
2470 * Copy the specified virtual memory object, using
2471 * the asymmetric copy-on-write algorithm.
2472 *
2473 * In/out conditions:
55e303ae
A
2474 * The src_object must be locked on entry. It will be unlocked
2475 * on exit - so the caller must also hold a reference to it.
1c79356b
A
2476 *
2477 * This routine will not block waiting for user-generated
2478 * events. It is not interruptible.
2479 */
0b4e3aa0 2480__private_extern__ vm_object_t
1c79356b
A
2481vm_object_copy_delayed(
2482 vm_object_t src_object,
2483 vm_object_offset_t src_offset,
2d21ac55
A
2484 vm_object_size_t size,
2485 boolean_t src_object_shared)
1c79356b
A
2486{
2487 vm_object_t new_copy = VM_OBJECT_NULL;
2488 vm_object_t old_copy;
2489 vm_page_t p;
55e303ae 2490 vm_object_size_t copy_size = src_offset + size;
1c79356b 2491
2d21ac55 2492
1c79356b
A
2493 int collisions = 0;
2494 /*
2495 * The user-level memory manager wants to see all of the changes
2496 * to this object, but it has promised not to make any changes on
2497 * its own.
2498 *
2499 * Perform an asymmetric copy-on-write, as follows:
2500 * Create a new object, called a "copy object" to hold
2501 * pages modified by the new mapping (i.e., the copy,
2502 * not the original mapping).
2503 * Record the original object as the backing object for
2504 * the copy object. If the original mapping does not
2505 * change a page, it may be used read-only by the copy.
2506 * Record the copy object in the original object.
2507 * When the original mapping causes a page to be modified,
2508 * it must be copied to a new page that is "pushed" to
2509 * the copy object.
2510 * Mark the new mapping (the copy object) copy-on-write.
2511 * This makes the copy object itself read-only, allowing
2512 * it to be reused if the original mapping makes no
2513 * changes, and simplifying the synchronization required
2514 * in the "push" operation described above.
2515 *
2516 * The copy-on-write is said to be assymetric because the original
2517 * object is *not* marked copy-on-write. A copied page is pushed
2518 * to the copy object, regardless which party attempted to modify
2519 * the page.
2520 *
2521 * Repeated asymmetric copy operations may be done. If the
2522 * original object has not been changed since the last copy, its
2523 * copy object can be reused. Otherwise, a new copy object can be
2524 * inserted between the original object and its previous copy
2525 * object. Since any copy object is read-only, this cannot affect
2526 * affect the contents of the previous copy object.
2527 *
2528 * Note that a copy object is higher in the object tree than the
2529 * original object; therefore, use of the copy object recorded in
2530 * the original object must be done carefully, to avoid deadlock.
2531 */
2532
2533 Retry:
1c79356b 2534
55e303ae
A
2535 /*
2536 * Wait for paging in progress.
2537 */
2d21ac55
A
2538 if (!src_object->true_share && src_object->paging_in_progress) {
2539 if (src_object_shared == TRUE) {
2540 vm_object_unlock(src_object);
2541
2542 vm_object_lock(src_object);
2543 src_object_shared = FALSE;
2544 }
55e303ae 2545 vm_object_paging_wait(src_object, THREAD_UNINT);
2d21ac55 2546 }
1c79356b
A
2547 /*
2548 * See whether we can reuse the result of a previous
2549 * copy operation.
2550 */
2551
2552 old_copy = src_object->copy;
2553 if (old_copy != VM_OBJECT_NULL) {
2d21ac55
A
2554 int lock_granted;
2555
1c79356b
A
2556 /*
2557 * Try to get the locks (out of order)
2558 */
2d21ac55
A
2559 if (src_object_shared == TRUE)
2560 lock_granted = vm_object_lock_try_shared(old_copy);
2561 else
2562 lock_granted = vm_object_lock_try(old_copy);
2563
2564 if (!lock_granted) {
1c79356b 2565 vm_object_unlock(src_object);
1c79356b 2566
1c79356b
A
2567 if (collisions++ == 0)
2568 copy_delayed_lock_contention++;
2d21ac55
A
2569 mutex_pause(collisions);
2570
2571 /* Heisenberg Rules */
2572 copy_delayed_lock_collisions++;
1c79356b
A
2573
2574 if (collisions > copy_delayed_max_collisions)
2575 copy_delayed_max_collisions = collisions;
2576
2d21ac55
A
2577 if (src_object_shared == TRUE)
2578 vm_object_lock_shared(src_object);
2579 else
2580 vm_object_lock(src_object);
2581
1c79356b
A
2582 goto Retry;
2583 }
2584
2585 /*
2586 * Determine whether the old copy object has
2587 * been modified.
2588 */
2589
2590 if (old_copy->resident_page_count == 0 &&
2591 !old_copy->pager_created) {
2592 /*
2593 * It has not been modified.
2594 *
2595 * Return another reference to
55e303ae
A
2596 * the existing copy-object if
2597 * we can safely grow it (if
2598 * needed).
de355530 2599 */
1c79356b 2600
55e303ae 2601 if (old_copy->size < copy_size) {
2d21ac55
A
2602 if (src_object_shared == TRUE) {
2603 vm_object_unlock(old_copy);
2604 vm_object_unlock(src_object);
2605
2606 vm_object_lock(src_object);
2607 src_object_shared = FALSE;
2608 goto Retry;
2609 }
55e303ae
A
2610 /*
2611 * We can't perform a delayed copy if any of the
2612 * pages in the extended range are wired (because
2613 * we can't safely take write permission away from
2614 * wired pages). If the pages aren't wired, then
2615 * go ahead and protect them.
2616 */
2617 copy_delayed_protect_iterate++;
2d21ac55 2618
55e303ae
A
2619 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2620 if (!p->fictitious &&
2621 p->offset >= old_copy->size &&
2622 p->offset < copy_size) {
2623 if (p->wire_count > 0) {
2624 vm_object_unlock(old_copy);
2625 vm_object_unlock(src_object);
91447636
A
2626
2627 if (new_copy != VM_OBJECT_NULL) {
2628 vm_object_unlock(new_copy);
2629 vm_object_deallocate(new_copy);
2630 }
2631
55e303ae
A
2632 return VM_OBJECT_NULL;
2633 } else {
2634 pmap_page_protect(p->phys_page,
2d21ac55 2635 (VM_PROT_ALL & ~VM_PROT_WRITE));
55e303ae
A
2636 }
2637 }
2638 }
2639 old_copy->size = copy_size;
2640 }
2d21ac55
A
2641 if (src_object_shared == TRUE)
2642 vm_object_reference_shared(old_copy);
2643 else
2644 vm_object_reference_locked(old_copy);
d7e50217
A
2645 vm_object_unlock(old_copy);
2646 vm_object_unlock(src_object);
91447636
A
2647
2648 if (new_copy != VM_OBJECT_NULL) {
2649 vm_object_unlock(new_copy);
2650 vm_object_deallocate(new_copy);
2651 }
55e303ae 2652 return(old_copy);
d7e50217 2653 }
2d21ac55
A
2654
2655
de355530
A
2656
2657 /*
2658 * Adjust the size argument so that the newly-created
2659 * copy object will be large enough to back either the
55e303ae 2660 * old copy object or the new mapping.
de355530 2661 */
55e303ae
A
2662 if (old_copy->size > copy_size)
2663 copy_size = old_copy->size;
2664
2665 if (new_copy == VM_OBJECT_NULL) {
2666 vm_object_unlock(old_copy);
2667 vm_object_unlock(src_object);
2668 new_copy = vm_object_allocate(copy_size);
2669 vm_object_lock(src_object);
2670 vm_object_lock(new_copy);
2d21ac55
A
2671
2672 src_object_shared = FALSE;
55e303ae
A
2673 goto Retry;
2674 }
2675 new_copy->size = copy_size;
1c79356b
A
2676
2677 /*
2678 * The copy-object is always made large enough to
2679 * completely shadow the original object, since
2680 * it may have several users who want to shadow
2681 * the original object at different points.
2682 */
2683
2684 assert((old_copy->shadow == src_object) &&
2685 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2686
55e303ae
A
2687 } else if (new_copy == VM_OBJECT_NULL) {
2688 vm_object_unlock(src_object);
2689 new_copy = vm_object_allocate(copy_size);
2690 vm_object_lock(src_object);
2691 vm_object_lock(new_copy);
2d21ac55
A
2692
2693 src_object_shared = FALSE;
55e303ae
A
2694 goto Retry;
2695 }
2696
2697 /*
2698 * We now have the src object locked, and the new copy object
2699 * allocated and locked (and potentially the old copy locked).
2700 * Before we go any further, make sure we can still perform
2701 * a delayed copy, as the situation may have changed.
2702 *
2703 * Specifically, we can't perform a delayed copy if any of the
2704 * pages in the range are wired (because we can't safely take
2705 * write permission away from wired pages). If the pages aren't
2706 * wired, then go ahead and protect them.
2707 */
2708 copy_delayed_protect_iterate++;
2d21ac55 2709
55e303ae
A
2710 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2711 if (!p->fictitious && p->offset < copy_size) {
2712 if (p->wire_count > 0) {
2713 if (old_copy)
2714 vm_object_unlock(old_copy);
2715 vm_object_unlock(src_object);
2716 vm_object_unlock(new_copy);
2717 vm_object_deallocate(new_copy);
2718 return VM_OBJECT_NULL;
2719 } else {
2720 pmap_page_protect(p->phys_page,
2d21ac55 2721 (VM_PROT_ALL & ~VM_PROT_WRITE));
55e303ae
A
2722 }
2723 }
2724 }
55e303ae 2725 if (old_copy != VM_OBJECT_NULL) {
1c79356b
A
2726 /*
2727 * Make the old copy-object shadow the new one.
2728 * It will receive no more pages from the original
2729 * object.
2730 */
2731
2d21ac55
A
2732 /* remove ref. from old_copy */
2733 vm_object_lock_assert_exclusive(src_object);
2734 src_object->ref_count--;
1c79356b 2735 assert(src_object->ref_count > 0);
2d21ac55 2736 vm_object_lock_assert_exclusive(old_copy);
1c79356b 2737 old_copy->shadow = new_copy;
2d21ac55 2738 vm_object_lock_assert_exclusive(new_copy);
1c79356b
A
2739 assert(new_copy->ref_count > 0);
2740 new_copy->ref_count++; /* for old_copy->shadow ref. */
2741
2742#if TASK_SWAPPER
2743 if (old_copy->res_count) {
2744 VM_OBJ_RES_INCR(new_copy);
2745 VM_OBJ_RES_DECR(src_object);
2746 }
2747#endif
2748
2749 vm_object_unlock(old_copy); /* done with old_copy */
1c79356b
A
2750 }
2751
2752 /*
2753 * Point the new copy at the existing object.
2754 */
2d21ac55 2755 vm_object_lock_assert_exclusive(new_copy);
1c79356b
A
2756 new_copy->shadow = src_object;
2757 new_copy->shadow_offset = 0;
2758 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2d21ac55
A
2759
2760 vm_object_lock_assert_exclusive(src_object);
2761 vm_object_reference_locked(src_object);
1c79356b 2762 src_object->copy = new_copy;
55e303ae 2763 vm_object_unlock(src_object);
1c79356b
A
2764 vm_object_unlock(new_copy);
2765
1c79356b
A
2766 XPR(XPR_VM_OBJECT,
2767 "vm_object_copy_delayed: used copy object %X for source %X\n",
2768 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2769
2d21ac55 2770 return new_copy;
1c79356b
A
2771}
2772
2773/*
2774 * Routine: vm_object_copy_strategically
2775 *
2776 * Purpose:
2777 * Perform a copy according to the source object's
2778 * declared strategy. This operation may block,
2779 * and may be interrupted.
2780 */
0b4e3aa0 2781__private_extern__ kern_return_t
1c79356b
A
2782vm_object_copy_strategically(
2783 register vm_object_t src_object,
2784 vm_object_offset_t src_offset,
2785 vm_object_size_t size,
2786 vm_object_t *dst_object, /* OUT */
2787 vm_object_offset_t *dst_offset, /* OUT */
2788 boolean_t *dst_needs_copy) /* OUT */
2789{
2790 boolean_t result;
2791 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2d21ac55 2792 boolean_t object_lock_shared = FALSE;
1c79356b
A
2793 memory_object_copy_strategy_t copy_strategy;
2794
2795 assert(src_object != VM_OBJECT_NULL);
2796
2d21ac55
A
2797 copy_strategy = src_object->copy_strategy;
2798
2799 if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
2800 vm_object_lock_shared(src_object);
2801 object_lock_shared = TRUE;
2802 } else
2803 vm_object_lock(src_object);
1c79356b
A
2804
2805 /*
2806 * The copy strategy is only valid if the memory manager
2807 * is "ready". Internal objects are always ready.
2808 */
2809
2810 while (!src_object->internal && !src_object->pager_ready) {
9bccf70c 2811 wait_result_t wait_result;
1c79356b 2812
2d21ac55
A
2813 if (object_lock_shared == TRUE) {
2814 vm_object_unlock(src_object);
2815 vm_object_lock(src_object);
2816 object_lock_shared = FALSE;
2817 continue;
2818 }
9bccf70c
A
2819 wait_result = vm_object_sleep( src_object,
2820 VM_OBJECT_EVENT_PAGER_READY,
2821 interruptible);
2822 if (wait_result != THREAD_AWAKENED) {
2823 vm_object_unlock(src_object);
1c79356b
A
2824 *dst_object = VM_OBJECT_NULL;
2825 *dst_offset = 0;
2826 *dst_needs_copy = FALSE;
2827 return(MACH_SEND_INTERRUPTED);
2828 }
1c79356b
A
2829 }
2830
1c79356b
A
2831 /*
2832 * Use the appropriate copy strategy.
2833 */
2834
2835 switch (copy_strategy) {
55e303ae
A
2836 case MEMORY_OBJECT_COPY_DELAY:
2837 *dst_object = vm_object_copy_delayed(src_object,
2d21ac55 2838 src_offset, size, object_lock_shared);
55e303ae
A
2839 if (*dst_object != VM_OBJECT_NULL) {
2840 *dst_offset = src_offset;
2841 *dst_needs_copy = TRUE;
2842 result = KERN_SUCCESS;
2843 break;
2844 }
2845 vm_object_lock(src_object);
2846 /* fall thru when delayed copy not allowed */
2847
1c79356b
A
2848 case MEMORY_OBJECT_COPY_NONE:
2849 result = vm_object_copy_slowly(src_object, src_offset, size,
2850 interruptible, dst_object);
2851 if (result == KERN_SUCCESS) {
2852 *dst_offset = 0;
2853 *dst_needs_copy = FALSE;
2854 }
2855 break;
2856
2857 case MEMORY_OBJECT_COPY_CALL:
2858 result = vm_object_copy_call(src_object, src_offset, size,
2859 dst_object);
2860 if (result == KERN_SUCCESS) {
2861 *dst_offset = src_offset;
2862 *dst_needs_copy = TRUE;
2863 }
2864 break;
2865
1c79356b
A
2866 case MEMORY_OBJECT_COPY_SYMMETRIC:
2867 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2868 vm_object_unlock(src_object);
2869 result = KERN_MEMORY_RESTART_COPY;
2870 break;
2871
2872 default:
2873 panic("copy_strategically: bad strategy");
2874 result = KERN_INVALID_ARGUMENT;
2875 }
2876 return(result);
2877}
2878
2879/*
2880 * vm_object_shadow:
2881 *
2882 * Create a new object which is backed by the
2883 * specified existing object range. The source
2884 * object reference is deallocated.
2885 *
2886 * The new object and offset into that object
2887 * are returned in the source parameters.
2888 */
2889boolean_t vm_object_shadow_check = FALSE;
2890
0b4e3aa0 2891__private_extern__ boolean_t
1c79356b
A
2892vm_object_shadow(
2893 vm_object_t *object, /* IN/OUT */
2894 vm_object_offset_t *offset, /* IN/OUT */
2895 vm_object_size_t length)
2896{
2897 register vm_object_t source;
2898 register vm_object_t result;
2899
2900 source = *object;
2d21ac55
A
2901#if 0
2902 /*
2903 * XXX FBDP
2904 * This assertion is valid but it gets triggered by Rosetta for example
2905 * due to a combination of vm_remap() that changes a VM object's
2906 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
2907 * that then sets "needs_copy" on its map entry. This creates a
2908 * mapping situation that VM should never see and doesn't know how to
2909 * handle.
2910 * It's not clear if this can create any real problem but we should
2911 * look into fixing this, probably by having vm_protect(VM_PROT_COPY)
2912 * do more than just set "needs_copy" to handle the copy-on-write...
2913 * In the meantime, let's disable the assertion.
2914 */
1c79356b 2915 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2d21ac55 2916#endif
1c79356b
A
2917
2918 /*
2919 * Determine if we really need a shadow.
2920 */
2921
2922 if (vm_object_shadow_check && source->ref_count == 1 &&
2923 (source->shadow == VM_OBJECT_NULL ||
2924 source->shadow->copy == VM_OBJECT_NULL))
2925 {
2926 source->shadowed = FALSE;
2927 return FALSE;
2928 }
2929
2930 /*
2931 * Allocate a new object with the given length
2932 */
2933
2934 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2935 panic("vm_object_shadow: no object for shadowing");
2936
2937 /*
2938 * The new object shadows the source object, adding
2939 * a reference to it. Our caller changes his reference
2940 * to point to the new object, removing a reference to
2941 * the source object. Net result: no change of reference
2942 * count.
2943 */
2944 result->shadow = source;
2945
2946 /*
2947 * Store the offset into the source object,
2948 * and fix up the offset into the new object.
2949 */
2950
2951 result->shadow_offset = *offset;
2952
2953 /*
2954 * Return the new things
2955 */
2956
2957 *offset = 0;
2958 *object = result;
2959 return TRUE;
2960}
2961
2962/*
2963 * The relationship between vm_object structures and
0b4e3aa0 2964 * the memory_object requires careful synchronization.
1c79356b 2965 *
0b4e3aa0
A
2966 * All associations are created by memory_object_create_named
2967 * for external pagers and vm_object_pager_create for internal
2968 * objects as follows:
2969 *
2970 * pager: the memory_object itself, supplied by
1c79356b
A
2971 * the user requesting a mapping (or the kernel,
2972 * when initializing internal objects); the
2973 * kernel simulates holding send rights by keeping
2974 * a port reference;
0b4e3aa0 2975 *
1c79356b
A
2976 * pager_request:
2977 * the memory object control port,
2978 * created by the kernel; the kernel holds
2979 * receive (and ownership) rights to this
2980 * port, but no other references.
1c79356b
A
2981 *
2982 * When initialization is complete, the "initialized" field
2983 * is asserted. Other mappings using a particular memory object,
2984 * and any references to the vm_object gained through the
2985 * port association must wait for this initialization to occur.
2986 *
2987 * In order to allow the memory manager to set attributes before
2988 * requests (notably virtual copy operations, but also data or
2989 * unlock requests) are made, a "ready" attribute is made available.
2990 * Only the memory manager may affect the value of this attribute.
2991 * Its value does not affect critical kernel functions, such as
2992 * internal object initialization or destruction. [Furthermore,
2993 * memory objects created by the kernel are assumed to be ready
2994 * immediately; the default memory manager need not explicitly
2995 * set the "ready" attribute.]
2996 *
2997 * [Both the "initialized" and "ready" attribute wait conditions
2998 * use the "pager" field as the wait event.]
2999 *
3000 * The port associations can be broken down by any of the
3001 * following routines:
3002 * vm_object_terminate:
3003 * No references to the vm_object remain, and
3004 * the object cannot (or will not) be cached.
3005 * This is the normal case, and is done even
3006 * though one of the other cases has already been
3007 * done.
1c79356b
A
3008 * memory_object_destroy:
3009 * The memory manager has requested that the
0b4e3aa0
A
3010 * kernel relinquish references to the memory
3011 * object. [The memory manager may not want to
3012 * destroy the memory object, but may wish to
3013 * refuse or tear down existing memory mappings.]
3014 *
1c79356b
A
3015 * Each routine that breaks an association must break all of
3016 * them at once. At some later time, that routine must clear
0b4e3aa0 3017 * the pager field and release the memory object references.
1c79356b
A
3018 * [Furthermore, each routine must cope with the simultaneous
3019 * or previous operations of the others.]
3020 *
3021 * In addition to the lock on the object, the vm_object_cache_lock
0b4e3aa0
A
3022 * governs the associations. References gained through the
3023 * association require use of the cache lock.
1c79356b 3024 *
0b4e3aa0 3025 * Because the pager field may be cleared spontaneously, it
1c79356b
A
3026 * cannot be used to determine whether a memory object has
3027 * ever been associated with a particular vm_object. [This
2d21ac55
A
3028 * knowledge is important to the shadow object mechanism.]
3029 * For this reason, an additional "created" attribute is
3030 * provided.
3031 *
3032 * During various paging operations, the pager reference found in the
3033 * vm_object must be valid. To prevent this from being released,
3034 * (other than being removed, i.e., made null), routines may use
3035 * the vm_object_paging_begin/end routines [actually, macros].
3036 * The implementation uses the "paging_in_progress" and "wanted" fields.
3037 * [Operations that alter the validity of the pager values include the
3038 * termination routines and vm_object_collapse.]
3039 */
1c79356b 3040
1c79356b
A
3041
3042/*
3043 * Routine: vm_object_enter
3044 * Purpose:
3045 * Find a VM object corresponding to the given
3046 * pager; if no such object exists, create one,
3047 * and initialize the pager.
3048 */
3049vm_object_t
3050vm_object_enter(
0b4e3aa0 3051 memory_object_t pager,
1c79356b
A
3052 vm_object_size_t size,
3053 boolean_t internal,
3054 boolean_t init,
0b4e3aa0 3055 boolean_t named)
1c79356b
A
3056{
3057 register vm_object_t object;
3058 vm_object_t new_object;
3059 boolean_t must_init;
1c79356b 3060 vm_object_hash_entry_t entry, new_entry;
2d21ac55 3061 uint32_t try_failed_count = 0;
1c79356b 3062
0b4e3aa0 3063 if (pager == MEMORY_OBJECT_NULL)
1c79356b
A
3064 return(vm_object_allocate(size));
3065
3066 new_object = VM_OBJECT_NULL;
3067 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3068 must_init = init;
3069
3070 /*
3071 * Look for an object associated with this port.
3072 */
2d21ac55 3073Retry:
1c79356b 3074 vm_object_cache_lock();
55e303ae 3075 do {
1c79356b
A
3076 entry = vm_object_hash_lookup(pager, FALSE);
3077
55e303ae
A
3078 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
3079 if (new_object == VM_OBJECT_NULL) {
3080 /*
3081 * We must unlock to create a new object;
3082 * if we do so, we must try the lookup again.
3083 */
3084 vm_object_cache_unlock();
3085 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
3086 new_entry = vm_object_hash_entry_alloc(pager);
3087 new_object = vm_object_allocate(size);
3088 vm_object_cache_lock();
3089 } else {
3090 /*
3091 * Lookup failed twice, and we have something
3092 * to insert; set the object.
3093 */
3094 vm_object_hash_insert(new_entry);
3095 entry = new_entry;
3096 entry->object = new_object;
3097 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3098 new_object = VM_OBJECT_NULL;
3099 must_init = TRUE;
3100 }
3101 } else if (entry->object == VM_OBJECT_NULL) {
3102 /*
3103 * If a previous object is being terminated,
3104 * we must wait for the termination message
3105 * to be queued (and lookup the entry again).
3106 */
1c79356b 3107 entry->waiting = TRUE;
55e303ae 3108 entry = VM_OBJECT_HASH_ENTRY_NULL;
1c79356b
A
3109 assert_wait((event_t) pager, THREAD_UNINT);
3110 vm_object_cache_unlock();
91447636 3111 thread_block(THREAD_CONTINUE_NULL);
1c79356b 3112 vm_object_cache_lock();
1c79356b 3113 }
55e303ae 3114 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
1c79356b
A
3115
3116 object = entry->object;
3117 assert(object != VM_OBJECT_NULL);
3118
3119 if (!must_init) {
2d21ac55
A
3120 if (!vm_object_lock_try(object)) {
3121
3122 vm_object_cache_unlock();
3123
3124 try_failed_count++;
3125 mutex_pause(try_failed_count); /* wait a bit */
3126
3127 goto Retry;
3128 }
1c79356b 3129 assert(!internal || object->internal);
0b4e3aa0
A
3130 if (named) {
3131 assert(!object->named);
1c79356b 3132 object->named = TRUE;
0b4e3aa0 3133 }
1c79356b
A
3134 if (object->ref_count == 0) {
3135 XPR(XPR_VM_OBJECT_CACHE,
3136 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
3137 (integer_t)object,
3138 (integer_t)vm_object_cached_list.next,
3139 (integer_t)vm_object_cached_list.prev, 0,0);
3140 queue_remove(&vm_object_cached_list, object,
3141 vm_object_t, cached_list);
3142 vm_object_cached_count--;
3143 }
2d21ac55 3144 vm_object_lock_assert_exclusive(object);
1c79356b
A
3145 object->ref_count++;
3146 vm_object_res_reference(object);
3147 vm_object_unlock(object);
3148
2d21ac55 3149 VM_STAT_INCR(hits);
1c79356b
A
3150 }
3151 assert(object->ref_count > 0);
3152
2d21ac55 3153 VM_STAT_INCR(lookups);
1c79356b
A
3154
3155 vm_object_cache_unlock();
3156
3157 XPR(XPR_VM_OBJECT,
3158 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
3159 (integer_t)pager, (integer_t)object, must_init, 0, 0);
3160
3161 /*
3162 * If we raced to create a vm_object but lost, let's
3163 * throw away ours.
3164 */
3165
3166 if (new_object != VM_OBJECT_NULL)
3167 vm_object_deallocate(new_object);
3168
3169 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
3170 vm_object_hash_entry_free(new_entry);
3171
3172 if (must_init) {
91447636 3173 memory_object_control_t control;
1c79356b
A
3174
3175 /*
3176 * Allocate request port.
3177 */
3178
91447636
A
3179 control = memory_object_control_allocate(object);
3180 assert (control != MEMORY_OBJECT_CONTROL_NULL);
1c79356b
A
3181
3182 vm_object_lock(object);
91447636 3183 assert(object != kernel_object);
1c79356b
A
3184
3185 /*
0b4e3aa0 3186 * Copy the reference we were given.
1c79356b
A
3187 */
3188
0b4e3aa0 3189 memory_object_reference(pager);
1c79356b
A
3190 object->pager_created = TRUE;
3191 object->pager = pager;
3192 object->internal = internal;
3193 object->pager_trusted = internal;
3194 if (!internal) {
3195 /* copy strategy invalid until set by memory manager */
3196 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
3197 }
91447636 3198 object->pager_control = control;
1c79356b
A
3199 object->pager_ready = FALSE;
3200
1c79356b
A
3201 vm_object_unlock(object);
3202
3203 /*
3204 * Let the pager know we're using it.
3205 */
3206
0b4e3aa0 3207 (void) memory_object_init(pager,
91447636 3208 object->pager_control,
0b4e3aa0 3209 PAGE_SIZE);
1c79356b
A
3210
3211 vm_object_lock(object);
0b4e3aa0
A
3212 if (named)
3213 object->named = TRUE;
1c79356b
A
3214 if (internal) {
3215 object->pager_ready = TRUE;
3216 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
3217 }
3218
3219 object->pager_initialized = TRUE;
3220 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
3221 } else {
3222 vm_object_lock(object);
3223 }
3224
3225 /*
3226 * [At this point, the object must be locked]
3227 */
3228
3229 /*
3230 * Wait for the work above to be done by the first
3231 * thread to map this object.
3232 */
3233
3234 while (!object->pager_initialized) {
9bccf70c 3235 vm_object_sleep(object,
1c79356b
A
3236 VM_OBJECT_EVENT_INITIALIZED,
3237 THREAD_UNINT);
1c79356b
A
3238 }
3239 vm_object_unlock(object);
3240
3241 XPR(XPR_VM_OBJECT,
3242 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
3243 (integer_t)object, (integer_t)object->pager, internal, 0,0);
3244 return(object);
3245}
3246
3247/*
3248 * Routine: vm_object_pager_create
3249 * Purpose:
3250 * Create a memory object for an internal object.
3251 * In/out conditions:
3252 * The object is locked on entry and exit;
3253 * it may be unlocked within this call.
3254 * Limitations:
3255 * Only one thread may be performing a
3256 * vm_object_pager_create on an object at
3257 * a time. Presumably, only the pageout
3258 * daemon will be using this routine.
3259 */
3260
3261void
3262vm_object_pager_create(
3263 register vm_object_t object)
3264{
0b4e3aa0 3265 memory_object_t pager;
1c79356b
A
3266 vm_object_hash_entry_t entry;
3267#if MACH_PAGEMAP
3268 vm_object_size_t size;
3269 vm_external_map_t map;
3270#endif /* MACH_PAGEMAP */
3271
3272 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
3273 (integer_t)object, 0,0,0,0);
3274
91447636
A
3275 assert(object != kernel_object);
3276
1c79356b
A
3277 if (memory_manager_default_check() != KERN_SUCCESS)
3278 return;
3279
3280 /*
3281 * Prevent collapse or termination by holding a paging reference
3282 */
3283
3284 vm_object_paging_begin(object);
3285 if (object->pager_created) {
3286 /*
3287 * Someone else got to it first...
3288 * wait for them to finish initializing the ports
3289 */
3290 while (!object->pager_initialized) {
9bccf70c
A
3291 vm_object_sleep(object,
3292 VM_OBJECT_EVENT_INITIALIZED,
3293 THREAD_UNINT);
1c79356b
A
3294 }
3295 vm_object_paging_end(object);
3296 return;
3297 }
3298
3299 /*
3300 * Indicate that a memory object has been assigned
3301 * before dropping the lock, to prevent a race.
3302 */
3303
3304 object->pager_created = TRUE;
3305 object->paging_offset = 0;
3306
3307#if MACH_PAGEMAP
3308 size = object->size;
3309#endif /* MACH_PAGEMAP */
3310 vm_object_unlock(object);
3311
3312#if MACH_PAGEMAP
3313 map = vm_external_create(size);
3314 vm_object_lock(object);
3315 assert(object->size == size);
3316 object->existence_map = map;
3317 vm_object_unlock(object);
3318#endif /* MACH_PAGEMAP */
3319
3320 /*
0b4e3aa0 3321 * Create the [internal] pager, and associate it with this object.
1c79356b 3322 *
0b4e3aa0 3323 * We make the association here so that vm_object_enter()
1c79356b
A
3324 * can look up the object to complete initializing it. No
3325 * user will ever map this object.
3326 */
3327 {
0b4e3aa0 3328 memory_object_default_t dmm;
1c79356b 3329
0b4e3aa0 3330 /* acquire a reference for the default memory manager */
2d21ac55 3331 dmm = memory_manager_default_reference();
1c79356b 3332
1c79356b
A
3333 assert(object->temporary);
3334
0b4e3aa0
A
3335 /* create our new memory object */
3336 (void) memory_object_create(dmm, object->size, &pager);
3337
3338 memory_object_default_deallocate(dmm);
1c79356b
A
3339 }
3340
3341 entry = vm_object_hash_entry_alloc(pager);
3342
3343 vm_object_cache_lock();
3344 vm_object_hash_insert(entry);
3345
3346 entry->object = object;
3347 vm_object_cache_unlock();
3348
3349 /*
0b4e3aa0 3350 * A reference was returned by
1c79356b
A
3351 * memory_object_create(), and it is
3352 * copied by vm_object_enter().
3353 */
3354
3355 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
3356 panic("vm_object_pager_create: mismatch");
3357
3358 /*
0b4e3aa0 3359 * Drop the reference we were passed.
1c79356b 3360 */
0b4e3aa0 3361 memory_object_deallocate(pager);
1c79356b
A
3362
3363 vm_object_lock(object);
3364
3365 /*
3366 * Release the paging reference
3367 */
3368 vm_object_paging_end(object);
3369}
3370
3371/*
3372 * Routine: vm_object_remove
3373 * Purpose:
3374 * Eliminate the pager/object association
3375 * for this pager.
3376 * Conditions:
3377 * The object cache must be locked.
3378 */
0b4e3aa0 3379__private_extern__ void
1c79356b
A
3380vm_object_remove(
3381 vm_object_t object)
3382{
0b4e3aa0 3383 memory_object_t pager;
1c79356b 3384
0b4e3aa0 3385 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
1c79356b
A
3386 vm_object_hash_entry_t entry;
3387
0b4e3aa0 3388 entry = vm_object_hash_lookup(pager, FALSE);
1c79356b
A
3389 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
3390 entry->object = VM_OBJECT_NULL;
3391 }
3392
1c79356b
A
3393}
3394
3395/*
3396 * Global variables for vm_object_collapse():
3397 *
3398 * Counts for normal collapses and bypasses.
3399 * Debugging variables, to watch or disable collapse.
3400 */
0b4e3aa0
A
3401static long object_collapses = 0;
3402static long object_bypasses = 0;
1c79356b 3403
0b4e3aa0
A
3404static boolean_t vm_object_collapse_allowed = TRUE;
3405static boolean_t vm_object_bypass_allowed = TRUE;
3406
2d21ac55 3407#if MACH_PAGEMAP
0b4e3aa0
A
3408static int vm_external_discarded;
3409static int vm_external_collapsed;
2d21ac55 3410#endif
1c79356b 3411
91447636
A
3412unsigned long vm_object_collapse_encrypted = 0;
3413
1c79356b 3414/*
0b4e3aa0
A
3415 * Routine: vm_object_do_collapse
3416 * Purpose:
3417 * Collapse an object with the object backing it.
3418 * Pages in the backing object are moved into the
3419 * parent, and the backing object is deallocated.
3420 * Conditions:
3421 * Both objects and the cache are locked; the page
3422 * queues are unlocked.
1c79356b
A
3423 *
3424 */
0b4e3aa0 3425static void
1c79356b
A
3426vm_object_do_collapse(
3427 vm_object_t object,
3428 vm_object_t backing_object)
3429{
3430 vm_page_t p, pp;
3431 vm_object_offset_t new_offset, backing_offset;
3432 vm_object_size_t size;
3433
3434 backing_offset = object->shadow_offset;
3435 size = object->size;
3436
1c79356b
A
3437 /*
3438 * Move all in-memory pages from backing_object
3439 * to the parent. Pages that have been paged out
3440 * will be overwritten by any of the parent's
3441 * pages that shadow them.
3442 */
3443
3444 while (!queue_empty(&backing_object->memq)) {
3445
3446 p = (vm_page_t) queue_first(&backing_object->memq);
3447
3448 new_offset = (p->offset - backing_offset);
3449
3450 assert(!p->busy || p->absent);
91447636 3451
1c79356b
A
3452 /*
3453 * If the parent has a page here, or if
3454 * this page falls outside the parent,
3455 * dispose of it.
3456 *
3457 * Otherwise, move it as planned.
3458 */
3459
3460 if (p->offset < backing_offset || new_offset >= size) {
3461 VM_PAGE_FREE(p);
3462 } else {
91447636
A
3463 /*
3464 * ENCRYPTED SWAP:
3465 * The encryption key includes the "pager" and the
2d21ac55
A
3466 * "paging_offset". These will not change during the
3467 * object collapse, so we can just move an encrypted
3468 * page from one object to the other in this case.
3469 * We can't decrypt the page here, since we can't drop
91447636 3470 * the object lock.
91447636 3471 */
2d21ac55
A
3472 if (p->encrypted) {
3473 vm_object_collapse_encrypted++;
3474 }
1c79356b
A
3475 pp = vm_page_lookup(object, new_offset);
3476 if (pp == VM_PAGE_NULL) {
3477
3478 /*
3479 * Parent now has no page.
3480 * Move the backing object's page up.
3481 */
3482
2d21ac55 3483 vm_page_rename(p, object, new_offset, TRUE);
1c79356b
A
3484#if MACH_PAGEMAP
3485 } else if (pp->absent) {
3486
3487 /*
3488 * Parent has an absent page...
3489 * it's not being paged in, so
3490 * it must really be missing from
3491 * the parent.
3492 *
3493 * Throw out the absent page...
3494 * any faults looking for that
3495 * page will restart with the new
3496 * one.
3497 */
3498
3499 VM_PAGE_FREE(pp);
2d21ac55 3500 vm_page_rename(p, object, new_offset, TRUE);
1c79356b
A
3501#endif /* MACH_PAGEMAP */
3502 } else {
3503 assert(! pp->absent);
3504
3505 /*
3506 * Parent object has a real page.
3507 * Throw away the backing object's
3508 * page.
3509 */
3510 VM_PAGE_FREE(p);
3511 }
3512 }
3513 }
3514
55e303ae 3515#if !MACH_PAGEMAP
2d21ac55 3516 assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL))
55e303ae 3517 || (!backing_object->pager_created
2d21ac55 3518 && (backing_object->pager == MEMORY_OBJECT_NULL)));
55e303ae
A
3519#else
3520 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
3521#endif /* !MACH_PAGEMAP */
1c79356b 3522
0b4e3aa0 3523 if (backing_object->pager != MEMORY_OBJECT_NULL) {
1c79356b
A
3524 vm_object_hash_entry_t entry;
3525
3526 /*
3527 * Move the pager from backing_object to object.
3528 *
3529 * XXX We're only using part of the paging space
3530 * for keeps now... we ought to discard the
3531 * unused portion.
3532 */
3533
55e303ae 3534 assert(!object->paging_in_progress);
1c79356b
A
3535 object->pager = backing_object->pager;
3536 entry = vm_object_hash_lookup(object->pager, FALSE);
3537 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3538 entry->object = object;
3539 object->pager_created = backing_object->pager_created;
91447636 3540 object->pager_control = backing_object->pager_control;
1c79356b
A
3541 object->pager_ready = backing_object->pager_ready;
3542 object->pager_initialized = backing_object->pager_initialized;
1c79356b
A
3543 object->paging_offset =
3544 backing_object->paging_offset + backing_offset;
91447636
A
3545 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
3546 memory_object_control_collapse(object->pager_control,
0b4e3aa0 3547 object);
1c79356b
A
3548 }
3549 }
3550
3551 vm_object_cache_unlock();
3552
1c79356b
A
3553#if MACH_PAGEMAP
3554 /*
3555 * If the shadow offset is 0, the use the existence map from
3556 * the backing object if there is one. If the shadow offset is
3557 * not zero, toss it.
3558 *
3559 * XXX - If the shadow offset is not 0 then a bit copy is needed
3560 * if the map is to be salvaged. For now, we just just toss the
3561 * old map, giving the collapsed object no map. This means that
3562 * the pager is invoked for zero fill pages. If analysis shows
3563 * that this happens frequently and is a performance hit, then
3564 * this code should be fixed to salvage the map.
3565 */
3566 assert(object->existence_map == VM_EXTERNAL_NULL);
3567 if (backing_offset || (size != backing_object->size)) {
3568 vm_external_discarded++;
3569 vm_external_destroy(backing_object->existence_map,
3570 backing_object->size);
3571 }
3572 else {
3573 vm_external_collapsed++;
3574 object->existence_map = backing_object->existence_map;
3575 }
3576 backing_object->existence_map = VM_EXTERNAL_NULL;
3577#endif /* MACH_PAGEMAP */
3578
3579 /*
3580 * Object now shadows whatever backing_object did.
3581 * Note that the reference to backing_object->shadow
3582 * moves from within backing_object to within object.
3583 */
3584
91447636
A
3585 assert(!object->phys_contiguous);
3586 assert(!backing_object->phys_contiguous);
1c79356b 3587 object->shadow = backing_object->shadow;
91447636
A
3588 if (object->shadow) {
3589 object->shadow_offset += backing_object->shadow_offset;
3590 } else {
3591 /* no shadow, therefore no shadow offset... */
3592 object->shadow_offset = 0;
3593 }
1c79356b 3594 assert((object->shadow == VM_OBJECT_NULL) ||
55e303ae 3595 (object->shadow->copy != backing_object));
1c79356b
A
3596
3597 /*
3598 * Discard backing_object.
3599 *
3600 * Since the backing object has no pages, no
3601 * pager left, and no object references within it,
3602 * all that is necessary is to dispose of it.
3603 */
3604
3605 assert((backing_object->ref_count == 1) &&
3606 (backing_object->resident_page_count == 0) &&
3607 (backing_object->paging_in_progress == 0));
3608
1c79356b
A
3609 backing_object->alive = FALSE;
3610 vm_object_unlock(backing_object);
3611
3612 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3613 (integer_t)backing_object, 0,0,0,0);
3614
2d21ac55
A
3615 vm_object_lock_destroy(backing_object);
3616
91447636 3617 zfree(vm_object_zone, backing_object);
1c79356b
A
3618
3619 object_collapses++;
3620}
3621
0b4e3aa0 3622static void
1c79356b
A
3623vm_object_do_bypass(
3624 vm_object_t object,
3625 vm_object_t backing_object)
3626{
3627 /*
3628 * Make the parent shadow the next object
3629 * in the chain.
3630 */
3631
2d21ac55
A
3632 vm_object_lock_assert_exclusive(backing_object);
3633
1c79356b
A
3634#if TASK_SWAPPER
3635 /*
3636 * Do object reference in-line to
3637 * conditionally increment shadow's
3638 * residence count. If object is not
3639 * resident, leave residence count
3640 * on shadow alone.
3641 */
3642 if (backing_object->shadow != VM_OBJECT_NULL) {
3643 vm_object_lock(backing_object->shadow);
2d21ac55 3644 vm_object_lock_assert_exclusive(backing_object->shadow);
1c79356b
A
3645 backing_object->shadow->ref_count++;
3646 if (object->res_count != 0)
3647 vm_object_res_reference(backing_object->shadow);
3648 vm_object_unlock(backing_object->shadow);
3649 }
3650#else /* TASK_SWAPPER */
3651 vm_object_reference(backing_object->shadow);
3652#endif /* TASK_SWAPPER */
3653
91447636
A
3654 assert(!object->phys_contiguous);
3655 assert(!backing_object->phys_contiguous);
1c79356b 3656 object->shadow = backing_object->shadow;
91447636
A
3657 if (object->shadow) {
3658 object->shadow_offset += backing_object->shadow_offset;
3659 } else {
3660 /* no shadow, therefore no shadow offset... */
3661 object->shadow_offset = 0;
3662 }
1c79356b
A
3663
3664 /*
3665 * Backing object might have had a copy pointer
3666 * to us. If it did, clear it.
3667 */
3668 if (backing_object->copy == object) {
3669 backing_object->copy = VM_OBJECT_NULL;
3670 }
3671
3672 /*
3673 * Drop the reference count on backing_object.
3674#if TASK_SWAPPER
3675 * Since its ref_count was at least 2, it
3676 * will not vanish; so we don't need to call
3677 * vm_object_deallocate.
593a1d5f 3678 * [with a caveat for "named" objects]
1c79356b
A
3679 *
3680 * The res_count on the backing object is
3681 * conditionally decremented. It's possible
3682 * (via vm_pageout_scan) to get here with
3683 * a "swapped" object, which has a 0 res_count,
3684 * in which case, the backing object res_count
3685 * is already down by one.
3686#else
3687 * Don't call vm_object_deallocate unless
3688 * ref_count drops to zero.
3689 *
3690 * The ref_count can drop to zero here if the
3691 * backing object could be bypassed but not
3692 * collapsed, such as when the backing object
3693 * is temporary and cachable.
3694#endif
3695 */
593a1d5f
A
3696 if (backing_object->ref_count > 2 ||
3697 (!backing_object->named && backing_object->ref_count > 1)) {
2d21ac55 3698 vm_object_lock_assert_exclusive(backing_object);
1c79356b
A
3699 backing_object->ref_count--;
3700#if TASK_SWAPPER
3701 if (object->res_count != 0)
3702 vm_object_res_deallocate(backing_object);
3703 assert(backing_object->ref_count > 0);
3704#endif /* TASK_SWAPPER */
3705 vm_object_unlock(backing_object);
3706 } else {
3707
3708 /*
3709 * Drop locks so that we can deallocate
3710 * the backing object.
3711 */
3712
3713#if TASK_SWAPPER
3714 if (object->res_count == 0) {
3715 /* XXX get a reference for the deallocate below */
3716 vm_object_res_reference(backing_object);
3717 }
3718#endif /* TASK_SWAPPER */
3719 vm_object_unlock(object);
3720 vm_object_unlock(backing_object);
3721 vm_object_deallocate(backing_object);
3722
3723 /*
3724 * Relock object. We don't have to reverify
3725 * its state since vm_object_collapse will
3726 * do that for us as it starts at the
3727 * top of its loop.
3728 */
3729
3730 vm_object_lock(object);
3731 }
3732
3733 object_bypasses++;
3734}
0b4e3aa0 3735
1c79356b
A
3736
3737/*
3738 * vm_object_collapse:
3739 *
3740 * Perform an object collapse or an object bypass if appropriate.
3741 * The real work of collapsing and bypassing is performed in
3742 * the routines vm_object_do_collapse and vm_object_do_bypass.
3743 *
3744 * Requires that the object be locked and the page queues be unlocked.
3745 *
3746 */
91447636
A
3747static unsigned long vm_object_collapse_calls = 0;
3748static unsigned long vm_object_collapse_objects = 0;
3749static unsigned long vm_object_collapse_do_collapse = 0;
3750static unsigned long vm_object_collapse_do_bypass = 0;
2d21ac55 3751static unsigned long vm_object_collapse_delays = 0;
0b4e3aa0 3752__private_extern__ void
1c79356b 3753vm_object_collapse(
55e303ae 3754 register vm_object_t object,
0c530ab8
A
3755 register vm_object_offset_t hint_offset,
3756 boolean_t can_bypass)
1c79356b
A
3757{
3758 register vm_object_t backing_object;
55e303ae
A
3759 register unsigned int rcount;
3760 register unsigned int size;
91447636
A
3761 vm_object_t original_object;
3762
3763 vm_object_collapse_calls++;
0b4e3aa0 3764
0c530ab8
A
3765 if (! vm_object_collapse_allowed &&
3766 ! (can_bypass && vm_object_bypass_allowed)) {
1c79356b
A
3767 return;
3768 }
3769
3770 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3771 (integer_t)object, 0,0,0,0);
3772
91447636
A
3773 if (object == VM_OBJECT_NULL)
3774 return;
3775
3776 original_object = object;
3777
1c79356b 3778 while (TRUE) {
91447636 3779 vm_object_collapse_objects++;
1c79356b
A
3780 /*
3781 * Verify that the conditions are right for either
3782 * collapse or bypass:
1c79356b 3783 */
1c79356b
A
3784
3785 /*
3786 * There is a backing object, and
3787 */
3788
91447636
A
3789 backing_object = object->shadow;
3790 if (backing_object == VM_OBJECT_NULL) {
3791 if (object != original_object) {
3792 vm_object_unlock(object);
3793 }
1c79356b 3794 return;
91447636 3795 }
1c79356b 3796
91447636
A
3797 /*
3798 * No pages in the object are currently
3799 * being paged out, and
3800 */
2d21ac55 3801 if (object->paging_in_progress != 0) {
91447636
A
3802 /* try and collapse the rest of the shadow chain */
3803 vm_object_lock(backing_object);
3804 if (object != original_object) {
3805 vm_object_unlock(object);
3806 }
3807 object = backing_object;
3808 continue;
3809 }
3810
1c79356b
A
3811 vm_object_lock(backing_object);
3812
3813 /*
3814 * ...
3815 * The backing object is not read_only,
3816 * and no pages in the backing object are
3817 * currently being paged out.
3818 * The backing object is internal.
3819 *
3820 */
3821
3822 if (!backing_object->internal ||
3823 backing_object->paging_in_progress != 0) {
91447636
A
3824 /* try and collapse the rest of the shadow chain */
3825 if (object != original_object) {
3826 vm_object_unlock(object);
3827 }
3828 object = backing_object;
3829 continue;
1c79356b
A
3830 }
3831
3832 /*
3833 * The backing object can't be a copy-object:
3834 * the shadow_offset for the copy-object must stay
3835 * as 0. Furthermore (for the 'we have all the
3836 * pages' case), if we bypass backing_object and
3837 * just shadow the next object in the chain, old
3838 * pages from that object would then have to be copied
3839 * BOTH into the (former) backing_object and into the
3840 * parent object.
3841 */
3842 if (backing_object->shadow != VM_OBJECT_NULL &&
55e303ae 3843 backing_object->shadow->copy == backing_object) {
91447636
A
3844 /* try and collapse the rest of the shadow chain */
3845 if (object != original_object) {
3846 vm_object_unlock(object);
3847 }
3848 object = backing_object;
3849 continue;
1c79356b
A
3850 }
3851
3852 /*
3853 * We can now try to either collapse the backing
3854 * object (if the parent is the only reference to
3855 * it) or (perhaps) remove the parent's reference
3856 * to it.
1c79356b 3857 *
0b4e3aa0
A
3858 * If there is exactly one reference to the backing
3859 * object, we may be able to collapse it into the
3860 * parent.
1c79356b 3861 *
55e303ae
A
3862 * If MACH_PAGEMAP is defined:
3863 * The parent must not have a pager created for it,
3864 * since collapsing a backing_object dumps new pages
3865 * into the parent that its pager doesn't know about
3866 * (and the collapse code can't merge the existence
3867 * maps).
3868 * Otherwise:
3869 * As long as one of the objects is still not known
3870 * to the pager, we can collapse them.
1c79356b 3871 */
1c79356b 3872 if (backing_object->ref_count == 1 &&
55e303ae
A
3873 (!object->pager_created
3874#if !MACH_PAGEMAP
91447636 3875 || !backing_object->pager_created
55e303ae
A
3876#endif /*!MACH_PAGEMAP */
3877 ) && vm_object_collapse_allowed) {
1c79356b
A
3878
3879 XPR(XPR_VM_OBJECT,
91447636 3880 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
1c79356b
A
3881 (integer_t)backing_object, (integer_t)object,
3882 (integer_t)backing_object->pager,
91447636 3883 (integer_t)backing_object->pager_control, 0);
1c79356b
A
3884
3885 /*
3886 * We need the cache lock for collapsing,
3887 * but we must not deadlock.
3888 */
3889
3890 if (! vm_object_cache_lock_try()) {
91447636
A
3891 if (object != original_object) {
3892 vm_object_unlock(object);
3893 }
1c79356b
A
3894 vm_object_unlock(backing_object);
3895 return;
3896 }
3897
3898 /*
3899 * Collapse the object with its backing
3900 * object, and try again with the object's
3901 * new backing object.
3902 */
3903
3904 vm_object_do_collapse(object, backing_object);
91447636 3905 vm_object_collapse_do_collapse++;
1c79356b
A
3906 continue;
3907 }
3908
1c79356b
A
3909 /*
3910 * Collapsing the backing object was not possible
3911 * or permitted, so let's try bypassing it.
3912 */
3913
0c530ab8 3914 if (! (can_bypass && vm_object_bypass_allowed)) {
91447636
A
3915 /* try and collapse the rest of the shadow chain */
3916 if (object != original_object) {
3917 vm_object_unlock(object);
3918 }
3919 object = backing_object;
3920 continue;
1c79356b
A
3921 }
3922
0b4e3aa0 3923
1c79356b 3924 /*
55e303ae
A
3925 * If the object doesn't have all its pages present,
3926 * we have to make sure no pages in the backing object
3927 * "show through" before bypassing it.
1c79356b 3928 */
55e303ae
A
3929 size = atop(object->size);
3930 rcount = object->resident_page_count;
3931 if (rcount != size) {
55e303ae
A
3932 vm_object_offset_t offset;
3933 vm_object_offset_t backing_offset;
3934 unsigned int backing_rcount;
3935 unsigned int lookups = 0;
3936
3937 /*
3938 * If the backing object has a pager but no pagemap,
3939 * then we cannot bypass it, because we don't know
3940 * what pages it has.
3941 */
3942 if (backing_object->pager_created
1c79356b 3943#if MACH_PAGEMAP
55e303ae 3944 && (backing_object->existence_map == VM_EXTERNAL_NULL)
1c79356b 3945#endif /* MACH_PAGEMAP */
55e303ae 3946 ) {
91447636
A
3947 /* try and collapse the rest of the shadow chain */
3948 if (object != original_object) {
3949 vm_object_unlock(object);
3950 }
3951 object = backing_object;
3952 continue;
55e303ae 3953 }
1c79356b 3954
55e303ae
A
3955 /*
3956 * If the object has a pager but no pagemap,
3957 * then we cannot bypass it, because we don't know
3958 * what pages it has.
3959 */
3960 if (object->pager_created
0b4e3aa0 3961#if MACH_PAGEMAP
55e303ae 3962 && (object->existence_map == VM_EXTERNAL_NULL)
0b4e3aa0 3963#endif /* MACH_PAGEMAP */
55e303ae 3964 ) {
91447636
A
3965 /* try and collapse the rest of the shadow chain */
3966 if (object != original_object) {
3967 vm_object_unlock(object);
3968 }
3969 object = backing_object;
3970 continue;
55e303ae 3971 }
0b4e3aa0 3972
55e303ae
A
3973 /*
3974 * If all of the pages in the backing object are
3975 * shadowed by the parent object, the parent
3976 * object no longer has to shadow the backing
3977 * object; it can shadow the next one in the
3978 * chain.
3979 *
3980 * If the backing object has existence info,
3981 * we must check examine its existence info
3982 * as well.
3983 *
3984 */
1c79356b 3985
55e303ae
A
3986 backing_offset = object->shadow_offset;
3987 backing_rcount = backing_object->resident_page_count;
1c79356b 3988
2d21ac55 3989#if MACH_PAGEMAP
55e303ae
A
3990#define EXISTS_IN_OBJECT(obj, off, rc) \
3991 (vm_external_state_get((obj)->existence_map, \
3992 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
3993 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
2d21ac55
A
3994#else
3995#define EXISTS_IN_OBJECT(obj, off, rc) \
3996 (((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
3997#endif /* MACH_PAGEMAP */
55e303ae
A
3998
3999 /*
4000 * Check the hint location first
4001 * (since it is often the quickest way out of here).
4002 */
4003 if (object->cow_hint != ~(vm_offset_t)0)
4004 hint_offset = (vm_object_offset_t)object->cow_hint;
4005 else
4006 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
4007 (hint_offset - 8 * PAGE_SIZE_64) : 0;
4008
4009 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
4010 backing_offset, backing_rcount) &&
4011 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
4012 /* dependency right at the hint */
4013 object->cow_hint = (vm_offset_t)hint_offset;
91447636
A
4014 /* try and collapse the rest of the shadow chain */
4015 if (object != original_object) {
4016 vm_object_unlock(object);
4017 }
4018 object = backing_object;
4019 continue;
0b4e3aa0 4020 }
55e303ae
A
4021
4022 /*
4023 * If the object's window onto the backing_object
4024 * is large compared to the number of resident
4025 * pages in the backing object, it makes sense to
4026 * walk the backing_object's resident pages first.
4027 *
4028 * NOTE: Pages may be in both the existence map and
4029 * resident. So, we can't permanently decrement
4030 * the rcount here because the second loop may
4031 * find the same pages in the backing object'
4032 * existence map that we found here and we would
4033 * double-decrement the rcount. We also may or
4034 * may not have found the
4035 */
2d21ac55
A
4036 if (backing_rcount &&
4037#if MACH_PAGEMAP
4038 size > ((backing_object->existence_map) ?
4039 backing_rcount : (backing_rcount >> 1))
4040#else
4041 size > (backing_rcount >> 1)
4042#endif /* MACH_PAGEMAP */
4043 ) {
55e303ae
A
4044 unsigned int rc = rcount;
4045 vm_page_t p;
4046
4047 backing_rcount = backing_object->resident_page_count;
4048 p = (vm_page_t)queue_first(&backing_object->memq);
4049 do {
4050 /* Until we get more than one lookup lock */
4051 if (lookups > 256) {
2d21ac55 4052 vm_object_collapse_delays++;
55e303ae 4053 lookups = 0;
2d21ac55 4054 mutex_pause(0);
55e303ae
A
4055 }
4056
4057 offset = (p->offset - backing_offset);
4058 if (offset < object->size &&
4059 offset != hint_offset &&
4060 !EXISTS_IN_OBJECT(object, offset, rc)) {
4061 /* found a dependency */
4062 object->cow_hint = (vm_offset_t)offset;
91447636 4063 break;
55e303ae 4064 }
91447636 4065 p = (vm_page_t) queue_next(&p->listq);
55e303ae
A
4066
4067 } while (--backing_rcount);
91447636
A
4068 if (backing_rcount != 0 ) {
4069 /* try and collapse the rest of the shadow chain */
4070 if (object != original_object) {
4071 vm_object_unlock(object);
4072 }
4073 object = backing_object;
4074 continue;
4075 }
0b4e3aa0 4076 }
55e303ae
A
4077
4078 /*
4079 * Walk through the offsets looking for pages in the
4080 * backing object that show through to the object.
4081 */
2d21ac55 4082#if MACH_PAGEMAP
593a1d5f 4083 if (backing_rcount || backing_object->existence_map)
2d21ac55 4084#else
593a1d5f 4085 if (backing_rcount)
2d21ac55 4086#endif /* MACH_PAGEMAP */
593a1d5f 4087 {
55e303ae
A
4088 offset = hint_offset;
4089
4090 while((offset =
4091 (offset + PAGE_SIZE_64 < object->size) ?
4092 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
4093
4094 /* Until we get more than one lookup lock */
4095 if (lookups > 256) {
2d21ac55 4096 vm_object_collapse_delays++;
55e303ae 4097 lookups = 0;
2d21ac55 4098 mutex_pause(0);
55e303ae
A
4099 }
4100
4101 if (EXISTS_IN_OBJECT(backing_object, offset +
4102 backing_offset, backing_rcount) &&
4103 !EXISTS_IN_OBJECT(object, offset, rcount)) {
4104 /* found a dependency */
4105 object->cow_hint = (vm_offset_t)offset;
91447636 4106 break;
55e303ae
A
4107 }
4108 }
91447636
A
4109 if (offset != hint_offset) {
4110 /* try and collapse the rest of the shadow chain */
4111 if (object != original_object) {
4112 vm_object_unlock(object);
4113 }
4114 object = backing_object;
4115 continue;
4116 }
0b4e3aa0
A
4117 }
4118 }
1c79356b 4119
55e303ae
A
4120 /* reset the offset hint for any objects deeper in the chain */
4121 object->cow_hint = (vm_offset_t)0;
1c79356b
A
4122
4123 /*
4124 * All interesting pages in the backing object
4125 * already live in the parent or its pager.
4126 * Thus we can bypass the backing object.
4127 */
4128
4129 vm_object_do_bypass(object, backing_object);
91447636 4130 vm_object_collapse_do_bypass++;
1c79356b
A
4131
4132 /*
4133 * Try again with this object's new backing object.
4134 */
4135
4136 continue;
4137 }
91447636
A
4138
4139 if (object != original_object) {
4140 vm_object_unlock(object);
4141 }
1c79356b
A
4142}
4143
4144/*
4145 * Routine: vm_object_page_remove: [internal]
4146 * Purpose:
4147 * Removes all physical pages in the specified
4148 * object range from the object's list of pages.
4149 *
4150 * In/out conditions:
4151 * The object must be locked.
4152 * The object must not have paging_in_progress, usually
4153 * guaranteed by not having a pager.
4154 */
4155unsigned int vm_object_page_remove_lookup = 0;
4156unsigned int vm_object_page_remove_iterate = 0;
4157
0b4e3aa0 4158__private_extern__ void
1c79356b
A
4159vm_object_page_remove(
4160 register vm_object_t object,
4161 register vm_object_offset_t start,
4162 register vm_object_offset_t end)
4163{
4164 register vm_page_t p, next;
4165
4166 /*
4167 * One and two page removals are most popular.
4168 * The factor of 16 here is somewhat arbitrary.
4169 * It balances vm_object_lookup vs iteration.
4170 */
4171
55e303ae 4172 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
1c79356b
A
4173 vm_object_page_remove_lookup++;
4174
4175 for (; start < end; start += PAGE_SIZE_64) {
4176 p = vm_page_lookup(object, start);
4177 if (p != VM_PAGE_NULL) {
4178 assert(!p->cleaning && !p->pageout);
2d21ac55 4179 if (!p->fictitious && p->pmapped)
91447636 4180 pmap_disconnect(p->phys_page);
1c79356b
A
4181 VM_PAGE_FREE(p);
4182 }
4183 }
4184 } else {
4185 vm_object_page_remove_iterate++;
4186
4187 p = (vm_page_t) queue_first(&object->memq);
4188 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4189 next = (vm_page_t) queue_next(&p->listq);
4190 if ((start <= p->offset) && (p->offset < end)) {
4191 assert(!p->cleaning && !p->pageout);
2d21ac55 4192 if (!p->fictitious && p->pmapped)
91447636 4193 pmap_disconnect(p->phys_page);
1c79356b
A
4194 VM_PAGE_FREE(p);
4195 }
4196 p = next;
4197 }
4198 }
4199}
4200
0b4e3aa0 4201
1c79356b
A
4202/*
4203 * Routine: vm_object_coalesce
4204 * Function: Coalesces two objects backing up adjoining
4205 * regions of memory into a single object.
4206 *
4207 * returns TRUE if objects were combined.
4208 *
4209 * NOTE: Only works at the moment if the second object is NULL -
4210 * if it's not, which object do we lock first?
4211 *
4212 * Parameters:
4213 * prev_object First object to coalesce
4214 * prev_offset Offset into prev_object
4215 * next_object Second object into coalesce
4216 * next_offset Offset into next_object
4217 *
4218 * prev_size Size of reference to prev_object
4219 * next_size Size of reference to next_object
4220 *
4221 * Conditions:
4222 * The object(s) must *not* be locked. The map must be locked
4223 * to preserve the reference to the object(s).
4224 */
0b4e3aa0 4225static int vm_object_coalesce_count = 0;
1c79356b 4226
0b4e3aa0 4227__private_extern__ boolean_t
1c79356b
A
4228vm_object_coalesce(
4229 register vm_object_t prev_object,
4230 vm_object_t next_object,
4231 vm_object_offset_t prev_offset,
91447636 4232 __unused vm_object_offset_t next_offset,
1c79356b
A
4233 vm_object_size_t prev_size,
4234 vm_object_size_t next_size)
4235{
4236 vm_object_size_t newsize;
4237
4238#ifdef lint
4239 next_offset++;
4240#endif /* lint */
4241
4242 if (next_object != VM_OBJECT_NULL) {
4243 return(FALSE);
4244 }
4245
4246 if (prev_object == VM_OBJECT_NULL) {
4247 return(TRUE);
4248 }
4249
4250 XPR(XPR_VM_OBJECT,
4251 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
4252 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
4253
4254 vm_object_lock(prev_object);
4255
4256 /*
4257 * Try to collapse the object first
4258 */
0c530ab8 4259 vm_object_collapse(prev_object, prev_offset, TRUE);
1c79356b
A
4260
4261 /*
4262 * Can't coalesce if pages not mapped to
4263 * prev_entry may be in use any way:
4264 * . more than one reference
4265 * . paged out
4266 * . shadows another object
4267 * . has a copy elsewhere
2d21ac55 4268 * . is purgeable
1c79356b
A
4269 * . paging references (pages might be in page-list)
4270 */
4271
4272 if ((prev_object->ref_count > 1) ||
4273 prev_object->pager_created ||
4274 (prev_object->shadow != VM_OBJECT_NULL) ||
4275 (prev_object->copy != VM_OBJECT_NULL) ||
4276 (prev_object->true_share != FALSE) ||
2d21ac55 4277 (prev_object->purgable != VM_PURGABLE_DENY) ||
1c79356b
A
4278 (prev_object->paging_in_progress != 0)) {
4279 vm_object_unlock(prev_object);
4280 return(FALSE);
4281 }
4282
4283 vm_object_coalesce_count++;
4284
4285 /*
4286 * Remove any pages that may still be in the object from
4287 * a previous deallocation.
4288 */
4289 vm_object_page_remove(prev_object,
4290 prev_offset + prev_size,
4291 prev_offset + prev_size + next_size);
4292
4293 /*
4294 * Extend the object if necessary.
4295 */
4296 newsize = prev_offset + prev_size + next_size;
4297 if (newsize > prev_object->size) {
4298#if MACH_PAGEMAP
4299 /*
4300 * We cannot extend an object that has existence info,
4301 * since the existence info might then fail to cover
4302 * the entire object.
4303 *
4304 * This assertion must be true because the object
4305 * has no pager, and we only create existence info
4306 * for objects with pagers.
4307 */
4308 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
4309#endif /* MACH_PAGEMAP */
4310 prev_object->size = newsize;
4311 }
4312
4313 vm_object_unlock(prev_object);
4314 return(TRUE);
4315}
4316
4317/*
4318 * Attach a set of physical pages to an object, so that they can
4319 * be mapped by mapping the object. Typically used to map IO memory.
4320 *
4321 * The mapping function and its private data are used to obtain the
4322 * physical addresses for each page to be mapped.
4323 */
4324void
4325vm_object_page_map(
4326 vm_object_t object,
4327 vm_object_offset_t offset,
4328 vm_object_size_t size,
4329 vm_object_offset_t (*map_fn)(void *map_fn_data,
4330 vm_object_offset_t offset),
4331 void *map_fn_data) /* private to map_fn */
4332{
4333 int num_pages;
4334 int i;
4335 vm_page_t m;
4336 vm_page_t old_page;
4337 vm_object_offset_t addr;
4338
55e303ae 4339 num_pages = atop_64(size);
1c79356b
A
4340
4341 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
4342
4343 addr = (*map_fn)(map_fn_data, offset);
4344
4345 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
4346 vm_page_more_fictitious();
4347
4348 vm_object_lock(object);
4349 if ((old_page = vm_page_lookup(object, offset))
4350 != VM_PAGE_NULL)
4351 {
4352 vm_page_lock_queues();
4353 vm_page_free(old_page);
4354 vm_page_unlock_queues();
4355 }
4356
4357 vm_page_init(m, addr);
0b4e3aa0
A
4358 /* private normally requires lock_queues but since we */
4359 /* are initializing the page, its not necessary here */
1c79356b
A
4360 m->private = TRUE; /* don`t free page */
4361 m->wire_count = 1;
4362 vm_page_insert(m, object, offset);
4363
4364 PAGE_WAKEUP_DONE(m);
4365 vm_object_unlock(object);
4366 }
4367}
4368
4369#include <mach_kdb.h>
4370
4371#if MACH_KDB
4372#include <ddb/db_output.h>
4373#include <vm/vm_print.h>
4374
4375#define printf kdbprintf
4376
4377extern boolean_t vm_object_cached(
4378 vm_object_t object);
4379
4380extern void print_bitstring(
4381 char byte);
4382
4383boolean_t vm_object_print_pages = FALSE;
4384
4385void
4386print_bitstring(
4387 char byte)
4388{
4389 printf("%c%c%c%c%c%c%c%c",
4390 ((byte & (1 << 0)) ? '1' : '0'),
4391 ((byte & (1 << 1)) ? '1' : '0'),
4392 ((byte & (1 << 2)) ? '1' : '0'),
4393 ((byte & (1 << 3)) ? '1' : '0'),
4394 ((byte & (1 << 4)) ? '1' : '0'),
4395 ((byte & (1 << 5)) ? '1' : '0'),
4396 ((byte & (1 << 6)) ? '1' : '0'),
4397 ((byte & (1 << 7)) ? '1' : '0'));
4398}
4399
4400boolean_t
4401vm_object_cached(
4402 register vm_object_t object)
4403{
4404 register vm_object_t o;
4405
4406 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
4407 if (object == o) {
4408 return TRUE;
4409 }
4410 }
4411 return FALSE;
4412}
4413
4414#if MACH_PAGEMAP
4415/*
4416 * vm_external_print: [ debug ]
4417 */
4418void
4419vm_external_print(
91447636
A
4420 vm_external_map_t emap,
4421 vm_size_t size)
1c79356b 4422{
91447636 4423 if (emap == VM_EXTERNAL_NULL) {
1c79356b
A
4424 printf("0 ");
4425 } else {
4426 vm_size_t existence_size = stob(size);
4427 printf("{ size=%d, map=[", existence_size);
4428 if (existence_size > 0) {
91447636 4429 print_bitstring(emap[0]);
1c79356b
A
4430 }
4431 if (existence_size > 1) {
91447636 4432 print_bitstring(emap[1]);
1c79356b
A
4433 }
4434 if (existence_size > 2) {
4435 printf("...");
91447636 4436 print_bitstring(emap[existence_size-1]);
1c79356b
A
4437 }
4438 printf("] }\n");
4439 }
4440 return;
4441}
4442#endif /* MACH_PAGEMAP */
4443
4444int
4445vm_follow_object(
4446 vm_object_t object)
4447{
0b4e3aa0
A
4448 int count = 0;
4449 int orig_db_indent = db_indent;
1c79356b 4450
0b4e3aa0
A
4451 while (TRUE) {
4452 if (object == VM_OBJECT_NULL) {
4453 db_indent = orig_db_indent;
4454 return count;
4455 }
1c79356b 4456
0b4e3aa0 4457 count += 1;
1c79356b 4458
0b4e3aa0
A
4459 iprintf("object 0x%x", object);
4460 printf(", shadow=0x%x", object->shadow);
4461 printf(", copy=0x%x", object->copy);
4462 printf(", pager=0x%x", object->pager);
4463 printf(", ref=%d\n", object->ref_count);
4464
4465 db_indent += 2;
4466 object = object->shadow;
4467 }
1c79356b 4468
1c79356b
A
4469}
4470
4471/*
4472 * vm_object_print: [ debug ]
4473 */
4474void
2d21ac55
A
4475vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
4476 __unused db_expr_t arg_count, __unused char *modif)
1c79356b 4477{
91447636 4478 vm_object_t object;
1c79356b 4479 register vm_page_t p;
91447636 4480 const char *s;
1c79356b
A
4481
4482 register int count;
4483
91447636 4484 object = (vm_object_t) (long) db_addr;
1c79356b
A
4485 if (object == VM_OBJECT_NULL)
4486 return;
4487
4488 iprintf("object 0x%x\n", object);
4489
4490 db_indent += 2;
4491
4492 iprintf("size=0x%x", object->size);
91447636 4493 printf(", memq_hint=%p", object->memq_hint);
1c79356b
A
4494 printf(", ref_count=%d\n", object->ref_count);
4495 iprintf("");
4496#if TASK_SWAPPER
4497 printf("res_count=%d, ", object->res_count);
4498#endif /* TASK_SWAPPER */
4499 printf("resident_page_count=%d\n", object->resident_page_count);
4500
4501 iprintf("shadow=0x%x", object->shadow);
4502 if (object->shadow) {
4503 register int i = 0;
4504 vm_object_t shadow = object;
91447636 4505 while((shadow = shadow->shadow))
1c79356b
A
4506 i++;
4507 printf(" (depth %d)", i);
4508 }
4509 printf(", copy=0x%x", object->copy);
4510 printf(", shadow_offset=0x%x", object->shadow_offset);
4511 printf(", last_alloc=0x%x\n", object->last_alloc);
4512
4513 iprintf("pager=0x%x", object->pager);
4514 printf(", paging_offset=0x%x", object->paging_offset);
91447636 4515 printf(", pager_control=0x%x\n", object->pager_control);
1c79356b
A
4516
4517 iprintf("copy_strategy=%d[", object->copy_strategy);
4518 switch (object->copy_strategy) {
4519 case MEMORY_OBJECT_COPY_NONE:
4520 printf("copy_none");
4521 break;
4522
4523 case MEMORY_OBJECT_COPY_CALL:
4524 printf("copy_call");
4525 break;
4526
4527 case MEMORY_OBJECT_COPY_DELAY:
4528 printf("copy_delay");
4529 break;
4530
4531 case MEMORY_OBJECT_COPY_SYMMETRIC:
4532 printf("copy_symmetric");
4533 break;
4534
4535 case MEMORY_OBJECT_COPY_INVALID:
4536 printf("copy_invalid");
4537 break;
4538
4539 default:
4540 printf("?");
4541 }
4542 printf("]");
1c79356b
A
4543
4544 iprintf("all_wanted=0x%x<", object->all_wanted);
4545 s = "";
4546 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
4547 printf("%sinit", s);
4548 s = ",";
4549 }
4550 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
4551 printf("%sready", s);
4552 s = ",";
4553 }
4554 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
4555 printf("%spaging", s);
4556 s = ",";
4557 }
1c79356b
A
4558 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
4559 printf("%slock", s);
4560 s = ",";
4561 }
4562 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
4563 printf("%suncaching", s);
4564 s = ",";
4565 }
4566 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
4567 printf("%scopy_call", s);
4568 s = ",";
4569 }
4570 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
4571 printf("%scaching", s);
4572 s = ",";
4573 }
4574 printf(">");
4575 printf(", paging_in_progress=%d\n", object->paging_in_progress);
4576
4577 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
4578 (object->pager_created ? "" : "!"),
4579 (object->pager_initialized ? "" : "!"),
4580 (object->pager_ready ? "" : "!"),
4581 (object->can_persist ? "" : "!"),
4582 (object->pager_trusted ? "" : "!"),
4583 (object->pageout ? "" : "!"),
4584 (object->internal ? "internal" : "external"),
4585 (object->temporary ? "temporary" : "permanent"));
2d21ac55 4586 iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n",
1c79356b 4587 (object->alive ? "" : "!"),
2d21ac55
A
4588 ((object->purgable != VM_PURGABLE_DENY) ? "" : "!"),
4589 ((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"),
4590 ((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"),
1c79356b
A
4591 (object->shadowed ? "" : "!"),
4592 (vm_object_cached(object) ? "" : "!"),
4593 (object->private ? "" : "!"));
4594 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
4595 (object->advisory_pageout ? "" : "!"),
4596 (object->silent_overwrite ? "" : "!"));
4597
4598#if MACH_PAGEMAP
4599 iprintf("existence_map=");
4600 vm_external_print(object->existence_map, object->size);
4601#endif /* MACH_PAGEMAP */
4602#if MACH_ASSERT
4603 iprintf("paging_object=0x%x\n", object->paging_object);
4604#endif /* MACH_ASSERT */
4605
4606 if (vm_object_print_pages) {
4607 count = 0;
4608 p = (vm_page_t) queue_first(&object->memq);
4609 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4610 if (count == 0) {
4611 iprintf("memory:=");
4612 } else if (count == 2) {
4613 printf("\n");
4614 iprintf(" ...");
4615 count = 0;
4616 } else {
4617 printf(",");
4618 }
4619 count++;
4620
91447636 4621 printf("(off=0x%llX,page=%p)", p->offset, p);
1c79356b
A
4622 p = (vm_page_t) queue_next(&p->listq);
4623 }
4624 if (count != 0) {
4625 printf("\n");
4626 }
4627 }
4628 db_indent -= 2;
4629}
4630
4631
4632/*
4633 * vm_object_find [ debug ]
4634 *
4635 * Find all tasks which reference the given vm_object.
4636 */
4637
4638boolean_t vm_object_find(vm_object_t object);
4639boolean_t vm_object_print_verbose = FALSE;
4640
4641boolean_t
4642vm_object_find(
4643 vm_object_t object)
4644{
4645 task_t task;
4646 vm_map_t map;
4647 vm_map_entry_t entry;
1c79356b
A
4648 boolean_t found = FALSE;
4649
2d21ac55 4650 queue_iterate(&tasks, task, task_t, tasks) {
1c79356b
A
4651 map = task->map;
4652 for (entry = vm_map_first_entry(map);
4653 entry && entry != vm_map_to_entry(map);
4654 entry = entry->vme_next) {
4655
4656 vm_object_t obj;
4657
4658 /*
4659 * For the time being skip submaps,
4660 * only the kernel can have submaps,
4661 * and unless we are interested in
4662 * kernel objects, we can simply skip
4663 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
4664 * for a full solution.
4665 */
4666 if (entry->is_sub_map)
4667 continue;
4668 if (entry)
4669 obj = entry->object.vm_object;
4670 else
4671 continue;
4672
4673 while (obj != VM_OBJECT_NULL) {
4674 if (obj == object) {
4675 if (!found) {
4676 printf("TASK\t\tMAP\t\tENTRY\n");
4677 found = TRUE;
4678 }
4679 printf("0x%x\t0x%x\t0x%x\n",
4680 task, map, entry);
4681 }
4682 obj = obj->shadow;
4683 }
4684 }
4685 }
4686
4687 return(found);
4688}
4689
4690#endif /* MACH_KDB */
4691
0b4e3aa0
A
4692kern_return_t
4693vm_object_populate_with_private(
55e303ae 4694 vm_object_t object,
0b4e3aa0 4695 vm_object_offset_t offset,
55e303ae
A
4696 ppnum_t phys_page,
4697 vm_size_t size)
0b4e3aa0 4698{
55e303ae 4699 ppnum_t base_page;
0b4e3aa0
A
4700 vm_object_offset_t base_offset;
4701
4702
4703 if(!object->private)
4704 return KERN_FAILURE;
4705
55e303ae 4706 base_page = phys_page;
0b4e3aa0
A
4707
4708 vm_object_lock(object);
4709 if(!object->phys_contiguous) {
4710 vm_page_t m;
55e303ae 4711 if((base_offset = trunc_page_64(offset)) != offset) {
0b4e3aa0
A
4712 vm_object_unlock(object);
4713 return KERN_FAILURE;
4714 }
4715 base_offset += object->paging_offset;
4716 while(size) {
4717 m = vm_page_lookup(object, base_offset);
4718 if(m != VM_PAGE_NULL) {
4719 if(m->fictitious) {
2d21ac55
A
4720 if (m->phys_page !=
4721 vm_page_guard_addr) {
4722 vm_page_lockspin_queues();
4723 m->fictitious = FALSE;
4724 m->private = TRUE;
4725 m->phys_page = base_page;
4726 if(!m->busy) {
4727 m->busy = TRUE;
4728 }
4729 if(!m->absent) {
4730 m->absent = TRUE;
4731 }
4732 m->list_req_pending = TRUE;
4733 vm_page_unlock_queues();
0b4e3aa0 4734 }
55e303ae 4735 } else if (m->phys_page != base_page) {
2d21ac55
A
4736 if (m->pmapped) {
4737 /*
4738 * pmap call to clear old mapping
4739 */
4740 pmap_disconnect(m->phys_page);
4741 }
55e303ae 4742 m->phys_page = base_page;
0b4e3aa0 4743 }
91447636
A
4744
4745 /*
4746 * ENCRYPTED SWAP:
4747 * We're not pointing to the same
4748 * physical page any longer and the
4749 * contents of the new one are not
4750 * supposed to be encrypted.
4751 * XXX What happens to the original
4752 * physical page. Is it lost ?
4753 */
4754 m->encrypted = FALSE;
4755
0b4e3aa0
A
4756 } else {
4757 while ((m = vm_page_grab_fictitious())
4758 == VM_PAGE_NULL)
4759 vm_page_more_fictitious();
2d21ac55 4760 vm_page_lockspin_queues();
0b4e3aa0
A
4761 m->fictitious = FALSE;
4762 m->private = TRUE;
55e303ae 4763 m->phys_page = base_page;
0b4e3aa0
A
4764 m->list_req_pending = TRUE;
4765 m->absent = TRUE;
4766 m->unusual = TRUE;
0b4e3aa0
A
4767 vm_page_unlock_queues();
4768 vm_page_insert(m, object, base_offset);
4769 }
55e303ae 4770 base_page++; /* Go to the next physical page */
0b4e3aa0
A
4771 base_offset += PAGE_SIZE;
4772 size -= PAGE_SIZE;
4773 }
4774 } else {
4775 /* NOTE: we should check the original settings here */
4776 /* if we have a size > zero a pmap call should be made */
4777 /* to disable the range */
4778
4779 /* pmap_? */
4780
4781 /* shadows on contiguous memory are not allowed */
4782 /* we therefore can use the offset field */
935ed37a 4783 object->shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
0b4e3aa0
A
4784 object->size = size;
4785 }
4786 vm_object_unlock(object);
4787 return KERN_SUCCESS;
4788}
4789
1c79356b
A
4790/*
4791 * memory_object_free_from_cache:
4792 *
4793 * Walk the vm_object cache list, removing and freeing vm_objects
0c530ab8 4794 * which are backed by the pager identified by the caller, (pager_ops).
1c79356b
A
4795 * Remove up to "count" objects, if there are that may available
4796 * in the cache.
0b4e3aa0 4797 *
1c79356b
A
4798 * Walk the list at most once, return the number of vm_objects
4799 * actually freed.
1c79356b
A
4800 */
4801
0b4e3aa0 4802__private_extern__ kern_return_t
1c79356b 4803memory_object_free_from_cache(
91447636 4804 __unused host_t host,
0c530ab8 4805 memory_object_pager_ops_t pager_ops,
1c79356b
A
4806 int *count)
4807{
4808
4809 int object_released = 0;
1c79356b
A
4810
4811 register vm_object_t object = VM_OBJECT_NULL;
4812 vm_object_t shadow;
4813
4814/*
4815 if(host == HOST_NULL)
4816 return(KERN_INVALID_ARGUMENT);
4817*/
4818
4819 try_again:
4820 vm_object_cache_lock();
4821
4822 queue_iterate(&vm_object_cached_list, object,
4823 vm_object_t, cached_list) {
0c530ab8
A
4824 if (object->pager &&
4825 (pager_ops == object->pager->mo_pager_ops)) {
1c79356b
A
4826 vm_object_lock(object);
4827 queue_remove(&vm_object_cached_list, object,
4828 vm_object_t, cached_list);
4829 vm_object_cached_count--;
4830
4831 /*
4832 * Since this object is in the cache, we know
0b4e3aa0
A
4833 * that it is initialized and has only a pager's
4834 * (implicit) reference. Take a reference to avoid
4835 * recursive deallocations.
1c79356b
A
4836 */
4837
4838 assert(object->pager_initialized);
4839 assert(object->ref_count == 0);
2d21ac55 4840 vm_object_lock_assert_exclusive(object);
1c79356b
A
4841 object->ref_count++;
4842
4843 /*
4844 * Terminate the object.
4845 * If the object had a shadow, we let
4846 * vm_object_deallocate deallocate it.
4847 * "pageout" objects have a shadow, but
4848 * maintain a "paging reference" rather
4849 * than a normal reference.
4850 * (We are careful here to limit recursion.)
4851 */
4852 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4853 if ((vm_object_terminate(object) == KERN_SUCCESS)
4854 && (shadow != VM_OBJECT_NULL)) {
4855 vm_object_deallocate(shadow);
4856 }
4857
4858 if(object_released++ == *count)
4859 return KERN_SUCCESS;
4860 goto try_again;
4861 }
4862 }
4863 vm_object_cache_unlock();
4864 *count = object_released;
4865 return KERN_SUCCESS;
4866}
4867
0b4e3aa0 4868
1c79356b
A
4869
4870kern_return_t
0b4e3aa0
A
4871memory_object_create_named(
4872 memory_object_t pager,
4873 memory_object_offset_t size,
4874 memory_object_control_t *control)
1c79356b 4875{
0b4e3aa0
A
4876 vm_object_t object;
4877 vm_object_hash_entry_t entry;
1c79356b 4878
0b4e3aa0
A
4879 *control = MEMORY_OBJECT_CONTROL_NULL;
4880 if (pager == MEMORY_OBJECT_NULL)
4881 return KERN_INVALID_ARGUMENT;
1c79356b 4882
0b4e3aa0
A
4883 vm_object_cache_lock();
4884 entry = vm_object_hash_lookup(pager, FALSE);
4885 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4886 (entry->object != VM_OBJECT_NULL)) {
4887 if (entry->object->named == TRUE)
4888 panic("memory_object_create_named: caller already holds the right"); }
1c79356b 4889
0b4e3aa0
A
4890 vm_object_cache_unlock();
4891 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4892 == VM_OBJECT_NULL) {
4893 return(KERN_INVALID_OBJECT);
4894 }
4895
4896 /* wait for object (if any) to be ready */
4897 if (object != VM_OBJECT_NULL) {
4898 vm_object_lock(object);
4899 object->named = TRUE;
4900 while (!object->pager_ready) {
9bccf70c
A
4901 vm_object_sleep(object,
4902 VM_OBJECT_EVENT_PAGER_READY,
4903 THREAD_UNINT);
0b4e3aa0 4904 }
91447636 4905 *control = object->pager_control;
0b4e3aa0
A
4906 vm_object_unlock(object);
4907 }
4908 return (KERN_SUCCESS);
4909}
1c79356b 4910
1c79356b 4911
0b4e3aa0
A
4912/*
4913 * Routine: memory_object_recover_named [user interface]
4914 * Purpose:
4915 * Attempt to recover a named reference for a VM object.
4916 * VM will verify that the object has not already started
4917 * down the termination path, and if it has, will optionally
4918 * wait for that to finish.
4919 * Returns:
4920 * KERN_SUCCESS - we recovered a named reference on the object
4921 * KERN_FAILURE - we could not recover a reference (object dead)
4922 * KERN_INVALID_ARGUMENT - bad memory object control
4923 */
4924kern_return_t
4925memory_object_recover_named(
4926 memory_object_control_t control,
4927 boolean_t wait_on_terminating)
4928{
4929 vm_object_t object;
1c79356b 4930
0b4e3aa0
A
4931 vm_object_cache_lock();
4932 object = memory_object_control_to_vm_object(control);
4933 if (object == VM_OBJECT_NULL) {
4934 vm_object_cache_unlock();
4935 return (KERN_INVALID_ARGUMENT);
4936 }
1c79356b 4937
0b4e3aa0
A
4938restart:
4939 vm_object_lock(object);
1c79356b 4940
0b4e3aa0
A
4941 if (object->terminating && wait_on_terminating) {
4942 vm_object_cache_unlock();
4943 vm_object_wait(object,
4944 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4945 THREAD_UNINT);
4946 vm_object_cache_lock();
4947 goto restart;
4948 }
4949
4950 if (!object->alive) {
4951 vm_object_cache_unlock();
4952 vm_object_unlock(object);
4953 return KERN_FAILURE;
1c79356b
A
4954 }
4955
0b4e3aa0
A
4956 if (object->named == TRUE) {
4957 vm_object_cache_unlock();
4958 vm_object_unlock(object);
4959 return KERN_SUCCESS;
4960 }
1c79356b 4961
0b4e3aa0
A
4962 if((object->ref_count == 0) && (!object->terminating)){
4963 queue_remove(&vm_object_cached_list, object,
4964 vm_object_t, cached_list);
4965 vm_object_cached_count--;
4966 XPR(XPR_VM_OBJECT_CACHE,
4967 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4968 (integer_t)object,
4969 (integer_t)vm_object_cached_list.next,
4970 (integer_t)vm_object_cached_list.prev, 0,0);
4971 }
4972
4973 vm_object_cache_unlock();
4974
4975 object->named = TRUE;
2d21ac55 4976 vm_object_lock_assert_exclusive(object);
0b4e3aa0
A
4977 object->ref_count++;
4978 vm_object_res_reference(object);
4979 while (!object->pager_ready) {
9bccf70c
A
4980 vm_object_sleep(object,
4981 VM_OBJECT_EVENT_PAGER_READY,
4982 THREAD_UNINT);
0b4e3aa0
A
4983 }
4984 vm_object_unlock(object);
4985 return (KERN_SUCCESS);
1c79356b
A
4986}
4987
0b4e3aa0
A
4988
4989/*
4990 * vm_object_release_name:
4991 *
4992 * Enforces name semantic on memory_object reference count decrement
4993 * This routine should not be called unless the caller holds a name
4994 * reference gained through the memory_object_create_named.
4995 *
4996 * If the TERMINATE_IDLE flag is set, the call will return if the
4997 * reference count is not 1. i.e. idle with the only remaining reference
4998 * being the name.
4999 * If the decision is made to proceed the name field flag is set to
5000 * false and the reference count is decremented. If the RESPECT_CACHE
5001 * flag is set and the reference count has gone to zero, the
5002 * memory_object is checked to see if it is cacheable otherwise when
5003 * the reference count is zero, it is simply terminated.
5004 */
5005
5006__private_extern__ kern_return_t
5007vm_object_release_name(
5008 vm_object_t object,
5009 int flags)
1c79356b 5010{
0b4e3aa0
A
5011 vm_object_t shadow;
5012 boolean_t original_object = TRUE;
1c79356b 5013
0b4e3aa0 5014 while (object != VM_OBJECT_NULL) {
1c79356b 5015
0b4e3aa0
A
5016 /*
5017 * The cache holds a reference (uncounted) to
5018 * the object. We must locke it before removing
5019 * the object.
5020 *
5021 */
5022
1c79356b 5023 vm_object_cache_lock();
0b4e3aa0
A
5024 vm_object_lock(object);
5025 assert(object->alive);
5026 if(original_object)
5027 assert(object->named);
5028 assert(object->ref_count > 0);
5029
5030 /*
5031 * We have to wait for initialization before
5032 * destroying or caching the object.
5033 */
5034
5035 if (object->pager_created && !object->pager_initialized) {
5036 assert(!object->can_persist);
5037 vm_object_assert_wait(object,
5038 VM_OBJECT_EVENT_INITIALIZED,
5039 THREAD_UNINT);
5040 vm_object_unlock(object);
5041 vm_object_cache_unlock();
9bccf70c 5042 thread_block(THREAD_CONTINUE_NULL);
0b4e3aa0 5043 continue;
1c79356b
A
5044 }
5045
0b4e3aa0
A
5046 if (((object->ref_count > 1)
5047 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
5048 || (object->terminating)) {
5049 vm_object_unlock(object);
5050 vm_object_cache_unlock();
5051 return KERN_FAILURE;
5052 } else {
5053 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
5054 vm_object_unlock(object);
5055 vm_object_cache_unlock();
5056 return KERN_SUCCESS;
1c79356b 5057 }
0b4e3aa0
A
5058 }
5059
5060 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
5061 (object->ref_count == 1)) {
5062 if(original_object)
5063 object->named = FALSE;
1c79356b 5064 vm_object_unlock(object);
0b4e3aa0
A
5065 vm_object_cache_unlock();
5066 /* let vm_object_deallocate push this thing into */
5067 /* the cache, if that it is where it is bound */
5068 vm_object_deallocate(object);
5069 return KERN_SUCCESS;
5070 }
5071 VM_OBJ_RES_DECR(object);
5072 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
5073 if(object->ref_count == 1) {
5074 if(vm_object_terminate(object) != KERN_SUCCESS) {
5075 if(original_object) {
5076 return KERN_FAILURE;
5077 } else {
5078 return KERN_SUCCESS;
5079 }
5080 }
5081 if (shadow != VM_OBJECT_NULL) {
5082 original_object = FALSE;
5083 object = shadow;
5084 continue;
5085 }
5086 return KERN_SUCCESS;
5087 } else {
2d21ac55 5088 vm_object_lock_assert_exclusive(object);
0b4e3aa0
A
5089 object->ref_count--;
5090 assert(object->ref_count > 0);
5091 if(original_object)
5092 object->named = FALSE;
5093 vm_object_unlock(object);
5094 vm_object_cache_unlock();
5095 return KERN_SUCCESS;
1c79356b 5096 }
1c79356b 5097 }
91447636
A
5098 /*NOTREACHED*/
5099 assert(0);
5100 return KERN_FAILURE;
1c79356b
A
5101}
5102
0b4e3aa0
A
5103
5104__private_extern__ kern_return_t
5105vm_object_lock_request(
5106 vm_object_t object,
5107 vm_object_offset_t offset,
5108 vm_object_size_t size,
5109 memory_object_return_t should_return,
5110 int flags,
5111 vm_prot_t prot)
1c79356b 5112{
91447636
A
5113 __unused boolean_t should_flush;
5114
5115 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
1c79356b 5116
0b4e3aa0
A
5117 XPR(XPR_MEMORY_OBJECT,
5118 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
5119 (integer_t)object, offset, size,
5120 (((should_return&1)<<1)|should_flush), prot);
1c79356b 5121
0b4e3aa0
A
5122 /*
5123 * Check for bogus arguments.
5124 */
5125 if (object == VM_OBJECT_NULL)
5126 return (KERN_INVALID_ARGUMENT);
1c79356b 5127
0b4e3aa0
A
5128 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
5129 return (KERN_INVALID_ARGUMENT);
1c79356b 5130
55e303ae 5131 size = round_page_64(size);
0b4e3aa0
A
5132
5133 /*
5134 * Lock the object, and acquire a paging reference to
5135 * prevent the memory_object reference from being released.
5136 */
5137 vm_object_lock(object);
5138 vm_object_paging_begin(object);
0b4e3aa0
A
5139
5140 (void)vm_object_update(object,
91447636 5141 offset, size, NULL, NULL, should_return, flags, prot);
0b4e3aa0
A
5142
5143 vm_object_paging_end(object);
5144 vm_object_unlock(object);
5145
5146 return (KERN_SUCCESS);
5147}
5148
593a1d5f
A
5149unsigned int vm_page_purged_wired = 0;
5150unsigned int vm_page_purged_busy = 0;
5151unsigned int vm_page_purged_others = 0;
91447636 5152/*
2d21ac55 5153 * Empty a purgeable object by grabbing the physical pages assigned to it and
91447636
A
5154 * putting them on the free queue without writing them to backing store, etc.
5155 * When the pages are next touched they will be demand zero-fill pages. We
5156 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
5157 * skip referenced/dirty pages, pages on the active queue, etc. We're more
2d21ac55 5158 * than happy to grab these since this is a purgeable object. We mark the
91447636
A
5159 * object as "empty" after reaping its pages.
5160 *
5161 * On entry the object and page queues are locked, the object must be a
2d21ac55 5162 * purgeable object with no delayed copies pending.
91447636
A
5163 */
5164unsigned int
5165vm_object_purge(vm_object_t object)
5166{
5167 vm_page_t p, next;
5168 unsigned int num_purged_pages;
5169 vm_page_t local_freeq;
5170 unsigned long local_freed;
5171 int purge_loop_quota;
5172/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
5173#define PURGE_BATCH_FREE_LIMIT 50
5174/* release page queues lock every PURGE_LOOP_QUOTA iterations */
5175#define PURGE_LOOP_QUOTA 100
5176
5177 num_purged_pages = 0;
2d21ac55 5178 if (object->purgable == VM_PURGABLE_DENY)
91447636 5179 return num_purged_pages;
0b4e3aa0 5180
2d21ac55
A
5181 assert(object->purgable != VM_PURGABLE_NONVOLATILE);
5182 object->purgable = VM_PURGABLE_EMPTY;
91447636
A
5183
5184 assert(object->copy == VM_OBJECT_NULL);
5185 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5186 purge_loop_quota = PURGE_LOOP_QUOTA;
5187
5188 local_freeq = VM_PAGE_NULL;
5189 local_freed = 0;
5190
5191 /*
5192 * Go through the object's resident pages and try and discard them.
5193 */
5194 next = (vm_page_t)queue_first(&object->memq);
5195 while (!queue_end(&object->memq, (queue_entry_t)next)) {
5196 p = next;
5197 next = (vm_page_t)queue_next(&next->listq);
5198
5199 if (purge_loop_quota-- == 0) {
5200 /*
5201 * Avoid holding the page queues lock for too long.
5202 * Let someone else take it for a while if needed.
5203 * Keep holding the object's lock to guarantee that
5204 * the object's page list doesn't change under us
5205 * while we yield.
5206 */
5207 if (local_freeq != VM_PAGE_NULL) {
5208 /*
5209 * Flush our queue of pages to free.
5210 */
5211 vm_page_free_list(local_freeq);
5212 local_freeq = VM_PAGE_NULL;
5213 local_freed = 0;
5214 }
2d21ac55 5215 mutex_yield(&vm_page_queue_lock);
91447636
A
5216
5217 /* resume with the current page and a new quota */
5218 purge_loop_quota = PURGE_LOOP_QUOTA;
5219 }
593a1d5f 5220
91447636
A
5221 if (p->wire_count) {
5222 /* don't discard a wired page */
593a1d5f
A
5223 vm_page_purged_wired++;
5224
5225 skip_page:
5226 /*
5227 * This page is no longer "purgeable",
5228 * for accounting purposes.
5229 */
5230 assert(vm_page_purgeable_count > 0);
5231 vm_page_purgeable_count--;
91447636
A
5232 continue;
5233 }
5234
593a1d5f
A
5235 if (p->busy) {
5236 /*
5237 * We can't reclaim a busy page but we can deactivate
5238 * it (if it's not wired) to make sure it gets
5239 * considered by vm_pageout_scan() later.
5240 */
5241 vm_page_deactivate(p);
5242 vm_page_purged_busy++;
5243 goto skip_page;
5244 }
5245
5246 if (p->cleaning || p->laundry || p->list_req_pending) {
5247 /* page is being acted upon, so don't mess with it */
5248 vm_page_purged_others++;
5249 goto skip_page;
5250 }
5251
2d21ac55
A
5252 assert(!p->laundry);
5253 assert(p->object != kernel_object);
91447636
A
5254
5255 /* we can discard this page */
5256
5257 /* advertize that this page is in a transition state */
5258 p->busy = TRUE;
5259
2d21ac55 5260 if (p->pmapped == TRUE) {
91447636
A
5261 /* unmap the page */
5262 int refmod_state;
5263
5264 refmod_state = pmap_disconnect(p->phys_page);
5265 if (refmod_state & VM_MEM_MODIFIED) {
5266 p->dirty = TRUE;
5267 }
5268 }
5269
5270 if (p->dirty || p->precious) {
5271 /* we saved the cost of cleaning this page ! */
5272 num_purged_pages++;
5273 vm_page_purged_count++;
5274 }
5275
2d21ac55 5276 vm_page_free_prepare(p);
593a1d5f
A
5277 /*
5278 * vm_page_purgeable_count is not updated when freeing
5279 * a page from an "empty" object, so do it explicitly here.
5280 */
5281 assert(vm_page_purgeable_count > 0);
5282 vm_page_purgeable_count--;
91447636
A
5283
5284 /* ... and put it on our queue of pages to free */
91447636
A
5285 assert(p->pageq.next == NULL &&
5286 p->pageq.prev == NULL);
5287 p->pageq.next = (queue_entry_t) local_freeq;
5288 local_freeq = p;
5289 if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
5290 /* flush our queue of pages to free */
5291 vm_page_free_list(local_freeq);
5292 local_freeq = VM_PAGE_NULL;
5293 local_freed = 0;
5294 }
5295 }
5296
5297 /* flush our local queue of pages to free one last time */
5298 if (local_freeq != VM_PAGE_NULL) {
5299 vm_page_free_list(local_freeq);
5300 local_freeq = VM_PAGE_NULL;
5301 local_freed = 0;
5302 }
5303
5304 return num_purged_pages;
5305}
5306
5307/*
2d21ac55
A
5308 * vm_object_purgeable_control() allows the caller to control and investigate the
5309 * state of a purgeable object. A purgeable object is created via a call to
5310 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgeable object will
5311 * never be coalesced with any other object -- even other purgeable objects --
5312 * and will thus always remain a distinct object. A purgeable object has
91447636 5313 * special semantics when its reference count is exactly 1. If its reference
2d21ac55 5314 * count is greater than 1, then a purgeable object will behave like a normal
91447636
A
5315 * object and attempts to use this interface will result in an error return
5316 * of KERN_INVALID_ARGUMENT.
5317 *
2d21ac55 5318 * A purgeable object may be put into a "volatile" state which will make the
91447636
A
5319 * object's pages elligable for being reclaimed without paging to backing
5320 * store if the system runs low on memory. If the pages in a volatile
2d21ac55
A
5321 * purgeable object are reclaimed, the purgeable object is said to have been
5322 * "emptied." When a purgeable object is emptied the system will reclaim as
91447636
A
5323 * many pages from the object as it can in a convenient manner (pages already
5324 * en route to backing store or busy for other reasons are left as is). When
2d21ac55 5325 * a purgeable object is made volatile, its pages will generally be reclaimed
91447636
A
5326 * before other pages in the application's working set. This semantic is
5327 * generally used by applications which can recreate the data in the object
5328 * faster than it can be paged in. One such example might be media assets
5329 * which can be reread from a much faster RAID volume.
5330 *
2d21ac55 5331 * A purgeable object may be designated as "non-volatile" which means it will
91447636
A
5332 * behave like all other objects in the system with pages being written to and
5333 * read from backing store as needed to satisfy system memory needs. If the
5334 * object was emptied before the object was made non-volatile, that fact will
2d21ac55 5335 * be returned as the old state of the purgeable object (see
91447636
A
5336 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
5337 * were reclaimed as part of emptying the object will be refaulted in as
5338 * zero-fill on demand. It is up to the application to note that an object
5339 * was emptied and recreate the objects contents if necessary. When a
2d21ac55
A
5340 * purgeable object is made non-volatile, its pages will generally not be paged
5341 * out to backing store in the immediate future. A purgeable object may also
91447636
A
5342 * be manually emptied.
5343 *
5344 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
2d21ac55 5345 * volatile purgeable object may be queried at any time. This information may
91447636
A
5346 * be used as a control input to let the application know when the system is
5347 * experiencing memory pressure and is reclaiming memory.
5348 *
2d21ac55 5349 * The specified address may be any address within the purgeable object. If
91447636
A
5350 * the specified address does not represent any object in the target task's
5351 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
2d21ac55 5352 * object containing the specified address is not a purgeable object, then
91447636
A
5353 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
5354 * returned.
5355 *
5356 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
5357 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
2d21ac55
A
5358 * state is used to set the new state of the purgeable object and return its
5359 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable
91447636
A
5360 * object is returned in the parameter state.
5361 *
5362 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
5363 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
5364 * the non-volatile, volatile and volatile/empty states described above.
2d21ac55 5365 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
91447636
A
5366 * immediately reclaim as many pages in the object as can be conveniently
5367 * collected (some may have already been written to backing store or be
5368 * otherwise busy).
5369 *
2d21ac55
A
5370 * The process of making a purgeable object non-volatile and determining its
5371 * previous state is atomic. Thus, if a purgeable object is made
91447636 5372 * VM_PURGABLE_NONVOLATILE and the old state is returned as
2d21ac55 5373 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
91447636
A
5374 * completely intact and will remain so until the object is made volatile
5375 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
5376 * was reclaimed while it was in a volatile state and its previous contents
5377 * have been lost.
5378 */
5379/*
5380 * The object must be locked.
5381 */
5382kern_return_t
5383vm_object_purgable_control(
5384 vm_object_t object,
5385 vm_purgable_t control,
5386 int *state)
5387{
5388 int old_state;
2d21ac55 5389 int new_state;
91447636
A
5390
5391 if (object == VM_OBJECT_NULL) {
5392 /*
2d21ac55 5393 * Object must already be present or it can't be purgeable.
91447636
A
5394 */
5395 return KERN_INVALID_ARGUMENT;
5396 }
5397
5398 /*
2d21ac55 5399 * Get current state of the purgeable object.
91447636 5400 */
2d21ac55
A
5401 old_state = object->purgable;
5402 if (old_state == VM_PURGABLE_DENY)
91447636
A
5403 return KERN_INVALID_ARGUMENT;
5404
2d21ac55 5405 /* purgeable cant have delayed copies - now or in the future */
91447636
A
5406 assert(object->copy == VM_OBJECT_NULL);
5407 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5408
5409 /*
5410 * Execute the desired operation.
5411 */
5412 if (control == VM_PURGABLE_GET_STATE) {
5413 *state = old_state;
5414 return KERN_SUCCESS;
5415 }
5416
2d21ac55
A
5417 new_state = *state & VM_PURGABLE_STATE_MASK;
5418 switch (new_state) {
5419 case VM_PURGABLE_DENY:
91447636 5420 case VM_PURGABLE_NONVOLATILE:
2d21ac55
A
5421 object->purgable = new_state;
5422
5423 if (old_state != VM_PURGABLE_NONVOLATILE) {
5424 vm_page_lock_queues();
2d21ac55 5425 if (old_state==VM_PURGABLE_VOLATILE) {
593a1d5f
A
5426 assert(vm_page_purgeable_count >=
5427 object->resident_page_count);
5428 vm_page_purgeable_count -= object->resident_page_count;
5429
2d21ac55
A
5430 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
5431 purgeable_q_t queue = vm_purgeable_object_remove(object);
5432 assert(queue);
91447636 5433
2d21ac55
A
5434 vm_purgeable_token_delete_first(queue);
5435 assert(queue->debug_count_objects>=0);
5436 };
5437 vm_page_unlock_queues();
91447636 5438 }
91447636
A
5439 break;
5440
5441 case VM_PURGABLE_VOLATILE:
91447636 5442
593a1d5f
A
5443 if (old_state == VM_PURGABLE_EMPTY &&
5444 object->resident_page_count == 0)
2d21ac55
A
5445 break;
5446 purgeable_q_t queue;
5447
5448 /* find the correct queue */
5449 if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
593a1d5f 5450 queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
2d21ac55
A
5451 else {
5452 if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
5453 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
5454 else
5455 queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
91447636 5456 }
2d21ac55 5457
593a1d5f
A
5458 if (old_state == VM_PURGABLE_NONVOLATILE ||
5459 old_state == VM_PURGABLE_EMPTY) {
2d21ac55
A
5460 /* try to add token... this can fail */
5461 vm_page_lock_queues();
91447636 5462
2d21ac55
A
5463 kern_return_t result = vm_purgeable_token_add(queue);
5464 if (result != KERN_SUCCESS) {
5465 vm_page_unlock_queues();
5466 return result;
91447636 5467 }
2d21ac55
A
5468 vm_page_purgeable_count += object->resident_page_count;
5469
5470 vm_page_unlock_queues();
5471
5472 object->purgable = new_state;
5473
5474 /* object should not be on a queue */
5475 assert(object->objq.next == NULL && object->objq.prev == NULL);
91447636 5476 }
2d21ac55
A
5477 else if (old_state == VM_PURGABLE_VOLATILE) {
5478 /*
5479 * if reassigning priorities / purgeable groups, we don't change the
5480 * token queue. So moving priorities will not make pages stay around longer.
5481 * Reasoning is that the algorithm gives most priority to the most important
5482 * object. If a new token is added, the most important object' priority is boosted.
5483 * This biases the system already for purgeable queues that move a lot.
5484 * It doesn't seem more biasing is neccessary in this case, where no new object is added.
5485 */
5486 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
5487
5488 purgeable_q_t old_queue=vm_purgeable_object_remove(object);
5489 assert(old_queue);
5490
5491 if (old_queue != queue) {
5492 kern_return_t result;
5493
5494 /* Changing queue. Have to move token. */
5495 vm_page_lock_queues();
5496 vm_purgeable_token_delete_first(old_queue);
5497 result = vm_purgeable_token_add(queue);
5498 vm_page_unlock_queues();
91447636 5499
2d21ac55
A
5500 assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */
5501 }
5502 };
5503 vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT );
5504
5505 assert(queue->debug_count_objects>=0);
5506
91447636
A
5507 break;
5508
5509
5510 case VM_PURGABLE_EMPTY:
2d21ac55
A
5511 if (old_state != new_state)
5512 {
5513 assert(old_state==VM_PURGABLE_NONVOLATILE || old_state==VM_PURGABLE_VOLATILE);
5514 if(old_state==VM_PURGABLE_VOLATILE) {
5515 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
5516 purgeable_q_t old_queue=vm_purgeable_object_remove(object);
5517 assert(old_queue);
5518 vm_page_lock_queues();
5519 vm_purgeable_token_delete_first(old_queue);
5520 }
5521
593a1d5f
A
5522 if (old_state==VM_PURGABLE_NONVOLATILE ||
5523 old_state == VM_PURGABLE_EMPTY) {
2d21ac55 5524 vm_page_lock_queues();
593a1d5f 5525 vm_page_purgeable_count += object->resident_page_count;
2d21ac55 5526 }
593a1d5f 5527 object->purgable = VM_PURGABLE_VOLATILE;
2d21ac55
A
5528 (void) vm_object_purge(object);
5529 vm_page_unlock_queues();
91447636 5530 }
91447636
A
5531 break;
5532
5533 }
5534 *state = old_state;
5535
5536 return KERN_SUCCESS;
5537}
0b4e3aa0
A
5538
5539#if TASK_SWAPPER
5540/*
5541 * vm_object_res_deallocate
5542 *
5543 * (recursively) decrement residence counts on vm objects and their shadows.
5544 * Called from vm_object_deallocate and when swapping out an object.
5545 *
5546 * The object is locked, and remains locked throughout the function,
5547 * even as we iterate down the shadow chain. Locks on intermediate objects
5548 * will be dropped, but not the original object.
5549 *
5550 * NOTE: this function used to use recursion, rather than iteration.
5551 */
5552
5553__private_extern__ void
5554vm_object_res_deallocate(
5555 vm_object_t object)
5556{
5557 vm_object_t orig_object = object;
5558 /*
5559 * Object is locked so it can be called directly
5560 * from vm_object_deallocate. Original object is never
5561 * unlocked.
5562 */
5563 assert(object->res_count > 0);
5564 while (--object->res_count == 0) {
5565 assert(object->ref_count >= object->res_count);
5566 vm_object_deactivate_all_pages(object);
5567 /* iterate on shadow, if present */
5568 if (object->shadow != VM_OBJECT_NULL) {
5569 vm_object_t tmp_object = object->shadow;
5570 vm_object_lock(tmp_object);
5571 if (object != orig_object)
5572 vm_object_unlock(object);
5573 object = tmp_object;
5574 assert(object->res_count > 0);
5575 } else
5576 break;
5577 }
5578 if (object != orig_object)
1c79356b 5579 vm_object_unlock(object);
0b4e3aa0
A
5580}
5581
5582/*
5583 * vm_object_res_reference
5584 *
5585 * Internal function to increment residence count on a vm object
5586 * and its shadows. It is called only from vm_object_reference, and
5587 * when swapping in a vm object, via vm_map_swap.
5588 *
5589 * The object is locked, and remains locked throughout the function,
5590 * even as we iterate down the shadow chain. Locks on intermediate objects
5591 * will be dropped, but not the original object.
5592 *
5593 * NOTE: this function used to use recursion, rather than iteration.
5594 */
5595
5596__private_extern__ void
5597vm_object_res_reference(
5598 vm_object_t object)
5599{
5600 vm_object_t orig_object = object;
5601 /*
5602 * Object is locked, so this can be called directly
5603 * from vm_object_reference. This lock is never released.
5604 */
5605 while ((++object->res_count == 1) &&
5606 (object->shadow != VM_OBJECT_NULL)) {
5607 vm_object_t tmp_object = object->shadow;
5608
5609 assert(object->ref_count >= object->res_count);
5610 vm_object_lock(tmp_object);
5611 if (object != orig_object)
5612 vm_object_unlock(object);
5613 object = tmp_object;
1c79356b 5614 }
0b4e3aa0
A
5615 if (object != orig_object)
5616 vm_object_unlock(object);
5617 assert(orig_object->ref_count >= orig_object->res_count);
1c79356b 5618}
0b4e3aa0
A
5619#endif /* TASK_SWAPPER */
5620
5621/*
5622 * vm_object_reference:
5623 *
5624 * Gets another reference to the given object.
5625 */
5626#ifdef vm_object_reference
5627#undef vm_object_reference
5628#endif
5629__private_extern__ void
5630vm_object_reference(
5631 register vm_object_t object)
5632{
5633 if (object == VM_OBJECT_NULL)
5634 return;
5635
5636 vm_object_lock(object);
5637 assert(object->ref_count > 0);
5638 vm_object_reference_locked(object);
5639 vm_object_unlock(object);
5640}
5641
1c79356b
A
5642#ifdef MACH_BSD
5643/*
5644 * Scale the vm_object_cache
5645 * This is required to make sure that the vm_object_cache is big
5646 * enough to effectively cache the mapped file.
5647 * This is really important with UBC as all the regular file vnodes
5648 * have memory object associated with them. Havving this cache too
5649 * small results in rapid reclaim of vnodes and hurts performance a LOT!
5650 *
5651 * This is also needed as number of vnodes can be dynamically scaled.
5652 */
5653kern_return_t
91447636
A
5654adjust_vm_object_cache(
5655 __unused vm_size_t oval,
5656 vm_size_t nval)
1c79356b
A
5657{
5658 vm_object_cached_max = nval;
5659 vm_object_cache_trim(FALSE);
5660 return (KERN_SUCCESS);
5661}
5662#endif /* MACH_BSD */
5663
91447636
A
5664
5665/*
5666 * vm_object_transpose
5667 *
5668 * This routine takes two VM objects of the same size and exchanges
5669 * their backing store.
5670 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
5671 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
5672 *
5673 * The VM objects must not be locked by caller.
5674 */
5675kern_return_t
5676vm_object_transpose(
5677 vm_object_t object1,
5678 vm_object_t object2,
5679 vm_object_size_t transpose_size)
5680{
5681 vm_object_t tmp_object;
5682 kern_return_t retval;
5683 boolean_t object1_locked, object2_locked;
5684 boolean_t object1_paging, object2_paging;
5685 vm_page_t page;
5686 vm_object_offset_t page_offset;
5687
5688 tmp_object = VM_OBJECT_NULL;
5689 object1_locked = FALSE; object2_locked = FALSE;
5690 object1_paging = FALSE; object2_paging = FALSE;
5691
5692 if (object1 == object2 ||
5693 object1 == VM_OBJECT_NULL ||
5694 object2 == VM_OBJECT_NULL) {
5695 /*
5696 * If the 2 VM objects are the same, there's
5697 * no point in exchanging their backing store.
5698 */
5699 retval = KERN_INVALID_VALUE;
5700 goto done;
5701 }
5702
5703 vm_object_lock(object1);
5704 object1_locked = TRUE;
2d21ac55
A
5705 if (!object1->alive || object1->terminating ||
5706 object1->copy || object1->shadow || object1->shadowed ||
5707 object1->purgable != VM_PURGABLE_DENY) {
91447636
A
5708 /*
5709 * We don't deal with copy or shadow objects (yet).
5710 */
5711 retval = KERN_INVALID_VALUE;
5712 goto done;
5713 }
5714 /*
5715 * Since we're about to mess with the object's backing store,
5716 * mark it as "paging_in_progress". Note that this is not enough
5717 * to prevent any paging activity on this object, so the caller should
5718 * have "quiesced" the objects beforehand, via a UPL operation with
5719 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
5720 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
5721 */
5722 vm_object_paging_begin(object1);
5723 object1_paging = TRUE;
5724 vm_object_unlock(object1);
5725 object1_locked = FALSE;
5726
5727 /*
5728 * Same as above for the 2nd object...
5729 */
5730 vm_object_lock(object2);
5731 object2_locked = TRUE;
2d21ac55
A
5732 if (! object2->alive || object2->terminating ||
5733 object2->copy || object2->shadow || object2->shadowed ||
5734 object2->purgable != VM_PURGABLE_DENY) {
91447636
A
5735 retval = KERN_INVALID_VALUE;
5736 goto done;
5737 }
5738 vm_object_paging_begin(object2);
5739 object2_paging = TRUE;
5740 vm_object_unlock(object2);
5741 object2_locked = FALSE;
5742
5743 /*
5744 * Allocate a temporary VM object to hold object1's contents
5745 * while we copy object2 to object1.
5746 */
5747 tmp_object = vm_object_allocate(transpose_size);
5748 vm_object_lock(tmp_object);
5749 vm_object_paging_begin(tmp_object);
5750 tmp_object->can_persist = FALSE;
5751
5752 /*
5753 * Since we need to lock both objects at the same time,
5754 * make sure we always lock them in the same order to
5755 * avoid deadlocks.
5756 */
5757 if (object1 < object2) {
5758 vm_object_lock(object1);
5759 vm_object_lock(object2);
5760 } else {
5761 vm_object_lock(object2);
5762 vm_object_lock(object1);
5763 }
5764 object1_locked = TRUE;
5765 object2_locked = TRUE;
5766
5767 if (object1->size != object2->size ||
5768 object1->size != transpose_size) {
5769 /*
5770 * If the 2 objects don't have the same size, we can't
5771 * exchange their backing stores or one would overflow.
5772 * If their size doesn't match the caller's
5773 * "transpose_size", we can't do it either because the
5774 * transpose operation will affect the entire span of
5775 * the objects.
5776 */
5777 retval = KERN_INVALID_VALUE;
5778 goto done;
5779 }
5780
5781
5782 /*
5783 * Transpose the lists of resident pages.
2d21ac55 5784 * This also updates the resident_page_count and the memq_hint.
91447636
A
5785 */
5786 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
5787 /*
5788 * No pages in object1, just transfer pages
5789 * from object2 to object1. No need to go through
5790 * an intermediate object.
5791 */
5792 while (!queue_empty(&object2->memq)) {
5793 page = (vm_page_t) queue_first(&object2->memq);
2d21ac55 5794 vm_page_rename(page, object1, page->offset, FALSE);
91447636
A
5795 }
5796 assert(queue_empty(&object2->memq));
5797 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
5798 /*
5799 * No pages in object2, just transfer pages
5800 * from object1 to object2. No need to go through
5801 * an intermediate object.
5802 */
5803 while (!queue_empty(&object1->memq)) {
5804 page = (vm_page_t) queue_first(&object1->memq);
2d21ac55 5805 vm_page_rename(page, object2, page->offset, FALSE);
91447636
A
5806 }
5807 assert(queue_empty(&object1->memq));
5808 } else {
5809 /* transfer object1's pages to tmp_object */
5810 vm_page_lock_queues();
5811 while (!queue_empty(&object1->memq)) {
5812 page = (vm_page_t) queue_first(&object1->memq);
5813 page_offset = page->offset;
5814 vm_page_remove(page);
5815 page->offset = page_offset;
5816 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
5817 }
5818 vm_page_unlock_queues();
5819 assert(queue_empty(&object1->memq));
5820 /* transfer object2's pages to object1 */
5821 while (!queue_empty(&object2->memq)) {
5822 page = (vm_page_t) queue_first(&object2->memq);
2d21ac55 5823 vm_page_rename(page, object1, page->offset, FALSE);
91447636
A
5824 }
5825 assert(queue_empty(&object2->memq));
5826 /* transfer tmp_object's pages to object1 */
5827 while (!queue_empty(&tmp_object->memq)) {
5828 page = (vm_page_t) queue_first(&tmp_object->memq);
5829 queue_remove(&tmp_object->memq, page,
5830 vm_page_t, listq);
5831 vm_page_insert(page, object2, page->offset);
5832 }
5833 assert(queue_empty(&tmp_object->memq));
5834 }
5835
91447636
A
5836#define __TRANSPOSE_FIELD(field) \
5837MACRO_BEGIN \
5838 tmp_object->field = object1->field; \
5839 object1->field = object2->field; \
5840 object2->field = tmp_object->field; \
5841MACRO_END
5842
2d21ac55
A
5843 /* "size" should be identical */
5844 assert(object1->size == object2->size);
5845 /* "Lock" refers to the object not its contents */
5846 /* "ref_count" refers to the object not its contents */
5847#if TASK_SWAPPER
5848 /* "res_count" refers to the object not its contents */
5849#endif
5850 /* "resident_page_count" was updated above when transposing pages */
5851 /* there should be no "copy" */
91447636
A
5852 assert(!object1->copy);
5853 assert(!object2->copy);
2d21ac55 5854 /* there should be no "shadow" */
91447636
A
5855 assert(!object1->shadow);
5856 assert(!object2->shadow);
91447636
A
5857 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
5858 __TRANSPOSE_FIELD(pager);
5859 __TRANSPOSE_FIELD(paging_offset);
91447636
A
5860 __TRANSPOSE_FIELD(pager_control);
5861 /* update the memory_objects' pointers back to the VM objects */
5862 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5863 memory_object_control_collapse(object1->pager_control,
5864 object1);
5865 }
5866 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5867 memory_object_control_collapse(object2->pager_control,
5868 object2);
5869 }
2d21ac55
A
5870 __TRANSPOSE_FIELD(copy_strategy);
5871 /* "paging_in_progress" refers to the object not its contents */
91447636
A
5872 assert(object1->paging_in_progress);
5873 assert(object2->paging_in_progress);
2d21ac55 5874 /* "all_wanted" refers to the object not its contents */
91447636
A
5875 __TRANSPOSE_FIELD(pager_created);
5876 __TRANSPOSE_FIELD(pager_initialized);
5877 __TRANSPOSE_FIELD(pager_ready);
5878 __TRANSPOSE_FIELD(pager_trusted);
2d21ac55 5879 __TRANSPOSE_FIELD(can_persist);
91447636
A
5880 __TRANSPOSE_FIELD(internal);
5881 __TRANSPOSE_FIELD(temporary);
5882 __TRANSPOSE_FIELD(private);
5883 __TRANSPOSE_FIELD(pageout);
2d21ac55
A
5884 /* "alive" should be set */
5885 assert(object1->alive);
5886 assert(object2->alive);
5887 /* "purgeable" should be non-purgeable */
5888 assert(object1->purgable == VM_PURGABLE_DENY);
5889 assert(object2->purgable == VM_PURGABLE_DENY);
5890 /* "shadowed" refers to the the object not its contents */
5891 __TRANSPOSE_FIELD(silent_overwrite);
5892 __TRANSPOSE_FIELD(advisory_pageout);
91447636 5893 __TRANSPOSE_FIELD(true_share);
2d21ac55
A
5894 /* "terminating" should not be set */
5895 assert(!object1->terminating);
5896 assert(!object2->terminating);
5897 __TRANSPOSE_FIELD(named);
5898 /* "shadow_severed" refers to the object not its contents */
91447636
A
5899 __TRANSPOSE_FIELD(phys_contiguous);
5900 __TRANSPOSE_FIELD(nophyscache);
2d21ac55
A
5901 /* "cached_list" should be NULL */
5902 assert(object1->cached_list.prev == NULL);
5903 assert(object1->cached_list.next == NULL);
5904 assert(object2->cached_list.prev == NULL);
5905 assert(object2->cached_list.next == NULL);
5906 /* "msr_q" is linked to the object not its contents */
5907 assert(queue_empty(&object1->msr_q));
5908 assert(queue_empty(&object2->msr_q));
91447636
A
5909 __TRANSPOSE_FIELD(last_alloc);
5910 __TRANSPOSE_FIELD(sequential);
2d21ac55
A
5911 __TRANSPOSE_FIELD(pages_created);
5912 __TRANSPOSE_FIELD(pages_used);
5913#if MACH_PAGEMAP
91447636 5914 __TRANSPOSE_FIELD(existence_map);
2d21ac55 5915#endif
91447636 5916 __TRANSPOSE_FIELD(cow_hint);
2d21ac55
A
5917#if MACH_ASSERT
5918 __TRANSPOSE_FIELD(paging_object);
5919#endif
91447636 5920 __TRANSPOSE_FIELD(wimg_bits);
2d21ac55
A
5921 __TRANSPOSE_FIELD(code_signed);
5922 __TRANSPOSE_FIELD(not_in_use);
5923#ifdef UPL_DEBUG
5924 /* "uplq" refers to the object not its contents (see upl_transpose()) */
5925#endif
91447636
A
5926
5927#undef __TRANSPOSE_FIELD
5928
5929 retval = KERN_SUCCESS;
5930
5931done:
5932 /*
5933 * Cleanup.
5934 */
5935 if (tmp_object != VM_OBJECT_NULL) {
5936 vm_object_paging_end(tmp_object);
5937 vm_object_unlock(tmp_object);
5938 /*
5939 * Re-initialize the temporary object to avoid
5940 * deallocating a real pager.
5941 */
5942 _vm_object_allocate(transpose_size, tmp_object);
5943 vm_object_deallocate(tmp_object);
5944 tmp_object = VM_OBJECT_NULL;
5945 }
5946
5947 if (object1_locked) {
5948 vm_object_unlock(object1);
5949 object1_locked = FALSE;
5950 }
5951 if (object2_locked) {
5952 vm_object_unlock(object2);
5953 object2_locked = FALSE;
5954 }
5955 if (object1_paging) {
5956 vm_object_lock(object1);
5957 vm_object_paging_end(object1);
5958 vm_object_unlock(object1);
5959 object1_paging = FALSE;
5960 }
5961 if (object2_paging) {
5962 vm_object_lock(object2);
5963 vm_object_paging_end(object2);
5964 vm_object_unlock(object2);
5965 object2_paging = FALSE;
5966 }
5967
5968 return retval;
5969}
0c530ab8
A
5970
5971
2d21ac55
A
5972/*
5973 * vm_object_build_cluster
5974 *
5975 * Determine how big a cluster we should issue an I/O for...
5976 *
5977 * Inputs: *start == offset of page needed
5978 * *length == maximum cluster pager can handle
5979 * Outputs: *start == beginning offset of cluster
5980 * *length == length of cluster to try
5981 *
5982 * The original *start will be encompassed by the cluster
5983 *
5984 */
5985extern int speculative_reads_disabled;
5986
5987uint32_t pre_heat_scaling[MAX_UPL_TRANSFER];
5988uint32_t pre_heat_cluster[MAX_UPL_TRANSFER];
5989
5990#define PRE_HEAT_MULTIPLIER 4
5991
5992__private_extern__ void
5993vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
5994 vm_size_t *length, vm_object_fault_info_t fault_info)
5995{
5996 vm_size_t pre_heat_size;
5997 vm_size_t tail_size;
5998 vm_size_t head_size;
5999 vm_size_t max_length;
6000 vm_size_t cluster_size;
6001 vm_object_offset_t object_size;
6002 vm_object_offset_t orig_start;
6003 vm_object_offset_t target_start;
6004 vm_object_offset_t offset;
6005 vm_behavior_t behavior;
6006 boolean_t look_behind = TRUE;
6007 boolean_t look_ahead = TRUE;
6008 int sequential_run;
6009 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
6010
6011 assert( !(*length & PAGE_MASK));
6012 assert( !(*start & PAGE_MASK_64));
6013
6014 if ( (max_length = *length) > (MAX_UPL_TRANSFER * PAGE_SIZE) )
6015 max_length = (MAX_UPL_TRANSFER * PAGE_SIZE);
6016 /*
6017 * we'll always return a cluster size of at least
6018 * 1 page, since the original fault must always
6019 * be processed
6020 */
6021 *length = PAGE_SIZE;
6022
6023 if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
6024 /*
6025 * no cluster... just fault the page in
6026 */
6027 return;
6028 }
6029 orig_start = *start;
6030 target_start = orig_start;
6031 cluster_size = round_page_32(fault_info->cluster_size);
6032 behavior = fault_info->behavior;
6033
6034 vm_object_lock(object);
6035
6036 if (object->internal)
6037 object_size = object->size;
6038 else if (object->pager != MEMORY_OBJECT_NULL)
6039 vnode_pager_get_object_size(object->pager, &object_size);
6040 else
6041 goto out; /* pager is gone for this object, nothing more to do */
6042
6043 object_size = round_page_64(object_size);
6044
6045 if (orig_start >= object_size) {
6046 /*
6047 * fault occurred beyond the EOF...
6048 * we need to punt w/o changing the
6049 * starting offset
6050 */
6051 goto out;
6052 }
6053 if (object->pages_used > object->pages_created) {
6054 /*
6055 * must have wrapped our 32 bit counters
6056 * so reset
6057 */
6058 object->pages_used = object->pages_created = 0;
6059 }
6060 if ((sequential_run = object->sequential)) {
6061 if (sequential_run < 0) {
6062 sequential_behavior = VM_BEHAVIOR_RSEQNTL;
6063 sequential_run = 0 - sequential_run;
6064 } else {
6065 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
6066 }
6067 }
6068 switch(behavior) {
6069
6070 default:
6071 behavior = VM_BEHAVIOR_DEFAULT;
6072
6073 case VM_BEHAVIOR_DEFAULT:
6074 if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
6075 goto out;
6076
6077 if (sequential_run >= (3 * PAGE_SIZE)) {
6078 pre_heat_size = sequential_run + PAGE_SIZE;
6079
6080 if ((behavior = sequential_behavior) == VM_BEHAVIOR_SEQUENTIAL)
6081 look_behind = FALSE;
6082 else
6083 look_ahead = FALSE;
6084 } else {
6085 uint32_t pages_unused;
6086
6087 if (object->pages_created < 32 * PRE_HEAT_MULTIPLIER) {
6088 /*
6089 * prime the pump
6090 */
6091 pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
6092 break;
6093 }
6094 pages_unused = object->pages_created - object->pages_used;
6095
6096 if (pages_unused < (object->pages_created / 8)) {
6097 pre_heat_size = PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER;
6098 } else if (pages_unused < (object->pages_created / 4)) {
6099 pre_heat_size = PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER;
6100 } else if (pages_unused < (object->pages_created / 2)) {
6101 pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
6102 } else {
6103 pre_heat_size = PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER;
6104 }
6105 }
6106 break;
6107
6108 case VM_BEHAVIOR_RANDOM:
6109 if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
6110 goto out;
6111 break;
6112
6113 case VM_BEHAVIOR_SEQUENTIAL:
6114 if ((pre_heat_size = cluster_size) == 0)
6115 pre_heat_size = sequential_run + PAGE_SIZE;
6116 look_behind = FALSE;
6117
6118 break;
6119
6120 case VM_BEHAVIOR_RSEQNTL:
6121 if ((pre_heat_size = cluster_size) == 0)
6122 pre_heat_size = sequential_run + PAGE_SIZE;
6123 look_ahead = FALSE;
6124
6125 break;
6126
6127 }
6128 if (pre_heat_size > max_length)
6129 pre_heat_size = max_length;
6130
6131 if (behavior == VM_BEHAVIOR_DEFAULT && vm_page_free_count < vm_page_free_target)
6132 pre_heat_size /= 2;
6133
6134 if (look_ahead == TRUE) {
6135 if (look_behind == TRUE)
6136 target_start &= ~(pre_heat_size - 1);
6137
6138 if ((target_start + pre_heat_size) > object_size)
6139 pre_heat_size = (vm_size_t)(trunc_page_64(object_size - target_start));
6140
6141 tail_size = pre_heat_size - (orig_start - target_start) - PAGE_SIZE;
6142 } else {
6143 if (pre_heat_size > target_start)
6144 pre_heat_size = target_start;
6145 tail_size = 0;
6146 }
6147 pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;
6148
6149 if (pre_heat_size <= PAGE_SIZE)
6150 goto out;
6151
6152 if (look_behind == TRUE) {
6153 /*
6154 * take a look at the pages before the original
6155 * faulting offset
6156 */
6157 head_size = pre_heat_size - tail_size - PAGE_SIZE;
6158
6159 for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
6160 /*
6161 * don't poke below the lowest offset
6162 */
6163 if (offset < fault_info->lo_offset)
6164 break;
6165 /*
6166 * for external objects and internal objects w/o an existence map
6167 * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN
6168 */
6169#if MACH_PAGEMAP
6170 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
6171 /*
6172 * we know for a fact that the pager can't provide the page
6173 * so don't include it or any pages beyond it in this cluster
6174 */
6175 break;
6176 }
6177#endif
6178 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
6179 /*
6180 * don't bridge resident pages
6181 */
6182 break;
6183 }
6184 *start = offset;
6185 *length += PAGE_SIZE;
6186 }
6187 }
6188 if (look_ahead == TRUE) {
6189 for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
6190 /*
6191 * don't poke above the highest offset
6192 */
6193 if (offset >= fault_info->hi_offset)
6194 break;
6195 /*
6196 * for external objects and internal objects w/o an existence map
6197 * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN
6198 */
6199#if MACH_PAGEMAP
6200 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
6201 /*
6202 * we know for a fact that the pager can't provide the page
6203 * so don't include it or any pages beyond it in this cluster
6204 */
6205 break;
6206 }
6207#endif
6208 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
6209 /*
6210 * don't bridge resident pages
6211 */
6212 break;
6213 }
6214 *length += PAGE_SIZE;
6215 }
6216 }
6217out:
6218 pre_heat_cluster[*length / PAGE_SIZE]++;
6219
6220 vm_object_unlock(object);
6221}
6222
6223
6224/*
6225 * Allow manipulation of individual page state. This is actually part of
6226 * the UPL regimen but takes place on the VM object rather than on a UPL
6227 */
0c530ab8
A
6228
6229kern_return_t
6230vm_object_page_op(
6231 vm_object_t object,
6232 vm_object_offset_t offset,
6233 int ops,
6234 ppnum_t *phys_entry,
6235 int *flags)
6236{
6237 vm_page_t dst_page;
6238
6239 vm_object_lock(object);
6240
6241 if(ops & UPL_POP_PHYSICAL) {
6242 if(object->phys_contiguous) {
6243 if (phys_entry) {
6244 *phys_entry = (ppnum_t)
935ed37a 6245 (object->shadow_offset >> PAGE_SHIFT);
0c530ab8
A
6246 }
6247 vm_object_unlock(object);
6248 return KERN_SUCCESS;
6249 } else {
6250 vm_object_unlock(object);
6251 return KERN_INVALID_OBJECT;
6252 }
6253 }
6254 if(object->phys_contiguous) {
6255 vm_object_unlock(object);
6256 return KERN_INVALID_OBJECT;
6257 }
6258
6259 while(TRUE) {
6260 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
6261 vm_object_unlock(object);
6262 return KERN_FAILURE;
6263 }
6264
6265 /* Sync up on getting the busy bit */
6266 if((dst_page->busy || dst_page->cleaning) &&
6267 (((ops & UPL_POP_SET) &&
6268 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
6269 /* someone else is playing with the page, we will */
6270 /* have to wait */
6271 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
6272 continue;
6273 }
6274
6275 if (ops & UPL_POP_DUMP) {
2d21ac55 6276 if (dst_page->pmapped == TRUE)
0c530ab8 6277 pmap_disconnect(dst_page->phys_page);
0c530ab8 6278
2d21ac55
A
6279 vm_page_lock_queues();
6280 vm_page_free(dst_page);
0c530ab8 6281 vm_page_unlock_queues();
2d21ac55 6282
0c530ab8
A
6283 break;
6284 }
6285
6286 if (flags) {
6287 *flags = 0;
6288
6289 /* Get the condition of flags before requested ops */
6290 /* are undertaken */
6291
6292 if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
6293 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
6294 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
6295 if(dst_page->absent) *flags |= UPL_POP_ABSENT;
6296 if(dst_page->busy) *flags |= UPL_POP_BUSY;
6297 }
6298
6299 /* The caller should have made a call either contingent with */
6300 /* or prior to this call to set UPL_POP_BUSY */
6301 if(ops & UPL_POP_SET) {
6302 /* The protection granted with this assert will */
6303 /* not be complete. If the caller violates the */
6304 /* convention and attempts to change page state */
6305 /* without first setting busy we may not see it */
6306 /* because the page may already be busy. However */
6307 /* if such violations occur we will assert sooner */
6308 /* or later. */
6309 assert(dst_page->busy || (ops & UPL_POP_BUSY));
6310 if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
6311 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
6312 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
6313 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
6314 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
6315 }
6316
6317 if(ops & UPL_POP_CLR) {
6318 assert(dst_page->busy);
6319 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
6320 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
6321 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
6322 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
6323 if (ops & UPL_POP_BUSY) {
6324 dst_page->busy = FALSE;
6325 PAGE_WAKEUP(dst_page);
6326 }
6327 }
6328
6329 if (dst_page->encrypted) {
6330 /*
6331 * ENCRYPTED SWAP:
6332 * We need to decrypt this encrypted page before the
6333 * caller can access its contents.
6334 * But if the caller really wants to access the page's
6335 * contents, they have to keep the page "busy".
6336 * Otherwise, the page could get recycled or re-encrypted
6337 * at any time.
6338 */
6339 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
6340 dst_page->busy) {
6341 /*
6342 * The page is stable enough to be accessed by
6343 * the caller, so make sure its contents are
6344 * not encrypted.
6345 */
6346 vm_page_decrypt(dst_page, 0);
6347 } else {
6348 /*
6349 * The page is not busy, so don't bother
6350 * decrypting it, since anything could
6351 * happen to it between now and when the
6352 * caller wants to access it.
6353 * We should not give the caller access
6354 * to this page.
6355 */
6356 assert(!phys_entry);
6357 }
6358 }
6359
6360 if (phys_entry) {
6361 /*
6362 * The physical page number will remain valid
6363 * only if the page is kept busy.
6364 * ENCRYPTED SWAP: make sure we don't let the
6365 * caller access an encrypted page.
6366 */
6367 assert(dst_page->busy);
6368 assert(!dst_page->encrypted);
6369 *phys_entry = dst_page->phys_page;
6370 }
6371
6372 break;
6373 }
6374
6375 vm_object_unlock(object);
6376 return KERN_SUCCESS;
6377
6378}
6379
6380/*
6381 * vm_object_range_op offers performance enhancement over
6382 * vm_object_page_op for page_op functions which do not require page
6383 * level state to be returned from the call. Page_op was created to provide
6384 * a low-cost alternative to page manipulation via UPLs when only a single
6385 * page was involved. The range_op call establishes the ability in the _op
6386 * family of functions to work on multiple pages where the lack of page level
6387 * state handling allows the caller to avoid the overhead of the upl structures.
6388 */
6389
6390kern_return_t
6391vm_object_range_op(
6392 vm_object_t object,
6393 vm_object_offset_t offset_beg,
6394 vm_object_offset_t offset_end,
6395 int ops,
6396 int *range)
6397{
6398 vm_object_offset_t offset;
6399 vm_page_t dst_page;
6400
6401 if (object->resident_page_count == 0) {
6402 if (range) {
6403 if (ops & UPL_ROP_PRESENT)
6404 *range = 0;
6405 else
6406 *range = offset_end - offset_beg;
6407 }
6408 return KERN_SUCCESS;
6409 }
6410 vm_object_lock(object);
6411
6412 if (object->phys_contiguous) {
6413 vm_object_unlock(object);
6414 return KERN_INVALID_OBJECT;
6415 }
6416
2d21ac55 6417 offset = offset_beg & ~PAGE_MASK_64;
0c530ab8
A
6418
6419 while (offset < offset_end) {
6420 dst_page = vm_page_lookup(object, offset);
6421 if (dst_page != VM_PAGE_NULL) {
6422 if (ops & UPL_ROP_DUMP) {
6423 if (dst_page->busy || dst_page->cleaning) {
6424 /*
6425 * someone else is playing with the
6426 * page, we will have to wait
6427 */
2d21ac55 6428 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
0c530ab8
A
6429 /*
6430 * need to relook the page up since it's
6431 * state may have changed while we slept
6432 * it might even belong to a different object
6433 * at this point
6434 */
6435 continue;
6436 }
2d21ac55 6437 if (dst_page->pmapped == TRUE)
0c530ab8 6438 pmap_disconnect(dst_page->phys_page);
0c530ab8 6439
2d21ac55
A
6440 vm_page_lock_queues();
6441 vm_page_free(dst_page);
0c530ab8 6442 vm_page_unlock_queues();
2d21ac55 6443
0c530ab8
A
6444 } else if (ops & UPL_ROP_ABSENT)
6445 break;
6446 } else if (ops & UPL_ROP_PRESENT)
6447 break;
6448
6449 offset += PAGE_SIZE;
6450 }
6451 vm_object_unlock(object);
6452
2d21ac55
A
6453 if (range) {
6454 if (offset > offset_end)
6455 offset = offset_end;
cf7d32b8
A
6456 if(offset > offset_beg)
6457 *range = offset - offset_beg;
6458 else *range=0;
2d21ac55 6459 }
0c530ab8
A
6460 return KERN_SUCCESS;
6461}
2d21ac55
A
6462
6463
6464uint32_t scan_object_collision = 0;
6465
6466void
6467vm_object_lock(vm_object_t object)
6468{
6469 if (object == vm_pageout_scan_wants_object) {
6470 scan_object_collision++;
6471 mutex_pause(2);
6472 }
6473 lck_rw_lock_exclusive(&object->Lock);
6474}
6475
6476boolean_t
6477vm_object_lock_try(vm_object_t object)
6478{
6479 if (object == vm_pageout_scan_wants_object) {
6480 scan_object_collision++;
6481 mutex_pause(2);
6482 }
6483 return (lck_rw_try_lock_exclusive(&object->Lock));
6484}
6485
6486void
6487vm_object_lock_shared(vm_object_t object)
6488{
6489 if (object == vm_pageout_scan_wants_object) {
6490 scan_object_collision++;
6491 mutex_pause(2);
6492 }
6493 lck_rw_lock_shared(&object->Lock);
6494}
6495
6496boolean_t
6497vm_object_lock_try_shared(vm_object_t object)
6498{
6499 if (object == vm_pageout_scan_wants_object) {
6500 scan_object_collision++;
6501 mutex_pause(2);
6502 }
6503 return (lck_rw_try_lock_shared(&object->Lock));
6504}