[apple/xnu.git] / osfmk / vm / vm_object.c (xnu-1504.15.3)
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_object.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Virtual memory object module.
63 */
64
2d21ac55 65#include <debug.h>
1c79356b
A
66#include <mach_pagemap.h>
67#include <task_swapper.h>
68
0b4e3aa0 69#include <mach/mach_types.h>
1c79356b
A
70#include <mach/memory_object.h>
71#include <mach/memory_object_default.h>
72#include <mach/memory_object_control_server.h>
73#include <mach/vm_param.h>
91447636
A
74
75#include <ipc/ipc_types.h>
1c79356b 76#include <ipc/ipc_port.h>
91447636
A
77
78#include <kern/kern_types.h>
1c79356b
A
79#include <kern/assert.h>
80#include <kern/lock.h>
81#include <kern/queue.h>
82#include <kern/xpr.h>
83#include <kern/zalloc.h>
84#include <kern/host.h>
85#include <kern/host_statistics.h>
86#include <kern/processor.h>
91447636
A
87#include <kern/misc_protos.h>
88
1c79356b
A
89#include <vm/memory_object.h>
90#include <vm/vm_fault.h>
91#include <vm/vm_map.h>
92#include <vm/vm_object.h>
93#include <vm/vm_page.h>
94#include <vm/vm_pageout.h>
91447636 95#include <vm/vm_protos.h>
2d21ac55 96#include <vm/vm_purgeable_internal.h>
1c79356b 97
b0d623f7
A
98#if CONFIG_EMBEDDED
99#include <sys/kern_memorystatus.h>
100#endif
101
1c79356b
A
102/*
103 * Virtual memory objects maintain the actual data
104 * associated with allocated virtual memory. A given
105 * page of memory exists within exactly one object.
106 *
107 * An object is only deallocated when all "references"
0b4e3aa0 108 * are given up.
1c79356b
A
109 *
110 * Associated with each object is a list of all resident
111 * memory pages belonging to that object; this list is
112 * maintained by the "vm_page" module, but locked by the object's
113 * lock.
114 *
0b4e3aa0 115 * Each object also records the memory object reference
1c79356b 116 * that is used by the kernel to request and write
0b4e3aa0 117 * back data (the memory object, field "pager"), etc...
1c79356b
A
118 *
119 * Virtual memory objects are allocated to provide
120 * zero-filled memory (vm_allocate) or map a user-defined
121 * memory object into a virtual address space (vm_map).
122 *
123 * Virtual memory objects that refer to a user-defined
124 * memory object are called "permanent", because all changes
125 * made in virtual memory are reflected back to the
126 * memory manager, which may then store them permanently.
127 * Other virtual memory objects are called "temporary",
128 * meaning that changes need be written back only when
129 * necessary to reclaim pages, and that storage associated
130 * with the object can be discarded once it is no longer
131 * mapped.
132 *
133 * A permanent memory object may be mapped into more
134 * than one virtual address space. Moreover, two threads
135 * may attempt to make the first mapping of a memory
136 * object concurrently. Only one thread is allowed to
137 * complete this mapping; all others wait for the
138 * "pager_initialized" field is asserted, indicating
139 * that the first thread has initialized all of the
140 * necessary fields in the virtual memory object structure.
141 *
142 * The kernel relies on a *default memory manager* to
143 * provide backing storage for the zero-filled virtual
0b4e3aa0 144 * memory objects. The pager memory objects associated
1c79356b 145 * with these temporary virtual memory objects are only
0b4e3aa0
A
146 * requested from the default memory manager when it
147 * becomes necessary. Virtual memory objects
1c79356b
A
148 * that depend on the default memory manager are called
149 * "internal". The "pager_created" field is provided to
150 * indicate whether these ports have ever been allocated.
151 *
152 * The kernel may also create virtual memory objects to
153 * hold changed pages after a copy-on-write operation.
154 * In this case, the virtual memory object (and its
155 * backing storage -- its memory object) only contain
156 * those pages that have been changed. The "shadow"
157 * field refers to the virtual memory object that contains
158 * the remainder of the contents. The "shadow_offset"
159 * field indicates where in the "shadow" these contents begin.
160 * The "copy" field refers to a virtual memory object
161 * to which changed pages must be copied before changing
162 * this object, in order to implement another form
163 * of copy-on-write optimization.
164 *
165 * The virtual memory object structure also records
166 * the attributes associated with its memory object.
167 * The "pager_ready", "can_persist" and "copy_strategy"
168 * fields represent those attributes. The "cached_list"
169 * field is used in the implementation of the persistence
170 * attribute.
171 *
172 * ZZZ Continue this comment.
173 */
174
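/*
 * Illustrative sketch of the shadow-chain lookup described above: a page
 * is searched for in the object itself, then in its shadow at the offset
 * translated by "shadow_offset", and so on.  This is a simplified model
 * of what vm_fault_page() really does (which also handles locking, busy
 * pages and the "copy" object); it is not compiled into the kernel.
 */
#if 0
static vm_page_t
shadow_chain_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	while (object != VM_OBJECT_NULL) {
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL)
			return m;			/* resident in this object */
		offset += object->shadow_offset;	/* translate into the shadow */
		object = object->shadow;
	}
	return VM_PAGE_NULL;	/* zero-fill or ask the pager */
}
#endif
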
175/* Forward declarations for internal functions. */
0b4e3aa0 176static kern_return_t vm_object_terminate(
1c79356b
A
177 vm_object_t object);
178
179extern void vm_object_remove(
180 vm_object_t object);
181
0b4e3aa0 182static kern_return_t vm_object_copy_call(
1c79356b
A
183 vm_object_t src_object,
184 vm_object_offset_t src_offset,
185 vm_object_size_t size,
186 vm_object_t *_result_object);
187
0b4e3aa0 188static void vm_object_do_collapse(
1c79356b
A
189 vm_object_t object,
190 vm_object_t backing_object);
191
0b4e3aa0 192static void vm_object_do_bypass(
1c79356b
A
193 vm_object_t object,
194 vm_object_t backing_object);
195
0b4e3aa0 196static void vm_object_release_pager(
b0d623f7
A
197 memory_object_t pager,
198 boolean_t hashed);
1c79356b 199
0b4e3aa0 200static zone_t vm_object_zone; /* vm backing store zone */
1c79356b
A
201
202/*
203 * All wired-down kernel memory belongs to a single virtual
204 * memory object (kernel_object) to avoid wasting data structures.
205 */
0b4e3aa0 206static struct vm_object kernel_object_store;
0c530ab8 207vm_object_t kernel_object;
1c79356b 208
2d21ac55 209
1c79356b
A
210/*
211 * The submap object is used as a placeholder for vm_map_submap
212 * operations. The object is declared in vm_map.c because it
213 * is exported by the vm_map module. The storage is declared
214 * here because it must be initialized here.
215 */
0b4e3aa0 216static struct vm_object vm_submap_object_store;
1c79356b
A
217
218/*
219 * Virtual memory objects are initialized from
220 * a template (see vm_object_allocate).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
0b4e3aa0 224 * (see _vm_object_allocate()).
1c79356b 225 */
0b4e3aa0 226static struct vm_object vm_object_template;
1c79356b 227
b0d623f7
A
228unsigned int vm_page_purged_wired = 0;
229unsigned int vm_page_purged_busy = 0;
230unsigned int vm_page_purged_others = 0;
231
232#if VM_OBJECT_CACHE
1c79356b
A
233/*
234 * Virtual memory objects that are not referenced by
235 * any address maps, but that are allowed to persist
236 * (an attribute specified by the associated memory manager),
237 * are kept in a queue (vm_object_cached_list).
238 *
239 * When an object from this queue is referenced again,
240 * for example to make another address space mapping,
241 * it must be removed from the queue. That is, the
242 * queue contains *only* objects with zero references.
243 *
244 * The kernel may choose to terminate objects from this
245 * queue in order to reclaim storage. The current policy
246 * is to permit a fixed maximum number of unreferenced
247 * objects (vm_object_cached_max).
248 *
249 * A spin lock (accessed by routines
250 * vm_object_cache_{lock,lock_try,unlock}) governs the
251 * object cache. It must be held when objects are
252 * added to or removed from the cache (in vm_object_terminate).
253 * The routines that acquire a reference to a virtual
254 * memory object based on one of the memory object ports
255 * must also lock the cache.
256 *
257 * Ideally, the object cache should be more isolated
258 * from the reference mechanism, so that the lock need
259 * not be held to make simple references.
260 */
b0d623f7
A
261static vm_object_t vm_object_cache_trim(
262 boolean_t called_from_vm_object_deallocate);
263
0b4e3aa0 264static queue_head_t vm_object_cached_list;
9bccf70c 265static int vm_object_cached_count=0;
0b4e3aa0
A
266static int vm_object_cached_high; /* highest # cached objects */
267static int vm_object_cached_max = 512; /* may be patched*/
1c79356b 268
b0d623f7
A
269static lck_mtx_t vm_object_cached_lock_data;
270static lck_mtx_ext_t vm_object_cached_lock_data_ext;
1c79356b
A
271
272#define vm_object_cache_lock() \
b0d623f7
A
273 lck_mtx_lock(&vm_object_cached_lock_data)
274#define vm_object_cache_lock_try() \
275 lck_mtx_try_lock(&vm_object_cached_lock_data)
276#define vm_object_cache_lock_spin() \
277 lck_mtx_lock_spin(&vm_object_cached_lock_data)
1c79356b 278#define vm_object_cache_unlock() \
b0d623f7
A
279 lck_mtx_unlock(&vm_object_cached_lock_data)
280
281#endif /* VM_OBJECT_CACHE */
282
283
284static void vm_object_deactivate_all_pages(
285 vm_object_t object);
286
1c79356b
A
287
288#define VM_OBJECT_HASH_COUNT 1024
b0d623f7
A
289#define VM_OBJECT_HASH_LOCK_COUNT 512
290
d1ecb069
A
291static lck_mtx_t vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT];
292static lck_mtx_ext_t vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT];
b0d623f7 293
0b4e3aa0 294static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
b0d623f7 295static struct zone *vm_object_hash_zone;
1c79356b
A
296
297struct vm_object_hash_entry {
298 queue_chain_t hash_link; /* hash chain link */
0b4e3aa0 299 memory_object_t pager; /* pager we represent */
1c79356b
A
300 vm_object_t object; /* corresponding object */
301 boolean_t waiting; /* someone waiting for
302 * termination */
303};
304
305typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
306#define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
307
b0d623f7 308#define VM_OBJECT_HASH_SHIFT 5
1c79356b 309#define vm_object_hash(pager) \
b0d623f7
A
310 ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT))
311
312#define vm_object_lock_hash(pager) \
313 ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT))
1c79356b 314
91447636
A
315void vm_object_hash_entry_free(
316 vm_object_hash_entry_t entry);
317
8f6c56a5
A
318static void vm_object_reap(vm_object_t object);
319static void vm_object_reap_async(vm_object_t object);
320static void vm_object_reaper_thread(void);
b0d623f7
A
321
322static lck_mtx_t vm_object_reaper_lock_data;
323static lck_mtx_ext_t vm_object_reaper_lock_data_ext;
324
325static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */
8f6c56a5
A
326unsigned int vm_object_reap_count = 0;
327unsigned int vm_object_reap_count_async = 0;
328
b0d623f7
A
329#define vm_object_reaper_lock() \
330 lck_mtx_lock(&vm_object_reaper_lock_data)
331#define vm_object_reaper_lock_spin() \
332 lck_mtx_lock_spin(&vm_object_reaper_lock_data)
333#define vm_object_reaper_unlock() \
334 lck_mtx_unlock(&vm_object_reaper_lock_data)
335
336
337
338static lck_mtx_t *
339vm_object_hash_lock_spin(
340 memory_object_t pager)
341{
342 int index;
343
344 index = vm_object_lock_hash(pager);
345
346 lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]);
347
348 return (&vm_object_hashed_lock_data[index]);
349}
350
351static void
352vm_object_hash_unlock(lck_mtx_t *lck)
353{
354 lck_mtx_unlock(lck);
355}
356
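/*
 * Rough sketch of how the two hashes above cooperate (with the default
 * VM_OBJECT_HASH_COUNT == 1024 and VM_OBJECT_HASH_LOCK_COUNT == 512):
 * the pager pointer is shifted right by VM_OBJECT_HASH_SHIFT and taken
 * modulo the bucket count and the lock count respectively, so several
 * buckets share one lock.  Not compiled; for illustration only.
 */
#if 0
static void
example_hash_indices(
	memory_object_t	pager)
{
	int		bucket;
	int		lock_index;
	lck_mtx_t	*lck;

	bucket = vm_object_hash(pager);		/* 0 .. VM_OBJECT_HASH_COUNT - 1 */
	lock_index = vm_object_lock_hash(pager);	/* 0 .. VM_OBJECT_HASH_LOCK_COUNT - 1 */

	lck = vm_object_hash_lock_spin(pager);	/* locks vm_object_hashed_lock_data[lock_index] */
	/* ... search or modify vm_object_hashtable[bucket] here ... */
	vm_object_hash_unlock(lck);

	(void) bucket;
	(void) lock_index;
}
#endif
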
357
1c79356b
A
358/*
359 * vm_object_hash_lookup looks up a pager in the hashtable
360 * and returns the corresponding entry, with optional removal.
361 */
0b4e3aa0 362static vm_object_hash_entry_t
1c79356b 363vm_object_hash_lookup(
0b4e3aa0 364 memory_object_t pager,
1c79356b
A
365 boolean_t remove_entry)
366{
b0d623f7
A
367 queue_t bucket;
368 vm_object_hash_entry_t entry;
1c79356b
A
369
370 bucket = &vm_object_hashtable[vm_object_hash(pager)];
371
372 entry = (vm_object_hash_entry_t)queue_first(bucket);
373 while (!queue_end(bucket, (queue_entry_t)entry)) {
b0d623f7
A
374 if (entry->pager == pager) {
375 if (remove_entry) {
376 queue_remove(bucket, entry,
377 vm_object_hash_entry_t, hash_link);
378 }
1c79356b
A
379 return(entry);
380 }
1c79356b
A
381 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
382 }
1c79356b
A
383 return(VM_OBJECT_HASH_ENTRY_NULL);
384}
385
386/*
 387 * vm_object_hash_insert enters the specified
388 * pager / cache object association in the hashtable.
389 */
390
0b4e3aa0 391static void
1c79356b 392vm_object_hash_insert(
b0d623f7
A
393 vm_object_hash_entry_t entry,
394 vm_object_t object)
1c79356b 395{
b0d623f7 396 queue_t bucket;
1c79356b
A
397
398 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
399
400 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
b0d623f7
A
401
402 entry->object = object;
403 object->hashed = TRUE;
1c79356b
A
404}
405
0b4e3aa0 406static vm_object_hash_entry_t
1c79356b 407vm_object_hash_entry_alloc(
0b4e3aa0 408 memory_object_t pager)
1c79356b
A
409{
410 vm_object_hash_entry_t entry;
411
412 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
413 entry->pager = pager;
414 entry->object = VM_OBJECT_NULL;
415 entry->waiting = FALSE;
416
417 return(entry);
418}
419
420void
421vm_object_hash_entry_free(
422 vm_object_hash_entry_t entry)
423{
91447636 424 zfree(vm_object_hash_zone, entry);
1c79356b
A
425}
426
427/*
428 * vm_object_allocate:
429 *
430 * Returns a new object with the given size.
431 */
432
91447636 433__private_extern__ void
1c79356b
A
434_vm_object_allocate(
435 vm_object_size_t size,
436 vm_object_t object)
437{
438 XPR(XPR_VM_OBJECT,
439 "vm_object_allocate, object 0x%X size 0x%X\n",
b0d623f7 440 object, size, 0,0,0);
1c79356b
A
441
442 *object = vm_object_template;
443 queue_init(&object->memq);
444 queue_init(&object->msr_q);
b0d623f7 445#if UPL_DEBUG
1c79356b 446 queue_init(&object->uplq);
91447636 447#endif /* UPL_DEBUG */
1c79356b
A
448 vm_object_lock_init(object);
449 object->size = size;
450}
451
0b4e3aa0 452__private_extern__ vm_object_t
1c79356b
A
453vm_object_allocate(
454 vm_object_size_t size)
455{
456 register vm_object_t object;
1c79356b
A
457
458 object = (vm_object_t) zalloc(vm_object_zone);
459
0b4e3aa0
A
460// dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
461
462 if (object != VM_OBJECT_NULL)
463 _vm_object_allocate(size, object);
1c79356b
A
464
465 return object;
466}
467
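/*
 * Minimal usage sketch for the allocator above: create an anonymous,
 * temporary (zero-fill) object covering one page and release the single
 * reference it is born with.  Real callers typically hand that reference
 * to the map code (vm_map_enter and friends) instead.  Not compiled.
 */
#if 0
static void
example_anonymous_object(void)
{
	vm_object_t	object;

	object = vm_object_allocate((vm_object_size_t) PAGE_SIZE);
	if (object != VM_OBJECT_NULL) {
		assert(object->internal && object->temporary);
		vm_object_deallocate(object);	/* drops the initial reference */
	}
}
#endif
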
2d21ac55 468
b0d623f7 469lck_grp_t vm_object_lck_grp;
2d21ac55 470lck_grp_attr_t vm_object_lck_grp_attr;
b0d623f7
A
471lck_attr_t vm_object_lck_attr;
472lck_attr_t kernel_object_lck_attr;
2d21ac55 473
1c79356b
A
474/*
475 * vm_object_bootstrap:
476 *
477 * Initialize the VM objects module.
478 */
0b4e3aa0 479__private_extern__ void
1c79356b
A
480vm_object_bootstrap(void)
481{
91447636 482 register int i;
1c79356b
A
483
484 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
b0d623f7
A
485 round_page(512*1024),
486 round_page(12*1024),
1c79356b 487 "vm objects");
0b4c1975 488 zone_change(vm_object_zone, Z_NOENCRYPT, TRUE);
1c79356b 489
b0d623f7
A
490 vm_object_init_lck_grp();
491
492#if VM_OBJECT_CACHE
1c79356b 493 queue_init(&vm_object_cached_list);
b0d623f7
A
494
495 lck_mtx_init_ext(&vm_object_cached_lock_data,
496 &vm_object_cached_lock_data_ext,
497 &vm_object_lck_grp,
498 &vm_object_lck_attr);
499#endif
500 queue_init(&vm_object_reaper_queue);
501
502 for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) {
503 lck_mtx_init_ext(&vm_object_hashed_lock_data[i],
504 &vm_object_hashed_lock_data_ext[i],
505 &vm_object_lck_grp,
506 &vm_object_lck_attr);
507 }
508 lck_mtx_init_ext(&vm_object_reaper_lock_data,
509 &vm_object_reaper_lock_data_ext,
510 &vm_object_lck_grp,
511 &vm_object_lck_attr);
1c79356b
A
512
513 vm_object_hash_zone =
514 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
b0d623f7
A
515 round_page(512*1024),
516 round_page(12*1024),
1c79356b 517 "vm object hash entries");
0b4c1975 518 zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE);
1c79356b
A
519
520 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
521 queue_init(&vm_object_hashtable[i]);
522
2d21ac55 523
1c79356b
A
524 /*
525 * Fill in a template object, for quick initialization
526 */
527
528 /* memq; Lock; init after allocation */
2d21ac55
A
529 vm_object_template.memq.prev = NULL;
530 vm_object_template.memq.next = NULL;
531#if 0
532 /*
533 * We can't call vm_object_lock_init() here because that will
534 * allocate some memory and VM is not fully initialized yet.
b0d623f7 535 * The lock will be initialized for each allocated object in
2d21ac55
A
536 * _vm_object_allocate(), so we don't need to initialize it in
537 * the vm_object_template.
538 */
539 vm_object_lock_init(&vm_object_template);
540#endif
1c79356b 541 vm_object_template.size = 0;
91447636 542 vm_object_template.memq_hint = VM_PAGE_NULL;
1c79356b
A
543 vm_object_template.ref_count = 1;
544#if TASK_SWAPPER
545 vm_object_template.res_count = 1;
546#endif /* TASK_SWAPPER */
547 vm_object_template.resident_page_count = 0;
b0d623f7
A
548 vm_object_template.wired_page_count = 0;
549 vm_object_template.reusable_page_count = 0;
1c79356b
A
550 vm_object_template.copy = VM_OBJECT_NULL;
551 vm_object_template.shadow = VM_OBJECT_NULL;
552 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
0b4e3aa0 553 vm_object_template.pager = MEMORY_OBJECT_NULL;
1c79356b 554 vm_object_template.paging_offset = 0;
91447636 555 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
1c79356b 556 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
1c79356b 557 vm_object_template.paging_in_progress = 0;
b0d623f7 558 vm_object_template.activity_in_progress = 0;
1c79356b
A
559
560 /* Begin bitfields */
561 vm_object_template.all_wanted = 0; /* all bits FALSE */
562 vm_object_template.pager_created = FALSE;
563 vm_object_template.pager_initialized = FALSE;
564 vm_object_template.pager_ready = FALSE;
565 vm_object_template.pager_trusted = FALSE;
566 vm_object_template.can_persist = FALSE;
567 vm_object_template.internal = TRUE;
568 vm_object_template.temporary = TRUE;
569 vm_object_template.private = FALSE;
570 vm_object_template.pageout = FALSE;
571 vm_object_template.alive = TRUE;
2d21ac55
A
572 vm_object_template.purgable = VM_PURGABLE_DENY;
573 vm_object_template.shadowed = FALSE;
1c79356b
A
574 vm_object_template.silent_overwrite = FALSE;
575 vm_object_template.advisory_pageout = FALSE;
2d21ac55 576 vm_object_template.true_share = FALSE;
1c79356b 577 vm_object_template.terminating = FALSE;
2d21ac55 578 vm_object_template.named = FALSE;
1c79356b
A
579 vm_object_template.shadow_severed = FALSE;
580 vm_object_template.phys_contiguous = FALSE;
0b4e3aa0 581 vm_object_template.nophyscache = FALSE;
1c79356b
A
582 /* End bitfields */
583
2d21ac55
A
584 vm_object_template.cached_list.prev = NULL;
585 vm_object_template.cached_list.next = NULL;
586 vm_object_template.msr_q.prev = NULL;
587 vm_object_template.msr_q.next = NULL;
588
1c79356b 589 vm_object_template.last_alloc = (vm_object_offset_t) 0;
2d21ac55
A
590 vm_object_template.sequential = (vm_object_offset_t) 0;
591 vm_object_template.pages_created = 0;
592 vm_object_template.pages_used = 0;
593
1c79356b
A
594#if MACH_PAGEMAP
595 vm_object_template.existence_map = VM_EXTERNAL_NULL;
596#endif /* MACH_PAGEMAP */
2d21ac55 597 vm_object_template.cow_hint = ~(vm_offset_t)0;
1c79356b
A
598#if MACH_ASSERT
599 vm_object_template.paging_object = VM_OBJECT_NULL;
600#endif /* MACH_ASSERT */
601
2d21ac55
A
602 /* cache bitfields */
603 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
604 vm_object_template.code_signed = FALSE;
b0d623f7
A
605 vm_object_template.hashed = FALSE;
606 vm_object_template.transposed = FALSE;
593a1d5f 607 vm_object_template.mapping_in_progress = FALSE;
b0d623f7
A
608 vm_object_template.volatile_empty = FALSE;
609 vm_object_template.volatile_fault = FALSE;
610 vm_object_template.all_reusable = FALSE;
611 vm_object_template.blocked_access = FALSE;
612 vm_object_template.__object2_unused_bits = 0;
613#if UPL_DEBUG
2d21ac55
A
614 vm_object_template.uplq.prev = NULL;
615 vm_object_template.uplq.next = NULL;
616#endif /* UPL_DEBUG */
617#ifdef VM_PIP_DEBUG
618 bzero(&vm_object_template.pip_holders,
619 sizeof (vm_object_template.pip_holders));
620#endif /* VM_PIP_DEBUG */
621
622 vm_object_template.objq.next=NULL;
623 vm_object_template.objq.prev=NULL;
624
625
1c79356b
A
626 /*
627 * Initialize the "kernel object"
628 */
629
630 kernel_object = &kernel_object_store;
631
632/*
633 * Note that in the following size specifications, we need to add 1 because
55e303ae 634 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
1c79356b 635 */
55e303ae
A
636
637#ifdef ppc
b0d623f7
A
638 _vm_object_allocate(vm_last_addr + 1,
639 kernel_object);
55e303ae 640#else
b0d623f7
A
641 _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
642 kernel_object);
55e303ae
A
643#endif
644 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1c79356b
A
645
646 /*
647 * Initialize the "submap object". Make it as large as the
648 * kernel object so that no limit is imposed on submap sizes.
649 */
650
651 vm_submap_object = &vm_submap_object_store;
55e303ae 652#ifdef ppc
b0d623f7
A
653 _vm_object_allocate(vm_last_addr + 1,
654 vm_submap_object);
55e303ae 655#else
b0d623f7
A
656 _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
657 vm_submap_object);
55e303ae
A
658#endif
659 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
660
1c79356b
A
661 /*
662 * Create an "extra" reference to this object so that we never
663 * try to deallocate it; zfree doesn't like to be called with
664 * non-zone memory.
665 */
666 vm_object_reference(vm_submap_object);
667
668#if MACH_PAGEMAP
669 vm_external_module_initialize();
670#endif /* MACH_PAGEMAP */
671}
672
8f6c56a5
A
673void
674vm_object_reaper_init(void)
675{
676 kern_return_t kr;
677 thread_t thread;
678
8f6c56a5
A
679 kr = kernel_thread_start_priority(
680 (thread_continue_t) vm_object_reaper_thread,
681 NULL,
682 BASEPRI_PREEMPT - 1,
683 &thread);
684 if (kr != KERN_SUCCESS) {
2d21ac55 685 panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
8f6c56a5
A
686 }
687 thread_deallocate(thread);
688}
689
0b4e3aa0 690__private_extern__ void
1c79356b
A
691vm_object_init(void)
692{
693 /*
694 * Finish initializing the kernel object.
695 */
696}
697
2d21ac55
A
698
699__private_extern__ void
700vm_object_init_lck_grp(void)
701{
b0d623f7 702 /*
2d21ac55
A
 703 * initialize the vm_object lock world
704 */
b0d623f7 705 lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
2d21ac55
A
706 lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
707 lck_attr_setdefault(&vm_object_lck_attr);
708 lck_attr_setdefault(&kernel_object_lck_attr);
709 lck_attr_cleardebug(&kernel_object_lck_attr);
710}
711
b0d623f7 712#if VM_OBJECT_CACHE
1c79356b
A
713#define MIGHT_NOT_CACHE_SHADOWS 1
714#if MIGHT_NOT_CACHE_SHADOWS
0b4e3aa0 715static int cache_shadows = TRUE;
1c79356b 716#endif /* MIGHT_NOT_CACHE_SHADOWS */
b0d623f7 717#endif
1c79356b
A
718
719/*
720 * vm_object_deallocate:
721 *
722 * Release a reference to the specified object,
723 * gained either through a vm_object_allocate
724 * or a vm_object_reference call. When all references
725 * are gone, storage associated with this object
726 * may be relinquished.
727 *
728 * No object may be locked.
729 */
2d21ac55
A
730unsigned long vm_object_deallocate_shared_successes = 0;
731unsigned long vm_object_deallocate_shared_failures = 0;
732unsigned long vm_object_deallocate_shared_swap_failures = 0;
0b4e3aa0 733__private_extern__ void
1c79356b
A
734vm_object_deallocate(
735 register vm_object_t object)
736{
b0d623f7 737#if VM_OBJECT_CACHE
2d21ac55 738 boolean_t retry_cache_trim = FALSE;
2d21ac55 739 uint32_t try_failed_count = 0;
b0d623f7
A
740#endif
741 vm_object_t shadow = VM_OBJECT_NULL;
1c79356b
A
742
743// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
744// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
745
2d21ac55
A
746 if (object == VM_OBJECT_NULL)
747 return;
748
749 if (object == kernel_object) {
b0d623f7
A
750 vm_object_lock_shared(object);
751
752 OSAddAtomic(-1, &object->ref_count);
753
754 if (object->ref_count == 0) {
2d21ac55
A
755 panic("vm_object_deallocate: losing kernel_object\n");
756 }
b0d623f7 757 vm_object_unlock(object);
2d21ac55
A
758 return;
759 }
760
761 if (object->ref_count > 2 ||
762 (!object->named && object->ref_count > 1)) {
763 UInt32 original_ref_count;
764 volatile UInt32 *ref_count_p;
765 Boolean atomic_swap;
766
767 /*
768 * The object currently looks like it is not being
769 * kept alive solely by the reference we're about to release.
770 * Let's try and release our reference without taking
771 * all the locks we would need if we had to terminate the
772 * object (cache lock + exclusive object lock).
773 * Lock the object "shared" to make sure we don't race with
774 * anyone holding it "exclusive".
775 */
776 vm_object_lock_shared(object);
777 ref_count_p = (volatile UInt32 *) &object->ref_count;
778 original_ref_count = object->ref_count;
779 /*
780 * Test again as "ref_count" could have changed.
781 * "named" shouldn't change.
782 */
783 if (original_ref_count > 2 ||
784 (!object->named && original_ref_count > 1)) {
785 atomic_swap = OSCompareAndSwap(
786 original_ref_count,
787 original_ref_count - 1,
788 (UInt32 *) &object->ref_count);
789 if (atomic_swap == FALSE) {
790 vm_object_deallocate_shared_swap_failures++;
791 }
792
793 } else {
794 atomic_swap = FALSE;
795 }
796 vm_object_unlock(object);
797
798 if (atomic_swap) {
b0d623f7
A
799 /*
800 * ref_count was updated atomically !
801 */
2d21ac55
A
802 vm_object_deallocate_shared_successes++;
803 return;
804 }
805
806 /*
807 * Someone else updated the ref_count at the same
808 * time and we lost the race. Fall back to the usual
809 * slow but safe path...
810 */
811 vm_object_deallocate_shared_failures++;
812 }
1c79356b
A
813
814 while (object != VM_OBJECT_NULL) {
815
b0d623f7 816 vm_object_lock(object);
2d21ac55 817
0b4e3aa0
A
818 assert(object->ref_count > 0);
819
820 /*
821 * If the object has a named reference, and only
822 * that reference would remain, inform the pager
823 * about the last "mapping" reference going away.
824 */
825 if ((object->ref_count == 2) && (object->named)) {
826 memory_object_t pager = object->pager;
827
828 /* Notify the Pager that there are no */
829 /* more mappers for this object */
830
831 if (pager != MEMORY_OBJECT_NULL) {
593a1d5f
A
832 vm_object_mapping_wait(object, THREAD_UNINT);
833 vm_object_mapping_begin(object);
0b4e3aa0 834 vm_object_unlock(object);
2d21ac55 835
b0d623f7 836 memory_object_last_unmap(pager);
593a1d5f 837
b0d623f7 838 vm_object_lock(object);
593a1d5f 839 vm_object_mapping_end(object);
0b4e3aa0 840 }
b0d623f7
A
841 /*
842 * recheck the ref_count since we dropped the object lock
843 * to call 'memory_object_last_unmap'... it's possible
844 * additional references got taken and we only want
845 * to deactivate the pages if this 'named' object will only
 846 * be referenced by the backing pager once we drop our reference
847 * below
848 */
849 if (!object->terminating && object->ref_count == 2)
850 vm_object_deactivate_all_pages(object);
851
852 assert(object->ref_count > 0);
0b4e3aa0 853 }
1c79356b
A
854
855 /*
856 * Lose the reference. If other references
857 * remain, then we are done, unless we need
858 * to retry a cache trim.
859 * If it is the last reference, then keep it
860 * until any pending initialization is completed.
861 */
862
0b4e3aa0
A
863 /* if the object is terminating, it cannot go into */
864 /* the cache and we obviously should not call */
865 /* terminate again. */
866
867 if ((object->ref_count > 1) || object->terminating) {
2d21ac55 868 vm_object_lock_assert_exclusive(object);
1c79356b 869 object->ref_count--;
1c79356b 870 vm_object_res_deallocate(object);
91447636
A
871
872 if (object->ref_count == 1 &&
873 object->shadow != VM_OBJECT_NULL) {
874 /*
0c530ab8
A
875 * There's only one reference left on this
876 * VM object. We can't tell if it's a valid
877 * one (from a mapping for example) or if this
878 * object is just part of a possibly stale and
879 * useless shadow chain.
880 * We would like to try and collapse it into
881 * its parent, but we don't have any pointers
882 * back to this parent object.
91447636
A
883 * But we can try and collapse this object with
884 * its own shadows, in case these are useless
885 * too...
0c530ab8
A
886 * We can't bypass this object though, since we
887 * don't know if this last reference on it is
888 * meaningful or not.
91447636 889 */
0c530ab8 890 vm_object_collapse(object, 0, FALSE);
91447636 891 }
91447636 892 vm_object_unlock(object);
b0d623f7 893#if VM_OBJECT_CACHE
1c79356b
A
894 if (retry_cache_trim &&
895 ((object = vm_object_cache_trim(TRUE)) !=
896 VM_OBJECT_NULL)) {
897 continue;
898 }
b0d623f7 899#endif
1c79356b
A
900 return;
901 }
902
903 /*
904 * We have to wait for initialization
905 * before destroying or caching the object.
906 */
907
908 if (object->pager_created && ! object->pager_initialized) {
909 assert(! object->can_persist);
910 vm_object_assert_wait(object,
911 VM_OBJECT_EVENT_INITIALIZED,
912 THREAD_UNINT);
913 vm_object_unlock(object);
b0d623f7 914
9bccf70c 915 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
916 continue;
917 }
918
b0d623f7 919#if VM_OBJECT_CACHE
1c79356b
A
920 /*
921 * If this object can persist, then enter it in
922 * the cache. Otherwise, terminate it.
923 *
924 * NOTE: Only permanent objects are cached, and
925 * permanent objects cannot have shadows. This
926 * affects the residence counting logic in a minor
927 * way (can do it in-line, mostly).
928 */
929
0b4e3aa0 930 if ((object->can_persist) && (object->alive)) {
1c79356b
A
931 /*
932 * Now it is safe to decrement reference count,
933 * and to return if reference count is > 0.
934 */
b0d623f7 935
2d21ac55 936 vm_object_lock_assert_exclusive(object);
1c79356b
A
937 if (--object->ref_count > 0) {
938 vm_object_res_deallocate(object);
939 vm_object_unlock(object);
b0d623f7 940
1c79356b
A
941 if (retry_cache_trim &&
942 ((object = vm_object_cache_trim(TRUE)) !=
943 VM_OBJECT_NULL)) {
944 continue;
945 }
946 return;
947 }
948
949#if MIGHT_NOT_CACHE_SHADOWS
950 /*
951 * Remove shadow now if we don't
952 * want to cache shadows.
953 */
954 if (! cache_shadows) {
955 shadow = object->shadow;
956 object->shadow = VM_OBJECT_NULL;
957 }
958#endif /* MIGHT_NOT_CACHE_SHADOWS */
959
960 /*
961 * Enter the object onto the queue of
962 * cached objects, and deactivate
963 * all of its pages.
964 */
965 assert(object->shadow == VM_OBJECT_NULL);
966 VM_OBJ_RES_DECR(object);
967 XPR(XPR_VM_OBJECT,
968 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
b0d623f7
A
969 object,
970 vm_object_cached_list.next,
971 vm_object_cached_list.prev,0,0);
972
973
974 vm_object_unlock(object);
975
976 try_failed_count = 0;
977 for (;;) {
978 vm_object_cache_lock();
979
980 /*
981 * if we try to take a regular lock here
982 * we risk deadlocking against someone
983 * holding a lock on this object while
984 * trying to vm_object_deallocate a different
985 * object
986 */
987 if (vm_object_lock_try(object))
988 break;
989 vm_object_cache_unlock();
990 try_failed_count++;
1c79356b 991
b0d623f7
A
992 mutex_pause(try_failed_count); /* wait a bit */
993 }
1c79356b
A
994 vm_object_cached_count++;
995 if (vm_object_cached_count > vm_object_cached_high)
996 vm_object_cached_high = vm_object_cached_count;
997 queue_enter(&vm_object_cached_list, object,
998 vm_object_t, cached_list);
999 vm_object_cache_unlock();
b0d623f7 1000
0b4e3aa0 1001 vm_object_deactivate_all_pages(object);
1c79356b
A
1002 vm_object_unlock(object);
1003
1004#if MIGHT_NOT_CACHE_SHADOWS
1005 /*
1006 * If we have a shadow that we need
1007 * to deallocate, do so now, remembering
1008 * to trim the cache later.
1009 */
1010 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
1011 object = shadow;
1012 retry_cache_trim = TRUE;
1013 continue;
1014 }
1015#endif /* MIGHT_NOT_CACHE_SHADOWS */
1016
1017 /*
1018 * Trim the cache. If the cache trim
1019 * returns with a shadow for us to deallocate,
1020 * then remember to retry the cache trim
1021 * when we are done deallocating the shadow.
1022 * Otherwise, we are done.
1023 */
1024
1025 object = vm_object_cache_trim(TRUE);
1026 if (object == VM_OBJECT_NULL) {
1027 return;
1028 }
1029 retry_cache_trim = TRUE;
b0d623f7
A
1030 } else
1031#endif /* VM_OBJECT_CACHE */
1032 {
1c79356b
A
1033 /*
1034 * This object is not cachable; terminate it.
1035 */
1036 XPR(XPR_VM_OBJECT,
91447636 1037 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
b0d623f7 1038 object, object->resident_page_count,
91447636
A
1039 object->paging_in_progress,
1040 (void *)current_thread(),object->ref_count);
1c79356b
A
1041
1042 VM_OBJ_RES_DECR(object); /* XXX ? */
1043 /*
1044 * Terminate this object. If it had a shadow,
1045 * then deallocate it; otherwise, if we need
1046 * to retry a cache trim, do so now; otherwise,
1047 * we are done. "pageout" objects have a shadow,
1048 * but maintain a "paging reference" rather than
1049 * a normal reference.
1050 */
1051 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
b0d623f7
A
1052
1053 if (vm_object_terminate(object) != KERN_SUCCESS) {
1c79356b
A
1054 return;
1055 }
1056 if (shadow != VM_OBJECT_NULL) {
1057 object = shadow;
1058 continue;
1059 }
b0d623f7 1060#if VM_OBJECT_CACHE
1c79356b
A
1061 if (retry_cache_trim &&
1062 ((object = vm_object_cache_trim(TRUE)) !=
1063 VM_OBJECT_NULL)) {
1064 continue;
1065 }
b0d623f7 1066#endif
1c79356b
A
1067 return;
1068 }
1069 }
b0d623f7 1070#if VM_OBJECT_CACHE
1c79356b 1071 assert(! retry_cache_trim);
b0d623f7 1072#endif
1c79356b
A
1073}
1074
b0d623f7
A
1075
1076#if VM_OBJECT_CACHE
1c79356b
A
1077/*
1078 * Check to see whether we really need to trim
1079 * down the cache. If so, remove an object from
1080 * the cache, terminate it, and repeat.
1081 *
1082 * Called with, and returns with, cache lock unlocked.
1083 */
1084vm_object_t
1085vm_object_cache_trim(
1086 boolean_t called_from_vm_object_deallocate)
1087{
1088 register vm_object_t object = VM_OBJECT_NULL;
1089 vm_object_t shadow;
1090
1091 for (;;) {
1092
1093 /*
1094 * If we no longer need to trim the cache,
1095 * then we are done.
1096 */
b0d623f7
A
1097 if (vm_object_cached_count <= vm_object_cached_max)
1098 return VM_OBJECT_NULL;
1c79356b
A
1099
1100 vm_object_cache_lock();
1101 if (vm_object_cached_count <= vm_object_cached_max) {
1102 vm_object_cache_unlock();
1103 return VM_OBJECT_NULL;
1104 }
1105
1106 /*
1107 * We must trim down the cache, so remove
1108 * the first object in the cache.
1109 */
1110 XPR(XPR_VM_OBJECT,
1111 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
b0d623f7
A
1112 vm_object_cached_list.next,
1113 vm_object_cached_list.prev, 0, 0, 0);
1c79356b
A
1114
1115 object = (vm_object_t) queue_first(&vm_object_cached_list);
9bccf70c
A
1116 if(object == (vm_object_t) &vm_object_cached_list) {
1117 /* something's wrong with the calling parameter or */
1118 /* the value of vm_object_cached_count, just fix */
1119 /* and return */
1120 if(vm_object_cached_max < 0)
1121 vm_object_cached_max = 0;
1122 vm_object_cached_count = 0;
1123 vm_object_cache_unlock();
1124 return VM_OBJECT_NULL;
1125 }
1c79356b
A
1126 vm_object_lock(object);
1127 queue_remove(&vm_object_cached_list, object, vm_object_t,
1128 cached_list);
1129 vm_object_cached_count--;
1130
b0d623f7 1131 vm_object_cache_unlock();
1c79356b
A
1132 /*
1133 * Since this object is in the cache, we know
1134 * that it is initialized and has no references.
1135 * Take a reference to avoid recursive deallocations.
1136 */
1137
1138 assert(object->pager_initialized);
1139 assert(object->ref_count == 0);
2d21ac55 1140 vm_object_lock_assert_exclusive(object);
1c79356b
A
1141 object->ref_count++;
1142
1143 /*
1144 * Terminate the object.
1145 * If the object had a shadow, we let vm_object_deallocate
1146 * deallocate it. "pageout" objects have a shadow, but
1147 * maintain a "paging reference" rather than a normal
1148 * reference.
1149 * (We are careful here to limit recursion.)
1150 */
1151 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
b0d623f7 1152
1c79356b
A
1153 if(vm_object_terminate(object) != KERN_SUCCESS)
1154 continue;
b0d623f7 1155
1c79356b
A
1156 if (shadow != VM_OBJECT_NULL) {
1157 if (called_from_vm_object_deallocate) {
1158 return shadow;
1159 } else {
1160 vm_object_deallocate(shadow);
1161 }
1162 }
1163 }
1164}
b0d623f7 1165#endif
1c79356b 1166
1c79356b
A
1167
1168/*
1169 * Routine: vm_object_terminate
1170 * Purpose:
1171 * Free all resources associated with a vm_object.
1172 * In/out conditions:
0b4e3aa0 1173 * Upon entry, the object must be locked,
1c79356b
A
1174 * and the object must have exactly one reference.
1175 *
1176 * The shadow object reference is left alone.
1177 *
 1178 * The object must be unlocked if it is found that pages
1179 * must be flushed to a backing object. If someone
1180 * manages to map the object while it is being flushed
1181 * the object is returned unlocked and unchanged. Otherwise,
1182 * upon exit, the cache will be unlocked, and the
1183 * object will cease to exist.
1184 */
0b4e3aa0 1185static kern_return_t
1c79356b 1186vm_object_terminate(
b0d623f7 1187 vm_object_t object)
1c79356b 1188{
b0d623f7 1189 vm_object_t shadow_object;
1c79356b
A
1190
1191 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
b0d623f7
A
1192 object, object->ref_count, 0, 0, 0);
1193
1194 if (!object->pageout && (!object->temporary || object->can_persist) &&
1195 (object->pager != NULL || object->shadow_severed)) {
1c79356b
A
1196 /*
1197 * Clear pager_trusted bit so that the pages get yanked
1198 * out of the object instead of cleaned in place. This
1199 * prevents a deadlock in XMM and makes more sense anyway.
1200 */
1201 object->pager_trusted = FALSE;
1202
b0d623f7 1203 vm_object_reap_pages(object, REAP_TERMINATE);
1c79356b 1204 }
0b4e3aa0
A
1205 /*
1206 * Make sure the object isn't already being terminated
1207 */
b0d623f7 1208 if (object->terminating) {
2d21ac55
A
1209 vm_object_lock_assert_exclusive(object);
1210 object->ref_count--;
0b4e3aa0 1211 assert(object->ref_count > 0);
0b4e3aa0
A
1212 vm_object_unlock(object);
1213 return KERN_FAILURE;
1214 }
1215
1216 /*
1217 * Did somebody get a reference to the object while we were
1218 * cleaning it?
1219 */
b0d623f7 1220 if (object->ref_count != 1) {
2d21ac55
A
1221 vm_object_lock_assert_exclusive(object);
1222 object->ref_count--;
0b4e3aa0 1223 assert(object->ref_count > 0);
1c79356b 1224 vm_object_res_deallocate(object);
1c79356b
A
1225 vm_object_unlock(object);
1226 return KERN_FAILURE;
1227 }
1228
1c79356b
A
1229 /*
1230 * Make sure no one can look us up now.
1231 */
1232
0b4e3aa0
A
1233 object->terminating = TRUE;
1234 object->alive = FALSE;
1c79356b 1235
b0d623f7
A
1236 if (object->hashed) {
1237 lck_mtx_t *lck;
1238
1239 lck = vm_object_hash_lock_spin(object->pager);
1240 vm_object_remove(object);
1241 vm_object_hash_unlock(lck);
1242 }
1c79356b
A
1243 /*
1244 * Detach the object from its shadow if we are the shadow's
55e303ae
A
1245 * copy. The reference we hold on the shadow must be dropped
1246 * by our caller.
1c79356b
A
1247 */
1248 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1249 !(object->pageout)) {
1250 vm_object_lock(shadow_object);
55e303ae
A
1251 if (shadow_object->copy == object)
1252 shadow_object->copy = VM_OBJECT_NULL;
1c79356b
A
1253 vm_object_unlock(shadow_object);
1254 }
1255
b0d623f7
A
1256 if (object->paging_in_progress != 0 ||
1257 object->activity_in_progress != 0) {
8f6c56a5
A
1258 /*
1259 * There are still some paging_in_progress references
1260 * on this object, meaning that there are some paging
1261 * or other I/O operations in progress for this VM object.
1262 * Such operations take some paging_in_progress references
1263 * up front to ensure that the object doesn't go away, but
1264 * they may also need to acquire a reference on the VM object,
1265 * to map it in kernel space, for example. That means that
1266 * they may end up releasing the last reference on the VM
1267 * object, triggering its termination, while still holding
1268 * paging_in_progress references. Waiting for these
1269 * pending paging_in_progress references to go away here would
1270 * deadlock.
1271 *
1272 * To avoid deadlocking, we'll let the vm_object_reaper_thread
1273 * complete the VM object termination if it still holds
1274 * paging_in_progress references at this point.
1275 *
1276 * No new paging_in_progress should appear now that the
1277 * VM object is "terminating" and not "alive".
1278 */
1279 vm_object_reap_async(object);
8f6c56a5 1280 vm_object_unlock(object);
6601e61a
A
1281 /*
1282 * Return KERN_FAILURE to let the caller know that we
1283 * haven't completed the termination and it can't drop this
1284 * object's reference on its shadow object yet.
1285 * The reaper thread will take care of that once it has
1286 * completed this object's termination.
1287 */
1288 return KERN_FAILURE;
8f6c56a5 1289 }
b0d623f7
A
1290 /*
1291 * complete the VM object termination
1292 */
8f6c56a5
A
1293 vm_object_reap(object);
1294 object = VM_OBJECT_NULL;
8f6c56a5 1295
2d21ac55 1296 /*
b0d623f7
A
1297 * the object lock was released by vm_object_reap()
1298 *
2d21ac55
A
1299 * KERN_SUCCESS means that this object has been terminated
1300 * and no longer needs its shadow object but still holds a
1301 * reference on it.
1302 * The caller is responsible for dropping that reference.
1303 * We can't call vm_object_deallocate() here because that
1304 * would create a recursion.
1305 */
8f6c56a5
A
1306 return KERN_SUCCESS;
1307}
1308
b0d623f7 1309
8f6c56a5
A
1310/*
1311 * vm_object_reap():
1312 *
1313 * Complete the termination of a VM object after it's been marked
1314 * as "terminating" and "!alive" by vm_object_terminate().
1315 *
b0d623f7
A
1316 * The VM object must be locked by caller.
1317 * The lock will be released on return and the VM object is no longer valid.
8f6c56a5
A
1318 */
1319void
1320vm_object_reap(
1321 vm_object_t object)
1322{
1323 memory_object_t pager;
8f6c56a5 1324
2d21ac55
A
1325 vm_object_lock_assert_exclusive(object);
1326 assert(object->paging_in_progress == 0);
b0d623f7 1327 assert(object->activity_in_progress == 0);
8f6c56a5
A
1328
1329 vm_object_reap_count++;
1330
0b4e3aa0
A
1331 pager = object->pager;
1332 object->pager = MEMORY_OBJECT_NULL;
1333
1334 if (pager != MEMORY_OBJECT_NULL)
91447636 1335 memory_object_control_disable(object->pager_control);
0b4e3aa0 1336
1c79356b
A
1337 object->ref_count--;
1338#if TASK_SWAPPER
1339 assert(object->res_count == 0);
1340#endif /* TASK_SWAPPER */
1341
1c79356b
A
1342 assert (object->ref_count == 0);
1343
b0d623f7
A
1344 /*
1345 * remove from purgeable queue if it's on
1346 */
2d21ac55
A
1347 if (object->objq.next || object->objq.prev) {
1348 purgeable_q_t queue = vm_purgeable_object_remove(object);
1349 assert(queue);
1350
1351 /* Must take page lock for this - using it to protect token queue */
1352 vm_page_lock_queues();
1353 vm_purgeable_token_delete_first(queue);
1354
1355 assert(queue->debug_count_objects>=0);
1356 vm_page_unlock_queues();
1357 }
1358
1c79356b
A
1359 /*
1360 * Clean or free the pages, as appropriate.
1361 * It is possible for us to find busy/absent pages,
1362 * if some faults on this object were aborted.
1363 */
1364 if (object->pageout) {
8f6c56a5 1365 assert(object->shadow != VM_OBJECT_NULL);
1c79356b
A
1366
1367 vm_pageout_object_terminate(object);
1368
b0d623f7 1369 } else if (((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) {
2d21ac55 1370
b0d623f7 1371 vm_object_reap_pages(object, REAP_REAP);
1c79356b 1372 }
b0d623f7 1373 assert(queue_empty(&object->memq));
1c79356b 1374 assert(object->paging_in_progress == 0);
b0d623f7 1375 assert(object->activity_in_progress == 0);
1c79356b
A
1376 assert(object->ref_count == 0);
1377
1c79356b 1378 /*
0b4e3aa0
A
1379 * If the pager has not already been released by
1380 * vm_object_destroy, we need to terminate it and
1381 * release our reference to it here.
1c79356b 1382 */
0b4e3aa0
A
1383 if (pager != MEMORY_OBJECT_NULL) {
1384 vm_object_unlock(object);
b0d623f7 1385 vm_object_release_pager(pager, object->hashed);
0b4e3aa0 1386 vm_object_lock(object);
1c79356b 1387 }
0b4e3aa0 1388
1c79356b 1389 /* kick off anyone waiting on terminating */
0b4e3aa0 1390 object->terminating = FALSE;
1c79356b
A
1391 vm_object_paging_begin(object);
1392 vm_object_paging_end(object);
1393 vm_object_unlock(object);
1394
1395#if MACH_PAGEMAP
1396 vm_external_destroy(object->existence_map, object->size);
1397#endif /* MACH_PAGEMAP */
1398
6601e61a
A
1399 object->shadow = VM_OBJECT_NULL;
1400
2d21ac55 1401 vm_object_lock_destroy(object);
1c79356b
A
1402 /*
1403 * Free the space for the object.
1404 */
91447636 1405 zfree(vm_object_zone, object);
8f6c56a5
A
1406 object = VM_OBJECT_NULL;
1407}
1408
8f6c56a5 1409
8f6c56a5 1410
b0d623f7
A
1411#define V_O_R_MAX_BATCH 128
1412
1413
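/*
 * VM_OBJ_REAP_FREELIST: hand a chain of reclaimed pages back to the free
 * list in one batch.  When "do_disconnect" is set, each page that is still
 * pmapped is pmap_disconnect()ed first; callers that have already removed
 * all mappings pass FALSE and skip that pass.
 */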
1414#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \
1415 MACRO_BEGIN \
1416 if (_local_free_q) { \
1417 if (do_disconnect) { \
1418 vm_page_t m; \
1419 for (m = _local_free_q; \
1420 m != VM_PAGE_NULL; \
1421 m = (vm_page_t) m->pageq.next) { \
1422 if (m->pmapped) { \
1423 pmap_disconnect(m->phys_page); \
1424 } \
1425 } \
1426 } \
1427 vm_page_free_list(_local_free_q, TRUE); \
1428 _local_free_q = VM_PAGE_NULL; \
1429 } \
1430 MACRO_END
1431
8f6c56a5
A
1432
1433void
b0d623f7
A
1434vm_object_reap_pages(
1435 vm_object_t object,
1436 int reap_type)
8f6c56a5 1437{
b0d623f7
A
1438 vm_page_t p;
1439 vm_page_t next;
1440 vm_page_t local_free_q = VM_PAGE_NULL;
1441 int loop_count;
1442 boolean_t disconnect_on_release;
8f6c56a5 1443
b0d623f7 1444 if (reap_type == REAP_DATA_FLUSH) {
2d21ac55 1445 /*
b0d623f7
A
1446 * We need to disconnect pages from all pmaps before
1447 * releasing them to the free list
2d21ac55 1448 */
b0d623f7
A
1449 disconnect_on_release = TRUE;
1450 } else {
1451 /*
1452 * Either the caller has already disconnected the pages
1453 * from all pmaps, or we disconnect them here as we add
 1454 * them to our local list of pages to be released.
1455 * No need to re-disconnect them when we release the pages
1456 * to the free list.
1457 */
1458 disconnect_on_release = FALSE;
1459 }
1460
1461restart_after_sleep:
1462 if (queue_empty(&object->memq))
1463 return;
1464 loop_count = V_O_R_MAX_BATCH + 1;
1465
1466 vm_page_lockspin_queues();
1467
1468 next = (vm_page_t)queue_first(&object->memq);
1469
1470 while (!queue_end(&object->memq, (queue_entry_t)next)) {
1471
1472 p = next;
1473 next = (vm_page_t)queue_next(&next->listq);
1474
1475 if (--loop_count == 0) {
1476
1477 vm_page_unlock_queues();
1478
1479 if (local_free_q) {
1480 /*
1481 * Free the pages we reclaimed so far
1482 * and take a little break to avoid
1483 * hogging the page queue lock too long
1484 */
1485 VM_OBJ_REAP_FREELIST(local_free_q,
1486 disconnect_on_release);
1487 } else
1488 mutex_pause(0);
1489
1490 loop_count = V_O_R_MAX_BATCH + 1;
1491
1492 vm_page_lockspin_queues();
1493 }
1494 if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) {
1495
0b4c1975
A
1496 if (reap_type == REAP_DATA_FLUSH &&
1497 ((p->pageout == TRUE || p->cleaning == TRUE) && p->list_req_pending == TRUE)) {
b0d623f7
A
1498 p->list_req_pending = FALSE;
1499 p->cleaning = FALSE;
b0d623f7
A
1500 /*
1501 * need to drop the laundry count...
1502 * we may also need to remove it
1503 * from the I/O paging queue...
1504 * vm_pageout_throttle_up handles both cases
1505 *
1506 * the laundry and pageout_queue flags are cleared...
1507 */
d1ecb069
A
1508#if CONFIG_EMBEDDED
1509 if (p->laundry)
1510 vm_pageout_throttle_up(p);
1511#else
b0d623f7 1512 vm_pageout_throttle_up(p);
d1ecb069 1513#endif
0b4c1975
A
1514 if (p->pageout == TRUE) {
1515 /*
1516 * toss the wire count we picked up
1517 * when we initially set this page up
1518 * to be cleaned and stolen...
1519 */
1520 vm_page_unwire(p, TRUE);
1521 p->pageout = FALSE;
1522 }
b0d623f7
A
1523 PAGE_WAKEUP(p);
1524
1525 } else if (p->busy || p->cleaning) {
1526
1527 vm_page_unlock_queues();
1528 /*
1529 * free the pages reclaimed so far
1530 */
1531 VM_OBJ_REAP_FREELIST(local_free_q,
1532 disconnect_on_release);
1533
1534 PAGE_SLEEP(object, p, THREAD_UNINT);
1535
1536 goto restart_after_sleep;
1537 }
1538 }
1539 switch (reap_type) {
1540
1541 case REAP_DATA_FLUSH:
1542 if (VM_PAGE_WIRED(p)) {
1543 /*
1544 * this is an odd case... perhaps we should
1545 * zero-fill this page since we're conceptually
1546 * tossing its data at this point, but leaving
1547 * it on the object to honor the 'wire' contract
1548 */
1549 continue;
1550 }
1551 break;
1552
1553 case REAP_PURGEABLE:
1554 if (VM_PAGE_WIRED(p)) {
1555 /* can't purge a wired page */
1556 vm_page_purged_wired++;
1557 continue;
1558 }
1559
1560 if (p->busy) {
1561 /*
1562 * We can't reclaim a busy page but we can
1563 * make it pageable (it's not wired) to make
1564 * sure that it gets considered by
1565 * vm_pageout_scan() later.
1566 */
1567 vm_page_deactivate(p);
1568 vm_page_purged_busy++;
1569 continue;
1570 }
1571
1572 if (p->cleaning || p->laundry || p->list_req_pending) {
1573 /*
1574 * page is being acted upon,
1575 * so don't mess with it
1576 */
1577 vm_page_purged_others++;
1578 continue;
1579 }
1580 assert(p->object != kernel_object);
1581
1582 /*
1583 * we can discard this page...
1584 */
1585 if (p->pmapped == TRUE) {
1586 int refmod_state;
1587 /*
1588 * unmap the page
1589 */
1590 refmod_state = pmap_disconnect(p->phys_page);
1591 if (refmod_state & VM_MEM_MODIFIED) {
1592 p->dirty = TRUE;
1593 }
1594 }
1595 if (p->dirty || p->precious) {
1596 /*
1597 * we saved the cost of cleaning this page !
1598 */
1599 vm_page_purged_count++;
1600 }
1601
1602 break;
1603
1604 case REAP_TERMINATE:
1605 if (p->absent || p->private) {
1606 /*
1607 * For private pages, VM_PAGE_FREE just
1608 * leaves the page structure around for
1609 * its owner to clean up. For absent
1610 * pages, the structure is returned to
1611 * the appropriate pool.
1612 */
1613 break;
1614 }
1615 if (p->fictitious) {
1616 assert (p->phys_page == vm_page_guard_addr);
1617 break;
1618 }
1619 if (!p->dirty && p->wpmapped)
1620 p->dirty = pmap_is_modified(p->phys_page);
1621
1622 if ((p->dirty || p->precious) && !p->error && object->alive) {
1623
1624 p->busy = TRUE;
1625
1626 VM_PAGE_QUEUES_REMOVE(p);
1627
1628 vm_page_unlock_queues();
1629 /*
1630 * free the pages reclaimed so far
1631 */
1632 VM_OBJ_REAP_FREELIST(local_free_q,
1633 disconnect_on_release);
1634
1635 /*
1636 * flush page... page will be freed
1637 * upon completion of I/O
1638 */
1639 vm_pageout_cluster(p);
1640 vm_object_paging_wait(object, THREAD_UNINT);
1641
1642 goto restart_after_sleep;
1643 }
1644 break;
1645
1646 case REAP_REAP:
1647 break;
1648 }
1649 vm_page_free_prepare_queues(p);
1650 assert(p->pageq.next == NULL && p->pageq.prev == NULL);
1651 /*
1652 * Add this page to our list of reclaimed pages,
1653 * to be freed later.
1654 */
1655 p->pageq.next = (queue_entry_t) local_free_q;
1656 local_free_q = p;
1657 }
1658 vm_page_unlock_queues();
1659
1660 /*
1661 * Free the remaining reclaimed pages
1662 */
1663 VM_OBJ_REAP_FREELIST(local_free_q,
1664 disconnect_on_release);
1665}
1666
1667
1668void
1669vm_object_reap_async(
1670 vm_object_t object)
1671{
1672 vm_object_lock_assert_exclusive(object);
1673
1674 vm_object_reaper_lock_spin();
1675
1676 vm_object_reap_count_async++;
1677
1678 /* enqueue the VM object... */
1679 queue_enter(&vm_object_reaper_queue, object,
1680 vm_object_t, cached_list);
1681
1682 vm_object_reaper_unlock();
1683
1684 /* ... and wake up the reaper thread */
1685 thread_wakeup((event_t) &vm_object_reaper_queue);
1686}
1687
1688
1689void
1690vm_object_reaper_thread(void)
1691{
1692 vm_object_t object, shadow_object;
1693
1694 vm_object_reaper_lock_spin();
1695
1696 while (!queue_empty(&vm_object_reaper_queue)) {
1697 queue_remove_first(&vm_object_reaper_queue,
1698 object,
1699 vm_object_t,
1700 cached_list);
1701
1702 vm_object_reaper_unlock();
1703 vm_object_lock(object);
1704
1705 assert(object->terminating);
1706 assert(!object->alive);
1707
1708 /*
1709 * The pageout daemon might be playing with our pages.
1710 * Now that the object is dead, it won't touch any more
1711 * pages, but some pages might already be on their way out.
1712 * Hence, we wait until the active paging activities have
1713 * ceased before we break the association with the pager
1714 * itself.
1715 */
1716 while (object->paging_in_progress != 0 ||
1717 object->activity_in_progress != 0) {
1718 vm_object_wait(object,
1719 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1720 THREAD_UNINT);
1721 vm_object_lock(object);
1722 }
1723
1724 shadow_object =
1725 object->pageout ? VM_OBJECT_NULL : object->shadow;
6601e61a 1726
8f6c56a5
A
1727 vm_object_reap(object);
1728 /* cache is unlocked and object is no longer valid */
1729 object = VM_OBJECT_NULL;
1730
6601e61a
A
1731 if (shadow_object != VM_OBJECT_NULL) {
1732 /*
1733 * Drop the reference "object" was holding on
1734 * its shadow object.
1735 */
1736 vm_object_deallocate(shadow_object);
1737 shadow_object = VM_OBJECT_NULL;
1738 }
b0d623f7 1739 vm_object_reaper_lock_spin();
8f6c56a5
A
1740 }
1741
1742 /* wait for more work... */
1743 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
b0d623f7
A
1744
1745 vm_object_reaper_unlock();
1746
8f6c56a5
A
1747 thread_block((thread_continue_t) vm_object_reaper_thread);
1748 /*NOTREACHED*/
1c79356b
A
1749}
1750
1751/*
1752 * Routine: vm_object_pager_wakeup
1753 * Purpose: Wake up anyone waiting for termination of a pager.
1754 */
1755
0b4e3aa0 1756static void
1c79356b 1757vm_object_pager_wakeup(
0b4e3aa0 1758 memory_object_t pager)
1c79356b
A
1759{
1760 vm_object_hash_entry_t entry;
1761 boolean_t waiting = FALSE;
b0d623f7 1762 lck_mtx_t *lck;
1c79356b
A
1763
1764 /*
1765 * If anyone was waiting for the memory_object_terminate
1766 * to be queued, wake them up now.
1767 */
b0d623f7 1768 lck = vm_object_hash_lock_spin(pager);
1c79356b
A
1769 entry = vm_object_hash_lookup(pager, TRUE);
1770 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1771 waiting = entry->waiting;
b0d623f7
A
1772 vm_object_hash_unlock(lck);
1773
1c79356b
A
1774 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1775 if (waiting)
1776 thread_wakeup((event_t) pager);
1777 vm_object_hash_entry_free(entry);
1778 }
1779}
1780
1781/*
0b4e3aa0
A
1782 * Routine: vm_object_release_pager
1783 * Purpose: Terminate the pager and, upon completion,
1784 * release our last reference to it.
 1785 * Just like memory_object_terminate, except
 1786 * that we wake up anyone blocked in vm_object_enter
 1787 * waiting for the termination message to be queued
1788 * before calling memory_object_init.
1c79356b 1789 */
0b4e3aa0
A
1790static void
1791vm_object_release_pager(
b0d623f7
A
1792 memory_object_t pager,
1793 boolean_t hashed)
1c79356b 1794{
1c79356b 1795
0b4e3aa0
A
1796 /*
1797 * Terminate the pager.
1798 */
1c79356b 1799
0b4e3aa0 1800 (void) memory_object_terminate(pager);
1c79356b 1801
b0d623f7
A
1802 if (hashed == TRUE) {
1803 /*
1804 * Wakeup anyone waiting for this terminate
1805 * and remove the entry from the hash
1806 */
1807 vm_object_pager_wakeup(pager);
1808 }
0b4e3aa0
A
1809 /*
1810 * Release reference to pager.
1811 */
1812 memory_object_deallocate(pager);
1813}
1c79356b 1814
1c79356b 1815/*
0b4e3aa0 1816 * Routine: vm_object_destroy
1c79356b 1817 * Purpose:
0b4e3aa0 1818 * Shut down a VM object, despite the
1c79356b
A
1819 * presence of address map (or other) references
1820 * to the vm_object.
1821 */
1822kern_return_t
0b4e3aa0
A
1823vm_object_destroy(
1824 vm_object_t object,
91447636 1825 __unused kern_return_t reason)
1c79356b 1826{
0b4e3aa0 1827 memory_object_t old_pager;
1c79356b
A
1828
1829 if (object == VM_OBJECT_NULL)
1830 return(KERN_SUCCESS);
1831
1832 /*
0b4e3aa0 1833 * Remove the pager association immediately.
1c79356b
A
1834 *
1835 * This will prevent the memory manager from further
1836 * meddling. [If it wanted to flush data or make
1837 * other changes, it should have done so before performing
1838 * the destroy call.]
1839 */
1840
1c79356b 1841 vm_object_lock(object);
1c79356b
A
1842 object->can_persist = FALSE;
1843 object->named = FALSE;
0b4e3aa0 1844 object->alive = FALSE;
1c79356b 1845
b0d623f7
A
1846 if (object->hashed) {
1847 lck_mtx_t *lck;
1848 /*
1849 * Rip out the pager from the vm_object now...
1850 */
1851 lck = vm_object_hash_lock_spin(object->pager);
1852 vm_object_remove(object);
1853 vm_object_hash_unlock(lck);
1854 }
0b4e3aa0
A
1855 old_pager = object->pager;
1856 object->pager = MEMORY_OBJECT_NULL;
1857 if (old_pager != MEMORY_OBJECT_NULL)
91447636 1858 memory_object_control_disable(object->pager_control);
1c79356b
A
1859
1860 /*
b0d623f7
A
1861 * Wait for the existing paging activity (that got
1862 * through before we nulled out the pager) to subside.
1863 */
1864
1865 vm_object_paging_wait(object, THREAD_UNINT);
1866 vm_object_unlock(object);
1867
1868 /*
1869 * Terminate the object now.
1870 */
1871 if (old_pager != MEMORY_OBJECT_NULL) {
1872 vm_object_release_pager(old_pager, object->hashed);
1873
1874 /*
1875 * JMM - Release the caller's reference. This assumes the
1876 * caller had a reference to release, which is a big (but
1877 * currently valid) assumption if this is driven from the
1878 * vnode pager (it is holding a named reference when making
1879 * this call)..
1880 */
1881 vm_object_deallocate(object);
1882
1883 }
1884 return(KERN_SUCCESS);
1885}
1886
1887
1888#define VM_OBJ_DEACT_ALL_STATS DEBUG
1889#if VM_OBJ_DEACT_ALL_STATS
1890uint32_t vm_object_deactivate_all_pages_batches = 0;
1891uint32_t vm_object_deactivate_all_pages_pages = 0;
1892#endif /* VM_OBJ_DEACT_ALL_STATS */
1893/*
1894 * vm_object_deactivate_all_pages
1895 *
1896 * Deactivate all pages in the specified object. (Keep its pages
1897 * in memory even though it is no longer referenced.)
1898 *
1899 * The object must be locked.
1900 */
1901static void
1902vm_object_deactivate_all_pages(
1903 register vm_object_t object)
1904{
1905 register vm_page_t p;
1906 int loop_count;
1907#if VM_OBJ_DEACT_ALL_STATS
1908 int pages_count;
1909#endif /* VM_OBJ_DEACT_ALL_STATS */
1910#define V_O_D_A_P_MAX_BATCH 256
1911
1912 loop_count = V_O_D_A_P_MAX_BATCH;
1913#if VM_OBJ_DEACT_ALL_STATS
1914 pages_count = 0;
1915#endif /* VM_OBJ_DEACT_ALL_STATS */
1916 vm_page_lock_queues();
1917 queue_iterate(&object->memq, p, vm_page_t, listq) {
1918 if (--loop_count == 0) {
1919#if VM_OBJ_DEACT_ALL_STATS
1920 hw_atomic_add(&vm_object_deactivate_all_pages_batches,
1921 1);
1922 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1923 pages_count);
1924 pages_count = 0;
1925#endif /* VM_OBJ_DEACT_ALL_STATS */
1926 lck_mtx_yield(&vm_page_queue_lock);
1927 loop_count = V_O_D_A_P_MAX_BATCH;
1928 }
1929 if (!p->busy && !p->throttled) {
1930#if VM_OBJ_DEACT_ALL_STATS
1931 pages_count++;
1932#endif /* VM_OBJ_DEACT_ALL_STATS */
1933 vm_page_deactivate(p);
1934 }
1935 }
1936#if VM_OBJ_DEACT_ALL_STATS
1937 if (pages_count) {
1938 hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
1939 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1940 pages_count);
1941 pages_count = 0;
1942 }
1943#endif /* VM_OBJ_DEACT_ALL_STATS */
1944 vm_page_unlock_queues();
1945}
1946
1947
1948
1949/*
1950 * when deallocating pages it is necessary to hold
1951 * the vm_page_queue_lock (a hot global lock) for certain operations
1952 * on the page... however, the majority of the work can be done
1953 * while merely holding the object lock... to mitigate the time spent behind the
1954 * global lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
1955 * while doing all of the work that doesn't require the vm_page_queue_lock...
 1956 * then call dw_do_work to acquire the vm_page_queue_lock and do the
1957 * necessary work for each page... we will grab the busy bit on the page
1958 * so that dw_do_work can drop the object lock if it can't immediately take the
1959 * vm_page_queue_lock in order to compete for the locks in the same order that
1960 * vm_pageout_scan takes them.
1961 */
1962
1963#define DELAYED_WORK_LIMIT 32
1964
1965#define DW_clear_reference 0x01
1966#define DW_move_page 0x02
1967#define DW_clear_busy 0x04
1968#define DW_PAGE_WAKEUP 0x08
1969
1970
1971struct dw {
1972 vm_page_t dw_m;
1973 int dw_mask;
1974};
1975
1976static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count);
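/*
 * An illustrative sketch (assuming the caller holds the object lock
 * exclusively) of the two-pass pattern described above: per-page work
 * is batched into a local dw_array while only the object lock is held,
 * and dw_do_work() flushes the batch under the page-queue lock whenever
 * DELAYED_WORK_LIMIT entries accumulate.  The page filter below is a
 * placeholder; real callers choose the per-page masks themselves.
 */
static void
dw_batch_sketch(
	vm_object_t		object)
{
	struct dw		dw_array[DELAYED_WORK_LIMIT];
	struct dw		*dwp;
	int			dw_count;
	vm_page_t		m;
	vm_object_offset_t	offset;

	dwp = &dw_array[0];
	dw_count = 0;

	for (offset = 0; offset < object->size; offset += PAGE_SIZE_64) {
		m = vm_page_lookup(object, offset);

		if (m == VM_PAGE_NULL || m->busy || VM_PAGE_WIRED(m))
			continue;
		/*
		 * Grab the busy bit so the page stays stable if
		 * dw_do_work() has to drop the object lock.
		 */
		m->busy = TRUE;

		dwp->dw_m = m;
		dwp->dw_mask = DW_clear_reference | DW_clear_busy | DW_PAGE_WAKEUP;
		dwp++;
		dw_count++;

		if (dw_count >= DELAYED_WORK_LIMIT) {
			dw_do_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		dw_do_work(object, &dw_array[0], dw_count);
}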
1977
1978
1979static void
1980dw_do_work(
1981 vm_object_t object,
1982 struct dw *dwp,
1983 int dw_count)
1984{
1985 vm_page_t m;
1986 int j;
1987
1988 /*
1989 * pageout_scan takes the vm_page_lock_queues first
1990 * then tries for the object lock... to avoid what
1991 * is effectively a lock inversion, we'll go to the
1992 * trouble of taking them in that same order... otherwise
1993 * if this object contains the majority of the pages resident
1994 * in the UBC (or a small set of large objects actively being
1995 * worked on contain the majority of the pages), we could
1996 * cause the pageout_scan thread to 'starve' in its attempt
1997 * to find pages to move to the free queue, since it has to
1998 * successfully acquire the object lock of any candidate page
1999 * before it can steal/clean it.
2000 */
2001 if (!vm_page_trylockspin_queues()) {
2002 vm_object_unlock(object);
2003
2004 vm_page_lockspin_queues();
2005
2006 for (j = 0; ; j++) {
2007 if (!vm_object_lock_avoid(object) &&
2008 _vm_object_lock_try(object))
2009 break;
2010 vm_page_unlock_queues();
2011 mutex_pause(j);
2012 vm_page_lockspin_queues();
2013 }
2014 }
2015 for (j = 0; j < dw_count; j++, dwp++) {
2016
2017 m = dwp->dw_m;
2018
2019 if (dwp->dw_mask & DW_clear_reference)
2020 m->reference = FALSE;
2021
2022 if (dwp->dw_mask & DW_move_page) {
2023 VM_PAGE_QUEUES_REMOVE(m);
2024
2025 assert(!m->laundry);
2026 assert(m->object != kernel_object);
2027 assert(m->pageq.next == NULL &&
2028 m->pageq.prev == NULL);
2029
2030 if (m->zero_fill) {
2031 queue_enter_first(&vm_page_queue_zf, m, vm_page_t, pageq);
2032 vm_zf_queue_count++;
2033 } else {
2034 queue_enter_first(&vm_page_queue_inactive, m, vm_page_t, pageq);
2035 }
2036 m->inactive = TRUE;
2037
2038 if (!m->fictitious) {
2039 vm_page_inactive_count++;
2040 token_new_pagecount++;
2041 } else {
2042 assert(m->phys_page == vm_page_fictitious_addr);
2043 }
2044 }
2045 if (dwp->dw_mask & DW_clear_busy)
2046 dwp->dw_m->busy = FALSE;
2047
2048 if (dwp->dw_mask & DW_PAGE_WAKEUP)
2049 PAGE_WAKEUP(dwp->dw_m);
2050 }
2051 vm_page_unlock_queues();
2052
2053#if CONFIG_EMBEDDED
2054 {
2055 int percent_avail;
2056
2057 /*
2058 * Decide if we need to send a memory status notification.
2059 */
2060 percent_avail =
2061 (vm_page_active_count + vm_page_inactive_count +
2062 vm_page_speculative_count + vm_page_free_count +
2063 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2064 atop_64(max_mem);
2065 if (percent_avail >= (kern_memorystatus_level + 5) ||
2066 percent_avail <= (kern_memorystatus_level - 5)) {
2067 kern_memorystatus_level = percent_avail;
2068 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2069 }
2070 }
2071#endif
2072}
2073
2074
2075
2076/*
2077 * The "chunk" macros are used by routines below when looking for pages to deactivate. These
2078 * exist because of the need to handle shadow chains. When deactivating pages, we only
 2079 * want to deactivate the ones at the topmost level in the object chain. In order to do
2080 * this efficiently, the specified address range is divided up into "chunks" and we use
2081 * a bit map to keep track of which pages have already been processed as we descend down
2082 * the shadow chain. These chunk macros hide the details of the bit map implementation
2083 * as much as we can.
2084 *
2085 * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is
2086 * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest
2087 * order bit represents page 0 in the current range and highest order bit represents
2088 * page 63.
2089 *
2090 * For further convenience, we also use negative logic for the page state in the bit map.
2091 * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has
2092 * been processed. This way we can simply test the 64-bit long word to see if it's zero
2093 * to easily tell if the whole range has been processed. Therefore, the bit map starts
2094 * out with all the bits set. The macros below hide all these details from the caller.
2095 */
2096
2097#define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */
2098 /* be the same as the number of bits in */
2099 /* the chunk_state_t type. We use 64 */
2100 /* just for convenience. */
2101
2102#define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */
2103
2104typedef uint64_t chunk_state_t;
2105
2106/*
2107 * The bit map uses negative logic, so we start out with all 64 bits set to indicate
2108 * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE,
2109 * then we mark pages beyond the len as having been "processed" so that we don't waste time
2110 * looking at pages in that range. This can save us from unnecessarily chasing down the
2111 * shadow chain.
2112 */
2113
2114#define CHUNK_INIT(c, len) \
2115 MACRO_BEGIN \
2116 uint64_t p; \
2117 \
2118 (c) = 0xffffffffffffffffLL; \
2119 \
2120 for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \
2121 MARK_PAGE_HANDLED(c, p); \
2122 MACRO_END
2123
2124/*
2125 * Return true if all pages in the chunk have not yet been processed.
2126 */
2127
2128#define CHUNK_NOT_COMPLETE(c) ((c) != 0)
2129
2130/*
2131 * Return true if the page at offset 'p' in the bit map has already been handled
2132 * while processing a higher level object in the shadow chain.
2133 */
2134
2135#define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0)
2136
2137/*
2138 * Mark the page at offset 'p' in the bit map as having been processed.
2139 */
2140
2141#define MARK_PAGE_HANDLED(c, p) \
2142MACRO_BEGIN \
2143 (c) = (c) & ~(1LL << (p)); \
2144MACRO_END
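/*
 * An illustrative walk over one chunk using the macros above, assuming
 * the object is locked by the caller and "len" is at most CHUNK_SIZE.
 * The "handled" condition here is simply page residency;
 * deactivate_pages_in_object() below applies the real checks.
 */
static void
chunk_walk_sketch(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	len)
{
	chunk_state_t	chunk_state;
	uint64_t	pg;

	assert(len <= CHUNK_SIZE);

	/* all bits set, minus any pages beyond "len" */
	CHUNK_INIT(chunk_state, len);

	for (pg = 0; pg < PAGES_IN_A_CHUNK && CHUNK_NOT_COMPLETE(chunk_state); pg++) {
		if (PAGE_ALREADY_HANDLED(chunk_state, pg))
			continue;
		if (vm_page_lookup(object, offset + ptoa_64(pg)) != VM_PAGE_NULL)
			MARK_PAGE_HANDLED(chunk_state, pg);
	}
}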
2145
2146
2147/*
2148 * Return true if the page at the given offset has been paged out. Object is
2149 * locked upon entry and returned locked.
2150 */
2151
2152static boolean_t
2153page_is_paged_out(
2154 vm_object_t object,
2155 vm_object_offset_t offset)
2156{
2157 kern_return_t kr;
2158 memory_object_t pager;
2159
2160 /*
2161 * Check the existence map for the page if we have one, otherwise
2162 * ask the pager about this page.
2163 */
2164
2165#if MACH_PAGEMAP
2166 if (object->existence_map) {
2167 if (vm_external_state_get(object->existence_map, offset)
2168 == VM_EXTERNAL_STATE_EXISTS) {
2169 /*
2170 * We found the page
2171 */
2172
2173 return TRUE;
2174 }
2175 } else
2176#endif
2177 if (object->internal &&
2178 object->alive &&
2179 !object->terminating &&
2180 object->pager_ready) {
2181
2182 /*
2183 * We're already holding a "paging in progress" reference
2184 * so the object can't disappear when we release the lock.
2185 */
2186
2187 assert(object->paging_in_progress);
2188 pager = object->pager;
2189 vm_object_unlock(object);
2190
2191 kr = memory_object_data_request(
2192 pager,
2193 offset + object->paging_offset,
2194 0, /* just poke the pager */
2195 VM_PROT_READ,
2196 NULL);
2197
2198 vm_object_lock(object);
2199
2200 if (kr == KERN_SUCCESS) {
2201
2202 /*
2203 * We found the page
2204 */
2205
2206 return TRUE;
2207 }
2208 }
2209
2210 return FALSE;
2211}
2212
2213
2214/*
2215 * Deactivate the pages in the specified object and range. If kill_page is set, also discard any
2216 * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify
2217 * a size that is less than or equal to the CHUNK_SIZE.
2218 */
2219
2220static void
2221deactivate_pages_in_object(
2222 vm_object_t object,
2223 vm_object_offset_t offset,
2224 vm_object_size_t size,
2225 boolean_t kill_page,
2226 boolean_t reusable_page,
2227#if !MACH_ASSERT
2228 __unused
2229#endif
2230 boolean_t all_reusable,
2231 chunk_state_t *chunk_state)
2232{
2233 vm_page_t m;
2234 int p;
2235 struct dw dw_array[DELAYED_WORK_LIMIT];
2236 struct dw *dwp;
2237 int dw_count;
2238 unsigned int reusable = 0;
2239
2240
2241 /*
2242 * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the
2243 * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may
2244 * have pages marked as having been processed already. We stop the loop early if we find we've handled
2245 * all the pages in the chunk.
2246 */
2247
2248 dwp = &dw_array[0];
2249 dw_count = 0;
2250
2251 for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) {
2252
2253 /*
2254 * If this offset has already been found and handled in a higher level object, then don't
2255 * do anything with it in the current shadow object.
2256 */
2257
2258 if (PAGE_ALREADY_HANDLED(*chunk_state, p))
2259 continue;
2260
2261 /*
2262 * See if the page at this offset is around. First check to see if the page is resident,
2263 * then if not, check the existence map or with the pager.
2264 */
2265
2266 if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
2267
2268 /*
2269 * We found a page we were looking for. Mark it as "handled" now in the chunk_state
2270 * so that we won't bother looking for a page at this offset again if there are more
2271 * shadow objects. Then deactivate the page.
2272 */
2273
2274 MARK_PAGE_HANDLED(*chunk_state, p);
2275
2276 if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy)) {
2277 int clear_refmod;
2278
2279 assert(!m->laundry);
2280
2281 clear_refmod = VM_MEM_REFERENCED;
2282 dwp->dw_mask = DW_clear_reference;
2283
2284 if ((kill_page) && (object->internal)) {
2285 m->precious = FALSE;
2286 m->dirty = FALSE;
2287
2288 clear_refmod |= VM_MEM_MODIFIED;
d1ecb069
A
2289 if (m->throttled) {
2290 /*
2291 * This page is now clean and
2292 * reclaimable. Move it out
2293 * of the throttled queue, so
2294 * that vm_pageout_scan() can
2295 * find it.
2296 */
2297 dwp->dw_mask |= DW_move_page;
2298 }
b0d623f7
A
2299#if MACH_PAGEMAP
2300 vm_external_state_clr(object->existence_map, offset);
2301#endif /* MACH_PAGEMAP */
2302
2303 if (reusable_page && !m->reusable) {
2304 assert(!all_reusable);
2305 assert(!object->all_reusable);
2306 m->reusable = TRUE;
2307 object->reusable_page_count++;
2308 assert(object->resident_page_count >= object->reusable_page_count);
2309 reusable++;
d1ecb069
A
2310#if CONFIG_EMBEDDED
2311 } else {
2312 if (m->reusable) {
2313 m->reusable = FALSE;
2314 object->reusable_page_count--;
2315 }
2316#endif
b0d623f7
A
2317 }
2318 }
2319 pmap_clear_refmod(m->phys_page, clear_refmod);
2320
2321 if (!m->throttled && !(reusable_page || all_reusable))
2322 dwp->dw_mask |= DW_move_page;
2323 /*
2324 * dw_do_work may need to drop the object lock
 2325 * if it does, we need the pages it's looking at to
2326 * be held stable via the busy bit.
2327 */
2328 m->busy = TRUE;
2329 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
2330
2331 dwp->dw_m = m;
2332 dwp++;
2333 dw_count++;
2334
2335 if (dw_count >= DELAYED_WORK_LIMIT) {
2336 if (reusable) {
2337 OSAddAtomic(reusable,
2338 &vm_page_stats_reusable.reusable_count);
2339 vm_page_stats_reusable.reusable += reusable;
2340 reusable = 0;
2341 }
2342 dw_do_work(object, &dw_array[0], dw_count);
2343
2344 dwp = &dw_array[0];
2345 dw_count = 0;
2346 }
2347 }
2348
2349 } else {
2350
2351 /*
2352 * The page at this offset isn't memory resident, check to see if it's
2353 * been paged out. If so, mark it as handled so we don't bother looking
2354 * for it in the shadow chain.
2355 */
2356
2357 if (page_is_paged_out(object, offset)) {
2358 MARK_PAGE_HANDLED(*chunk_state, p);
2359
2360 /*
2361 * If we're killing a non-resident page, then clear the page in the existence
2362 * map so we don't bother paging it back in if it's touched again in the future.
2363 */
2364
2365 if ((kill_page) && (object->internal)) {
2366#if MACH_PAGEMAP
2367 vm_external_state_clr(object->existence_map, offset);
2368#endif /* MACH_PAGEMAP */
2369 }
2370 }
2371 }
2372 }
2373
2374 if (reusable) {
2375 OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count);
2376 vm_page_stats_reusable.reusable += reusable;
2377 reusable = 0;
2378 }
2379
2380 if (dw_count)
2381 dw_do_work(object, &dw_array[0], dw_count);
2382}
2383
2384
2385/*
 2386 * Deactivate a "chunk" of the given range of the object starting at offset. A "chunk"
2387 * will always be less than or equal to the given size. The total range is divided up
2388 * into chunks for efficiency and performance related to the locks and handling the shadow
2389 * chain. This routine returns how much of the given "size" it actually processed. It's
 2390 * up to the caller to loop and keep calling this routine until the entire range they want
2391 * to process has been done.
2392 */
2393
2394static vm_object_size_t
2395deactivate_a_chunk(
2396 vm_object_t orig_object,
2397 vm_object_offset_t offset,
2398 vm_object_size_t size,
2399 boolean_t kill_page,
2400 boolean_t reusable_page,
2401 boolean_t all_reusable)
2402{
2403 vm_object_t object;
2404 vm_object_t tmp_object;
2405 vm_object_size_t length;
2406 chunk_state_t chunk_state;
2407
2408
2409 /*
2410 * Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the
2411 * remaining size the caller asked for.
2412 */
2413
2414 length = MIN(size, CHUNK_SIZE);
2415
2416 /*
2417 * The chunk_state keeps track of which pages we've already processed if there's
2418 * a shadow chain on this object. At this point, we haven't done anything with this
2419 * range of pages yet, so initialize the state to indicate no pages processed yet.
1c79356b
A
2420 */
2421
b0d623f7
A
2422 CHUNK_INIT(chunk_state, length);
2423 object = orig_object;
1c79356b
A
2424
2425 /*
b0d623f7
A
2426 * Start at the top level object and iterate around the loop once for each object
2427 * in the shadow chain. We stop processing early if we've already found all the pages
2428 * in the range. Otherwise we stop when we run out of shadow objects.
1c79356b 2429 */
0b4e3aa0 2430
b0d623f7
A
2431 while (object && CHUNK_NOT_COMPLETE(chunk_state)) {
2432 vm_object_paging_begin(object);
2433
2434 deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state);
2435
2436 vm_object_paging_end(object);
2437
2438 /*
 2439 * We've finished with this object; see if there's a shadow object. If
 2440 * there is, update the offset and lock the new object. We also turn off
 2441 * kill_page at this point since we only kill pages in the topmost object.
0b4e3aa0 2442 */
1c79356b 2443
b0d623f7
A
2444 tmp_object = object->shadow;
2445
2446 if (tmp_object) {
2447 kill_page = FALSE;
2448 reusable_page = FALSE;
2449 all_reusable = FALSE;
2450 offset += object->shadow_offset;
2451 vm_object_lock(tmp_object);
2452 }
2453
2454 if (object != orig_object)
2455 vm_object_unlock(object);
2456
2457 object = tmp_object;
1c79356b 2458 }
b0d623f7
A
2459
2460 if (object && object != orig_object)
2461 vm_object_unlock(object);
2462
2463 return length;
1c79356b
A
2464}
2465
b0d623f7
A
2466
2467
1c79356b 2468/*
b0d623f7
A
2469 * Move any resident pages in the specified range to the inactive queue. If kill_page is set,
2470 * we also clear the modified status of the page and "forget" any changes that have been made
2471 * to the page.
1c79356b 2472 */
1c79356b 2473
0b4e3aa0
A
2474__private_extern__ void
2475vm_object_deactivate_pages(
2476 vm_object_t object,
2477 vm_object_offset_t offset,
2478 vm_object_size_t size,
b0d623f7
A
2479 boolean_t kill_page,
2480 boolean_t reusable_page)
0b4e3aa0 2481{
b0d623f7
A
2482 vm_object_size_t length;
2483 boolean_t all_reusable;
0b4e3aa0
A
2484
2485 /*
b0d623f7
A
2486 * We break the range up into chunks and do one chunk at a time. This is for
2487 * efficiency and performance while handling the shadow chains and the locks.
2488 * The deactivate_a_chunk() function returns how much of the range it processed.
2489 * We keep calling this routine until the given size is exhausted.
0b4e3aa0 2490 */
0b4e3aa0 2491
0b4e3aa0 2492
b0d623f7
A
2493 all_reusable = FALSE;
2494 if (reusable_page &&
2495 object->size != 0 &&
2496 object->size == size &&
2497 object->reusable_page_count == 0) {
2498 all_reusable = TRUE;
2499 reusable_page = FALSE;
2500 }
0b4e3aa0 2501
d1ecb069
A
2502#if CONFIG_EMBEDDED
2503 if ((reusable_page || all_reusable) && object->all_reusable) {
2504 /* This means MADV_FREE_REUSABLE has been called twice, which
2505 * is probably illegal. */
2506 return;
2507 }
2508#endif
2509
b0d623f7
A
2510 while (size) {
2511 length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable);
0b4e3aa0 2512
b0d623f7
A
2513 size -= length;
2514 offset += length;
2515 }
91447636 2516
b0d623f7
A
2517 if (all_reusable) {
2518 if (!object->all_reusable) {
2519 unsigned int reusable;
2520
2521 object->all_reusable = TRUE;
2522 assert(object->reusable_page_count == 0);
2523 /* update global stats */
2524 reusable = object->resident_page_count;
2525 OSAddAtomic(reusable,
2526 &vm_page_stats_reusable.reusable_count);
2527 vm_page_stats_reusable.reusable += reusable;
2528 vm_page_stats_reusable.all_reusable_calls++;
2529 }
2530 } else if (reusable_page) {
2531 vm_page_stats_reusable.partial_reusable_calls++;
2532 }
2533}
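/*
 * Minimal usage sketch for vm_object_deactivate_pages(), assuming the
 * caller holds a reference on the object and passes a page-aligned
 * range.  kill_page == TRUE additionally discards the pages' modified
 * state (the caller promises it no longer needs their contents), and
 * reusable_page == TRUE tags them for the "reusable" accounting above.
 * The flag choices below are examples, not a required combination.
 */
static void
vm_object_deactivate_range_sketch(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size)
{
	vm_object_lock(object);

	/* age the pages toward the inactive queue, keeping their contents */
	vm_object_deactivate_pages(object, offset, size,
				   FALSE,	/* kill_page */
				   FALSE);	/* reusable_page */

	vm_object_unlock(object);
}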
0b4e3aa0 2534
b0d623f7
A
2535void
2536vm_object_reuse_pages(
2537 vm_object_t object,
2538 vm_object_offset_t start_offset,
2539 vm_object_offset_t end_offset,
2540 boolean_t allow_partial_reuse)
2541{
2542 vm_object_offset_t cur_offset;
2543 vm_page_t m;
2544 unsigned int reused, reusable;
0b4e3aa0 2545
b0d623f7
A
2546#define VM_OBJECT_REUSE_PAGE(object, m, reused) \
2547 MACRO_BEGIN \
2548 if ((m) != VM_PAGE_NULL && \
2549 (m)->reusable) { \
2550 assert((object)->reusable_page_count <= \
2551 (object)->resident_page_count); \
2552 assert((object)->reusable_page_count > 0); \
2553 (object)->reusable_page_count--; \
2554 (m)->reusable = FALSE; \
2555 (reused)++; \
2556 } \
2557 MACRO_END
2d21ac55 2558
b0d623f7
A
2559 reused = 0;
2560 reusable = 0;
0b4e3aa0 2561
b0d623f7 2562 vm_object_lock_assert_exclusive(object);
0b4e3aa0 2563
b0d623f7
A
2564 if (object->all_reusable) {
2565 assert(object->reusable_page_count == 0);
2566 object->all_reusable = FALSE;
2567 if (end_offset - start_offset == object->size ||
2568 !allow_partial_reuse) {
2569 vm_page_stats_reusable.all_reuse_calls++;
2570 reused = object->resident_page_count;
2571 } else {
2572 vm_page_stats_reusable.partial_reuse_calls++;
2573 queue_iterate(&object->memq, m, vm_page_t, listq) {
2574 if (m->offset < start_offset ||
2575 m->offset >= end_offset) {
2576 m->reusable = TRUE;
2577 object->reusable_page_count++;
2578 assert(object->resident_page_count >= object->reusable_page_count);
2579 continue;
2580 } else {
2581 assert(!m->reusable);
2582 reused++;
0b4e3aa0
A
2583 }
2584 }
2585 }
b0d623f7
A
2586 } else if (object->resident_page_count >
2587 ((end_offset - start_offset) >> PAGE_SHIFT)) {
2588 vm_page_stats_reusable.partial_reuse_calls++;
2589 for (cur_offset = start_offset;
2590 cur_offset < end_offset;
2591 cur_offset += PAGE_SIZE_64) {
2592 if (object->reusable_page_count == 0) {
2593 break;
2594 }
2595 m = vm_page_lookup(object, cur_offset);
2596 VM_OBJECT_REUSE_PAGE(object, m, reused);
2597 }
2598 } else {
2599 vm_page_stats_reusable.partial_reuse_calls++;
2600 queue_iterate(&object->memq, m, vm_page_t, listq) {
2601 if (object->reusable_page_count == 0) {
2602 break;
2603 }
2604 if (m->offset < start_offset ||
2605 m->offset >= end_offset) {
2606 continue;
2607 }
2608 VM_OBJECT_REUSE_PAGE(object, m, reused);
2609 }
0b4e3aa0 2610 }
b0d623f7
A
2611
2612 /* update global stats */
2613 OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count);
2614 vm_page_stats_reusable.reused += reused;
2615 vm_page_stats_reusable.reusable += reusable;
0b4e3aa0 2616}
1c79356b
A
2617
2618/*
2619 * Routine: vm_object_pmap_protect
2620 *
2621 * Purpose:
2622 * Reduces the permission for all physical
2623 * pages in the specified object range.
2624 *
2625 * If removing write permission only, it is
2626 * sufficient to protect only the pages in
2627 * the top-level object; only those pages may
2628 * have write permission.
2629 *
2630 * If removing all access, we must follow the
2631 * shadow chain from the top-level object to
2632 * remove access to all pages in shadowed objects.
2633 *
2634 * The object must *not* be locked. The object must
2635 * be temporary/internal.
2636 *
2637 * If pmap is not NULL, this routine assumes that
2638 * the only mappings for the pages are in that
2639 * pmap.
2640 */
2641
0b4e3aa0 2642__private_extern__ void
1c79356b
A
2643vm_object_pmap_protect(
2644 register vm_object_t object,
2645 register vm_object_offset_t offset,
91447636 2646 vm_object_size_t size,
1c79356b 2647 pmap_t pmap,
91447636 2648 vm_map_offset_t pmap_start,
1c79356b
A
2649 vm_prot_t prot)
2650{
2651 if (object == VM_OBJECT_NULL)
2652 return;
91447636
A
2653 size = vm_object_round_page(size);
2654 offset = vm_object_trunc_page(offset);
1c79356b
A
2655
2656 vm_object_lock(object);
2657
2d21ac55
A
2658 if (object->phys_contiguous) {
2659 if (pmap != NULL) {
2660 vm_object_unlock(object);
2661 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
2662 } else {
2663 vm_object_offset_t phys_start, phys_end, phys_addr;
2664
2665 phys_start = object->shadow_offset + offset;
2666 phys_end = phys_start + size;
2667 assert(phys_start <= phys_end);
2668 assert(phys_end <= object->shadow_offset + object->size);
2669 vm_object_unlock(object);
2670
2671 for (phys_addr = phys_start;
2672 phys_addr < phys_end;
2673 phys_addr += PAGE_SIZE_64) {
b0d623f7 2674 pmap_page_protect((ppnum_t) (phys_addr >> PAGE_SHIFT), prot);
2d21ac55
A
2675 }
2676 }
2677 return;
2678 }
2679
55e303ae 2680 assert(object->internal);
de355530 2681
1c79356b 2682 while (TRUE) {
91447636 2683 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
1c79356b
A
2684 vm_object_unlock(object);
2685 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
2686 return;
2687 }
2688
9bccf70c
A
2689 /* if we are doing large ranges with respect to resident */
 2690 /* page count then we should iterate over pages otherwise */
2691 /* inverse page look-up will be faster */
91447636 2692 if (ptoa_64(object->resident_page_count / 4) < size) {
9bccf70c
A
2693 vm_page_t p;
2694 vm_object_offset_t end;
1c79356b
A
2695
2696 end = offset + size;
2697
2698 if (pmap != PMAP_NULL) {
2699 queue_iterate(&object->memq, p, vm_page_t, listq) {
2700 if (!p->fictitious &&
2701 (offset <= p->offset) && (p->offset < end)) {
91447636 2702 vm_map_offset_t start;
1c79356b 2703
91447636
A
2704 start = pmap_start + p->offset - offset;
2705 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
1c79356b
A
2706 }
2707 }
2708 } else {
2709 queue_iterate(&object->memq, p, vm_page_t, listq) {
2710 if (!p->fictitious &&
2711 (offset <= p->offset) && (p->offset < end)) {
2712
2d21ac55 2713 pmap_page_protect(p->phys_page, prot);
1c79356b
A
2714 }
2715 }
2716 }
9bccf70c
A
2717 } else {
2718 vm_page_t p;
2719 vm_object_offset_t end;
2720 vm_object_offset_t target_off;
2721
2722 end = offset + size;
2723
2724 if (pmap != PMAP_NULL) {
2725 for(target_off = offset;
91447636
A
2726 target_off < end;
2727 target_off += PAGE_SIZE) {
2728 p = vm_page_lookup(object, target_off);
2729 if (p != VM_PAGE_NULL) {
b0d623f7 2730 vm_object_offset_t start;
91447636 2731 start = pmap_start +
b0d623f7 2732 (p->offset - offset);
9bccf70c 2733 pmap_protect(pmap, start,
b0d623f7 2734 start + PAGE_SIZE, prot);
9bccf70c
A
2735 }
2736 }
2737 } else {
2738 for(target_off = offset;
2739 target_off < end; target_off += PAGE_SIZE) {
91447636
A
2740 p = vm_page_lookup(object, target_off);
2741 if (p != VM_PAGE_NULL) {
2d21ac55 2742 pmap_page_protect(p->phys_page, prot);
9bccf70c
A
2743 }
2744 }
2745 }
2746 }
1c79356b
A
2747
2748 if (prot == VM_PROT_NONE) {
2749 /*
2750 * Must follow shadow chain to remove access
2751 * to pages in shadowed objects.
2752 */
2753 register vm_object_t next_object;
2754
2755 next_object = object->shadow;
2756 if (next_object != VM_OBJECT_NULL) {
2757 offset += object->shadow_offset;
2758 vm_object_lock(next_object);
2759 vm_object_unlock(object);
2760 object = next_object;
2761 }
2762 else {
2763 /*
2764 * End of chain - we are done.
2765 */
2766 break;
2767 }
2768 }
2769 else {
2770 /*
2771 * Pages in shadowed objects may never have
2772 * write permission - we may stop here.
2773 */
2774 break;
2775 }
2776 }
2777
2778 vm_object_unlock(object);
2779}
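/*
 * The protection loop above chooses between walking the object's
 * resident-page queue and doing a per-offset vm_page_lookup() based on
 * a size heuristic; the same test is restated here as a predicate for
 * clarity (this helper is illustrative and not used by the code above).
 */
static boolean_t
vm_object_pmap_protect_prefers_memq(
	vm_object_t		object,
	vm_object_size_t	size)
{
	/*
	 * Iterating the memq wins when the range spans many more pages
	 * than are resident; otherwise per-offset lookups are cheaper.
	 */
	return (ptoa_64(object->resident_page_count / 4) < size);
}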
2780
2781/*
2782 * Routine: vm_object_copy_slowly
2783 *
2784 * Description:
2785 * Copy the specified range of the source
2786 * virtual memory object without using
2787 * protection-based optimizations (such
2788 * as copy-on-write). The pages in the
2789 * region are actually copied.
2790 *
2791 * In/out conditions:
2792 * The caller must hold a reference and a lock
2793 * for the source virtual memory object. The source
2794 * object will be returned *unlocked*.
2795 *
2796 * Results:
2797 * If the copy is completed successfully, KERN_SUCCESS is
2798 * returned. If the caller asserted the interruptible
2799 * argument, and an interruption occurred while waiting
2800 * for a user-generated event, MACH_SEND_INTERRUPTED is
2801 * returned. Other values may be returned to indicate
2802 * hard errors during the copy operation.
2803 *
2804 * A new virtual memory object is returned in a
2805 * parameter (_result_object). The contents of this
2806 * new object, starting at a zero offset, are a copy
2807 * of the source memory region. In the event of
2808 * an error, this parameter will contain the value
2809 * VM_OBJECT_NULL.
2810 */
0b4e3aa0 2811__private_extern__ kern_return_t
1c79356b
A
2812vm_object_copy_slowly(
2813 register vm_object_t src_object,
2814 vm_object_offset_t src_offset,
2815 vm_object_size_t size,
2816 boolean_t interruptible,
2817 vm_object_t *_result_object) /* OUT */
2818{
2819 vm_object_t new_object;
2820 vm_object_offset_t new_offset;
2821
2d21ac55 2822 struct vm_object_fault_info fault_info;
1c79356b
A
2823
2824 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
2825 src_object, src_offset, size, 0, 0);
2826
2827 if (size == 0) {
2828 vm_object_unlock(src_object);
2829 *_result_object = VM_OBJECT_NULL;
2830 return(KERN_INVALID_ARGUMENT);
2831 }
2832
2833 /*
2834 * Prevent destruction of the source object while we copy.
2835 */
2836
2d21ac55 2837 vm_object_reference_locked(src_object);
1c79356b
A
2838 vm_object_unlock(src_object);
2839
2840 /*
2841 * Create a new object to hold the copied pages.
2842 * A few notes:
2843 * We fill the new object starting at offset 0,
2844 * regardless of the input offset.
2845 * We don't bother to lock the new object within
2846 * this routine, since we have the only reference.
2847 */
2848
2849 new_object = vm_object_allocate(size);
2850 new_offset = 0;
2851
2852 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
2853
2d21ac55
A
2854 fault_info.interruptible = interruptible;
2855 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
2856 fault_info.user_tag = 0;
2857 fault_info.lo_offset = src_offset;
2858 fault_info.hi_offset = src_offset + size;
2859 fault_info.no_cache = FALSE;
b0d623f7 2860 fault_info.stealth = TRUE;
0b4c1975 2861 fault_info.mark_zf_absent = FALSE;
2d21ac55 2862
1c79356b
A
2863 for ( ;
2864 size != 0 ;
2865 src_offset += PAGE_SIZE_64,
2866 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
2867 ) {
2868 vm_page_t new_page;
2869 vm_fault_return_t result;
2870
2d21ac55
A
2871 vm_object_lock(new_object);
2872
1c79356b
A
2873 while ((new_page = vm_page_alloc(new_object, new_offset))
2874 == VM_PAGE_NULL) {
2d21ac55
A
2875
2876 vm_object_unlock(new_object);
2877
1c79356b
A
2878 if (!vm_page_wait(interruptible)) {
2879 vm_object_deallocate(new_object);
91447636 2880 vm_object_deallocate(src_object);
1c79356b
A
2881 *_result_object = VM_OBJECT_NULL;
2882 return(MACH_SEND_INTERRUPTED);
2883 }
2d21ac55 2884 vm_object_lock(new_object);
1c79356b 2885 }
2d21ac55 2886 vm_object_unlock(new_object);
1c79356b
A
2887
2888 do {
2889 vm_prot_t prot = VM_PROT_READ;
2890 vm_page_t _result_page;
2891 vm_page_t top_page;
2892 register
2893 vm_page_t result_page;
2894 kern_return_t error_code;
2895
2896 vm_object_lock(src_object);
2897 vm_object_paging_begin(src_object);
2898
b0d623f7
A
2899 if (size > (vm_size_t) -1) {
2900 /* 32-bit overflow */
2901 fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE);
2902 } else {
2903 fault_info.cluster_size = (vm_size_t) size;
2904 assert(fault_info.cluster_size == size);
2905 }
2d21ac55 2906
1c79356b
A
2907 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
2908 result = vm_fault_page(src_object, src_offset,
2d21ac55 2909 VM_PROT_READ, FALSE,
1c79356b
A
2910 &prot, &_result_page, &top_page,
2911 (int *)0,
2d21ac55 2912 &error_code, FALSE, FALSE, &fault_info);
1c79356b
A
2913
2914 switch(result) {
b0d623f7
A
2915 case VM_FAULT_SUCCESS:
2916 result_page = _result_page;
1c79356b 2917
b0d623f7
A
2918 /*
2919 * We don't need to hold the object
2920 * lock -- the busy page will be enough.
2921 * [We don't care about picking up any
2922 * new modifications.]
2923 *
2924 * Copy the page to the new object.
2925 *
2926 * POLICY DECISION:
2927 * If result_page is clean,
2928 * we could steal it instead
2929 * of copying.
2930 */
1c79356b 2931
b0d623f7
A
2932 vm_object_unlock(result_page->object);
2933 vm_page_copy(result_page, new_page);
1c79356b 2934
b0d623f7
A
2935 /*
2936 * Let go of both pages (make them
2937 * not busy, perform wakeup, activate).
2938 */
2939 vm_object_lock(new_object);
2940 new_page->dirty = TRUE;
2941 PAGE_WAKEUP_DONE(new_page);
2942 vm_object_unlock(new_object);
1c79356b 2943
b0d623f7
A
2944 vm_object_lock(result_page->object);
2945 PAGE_WAKEUP_DONE(result_page);
1c79356b 2946
b0d623f7
A
2947 vm_page_lockspin_queues();
2948 if (!result_page->active &&
2949 !result_page->inactive &&
2950 !result_page->throttled)
2951 vm_page_activate(result_page);
2952 vm_page_activate(new_page);
2953 vm_page_unlock_queues();
1c79356b 2954
b0d623f7
A
2955 /*
2956 * Release paging references and
2957 * top-level placeholder page, if any.
2958 */
2959
2960 vm_fault_cleanup(result_page->object,
2961 top_page);
2962
2963 break;
1c79356b 2964
b0d623f7
A
2965 case VM_FAULT_RETRY:
2966 break;
2967
2968 case VM_FAULT_FICTITIOUS_SHORTAGE:
2969 vm_page_more_fictitious();
2970 break;
1c79356b 2971
b0d623f7
A
2972 case VM_FAULT_MEMORY_SHORTAGE:
2973 if (vm_page_wait(interruptible))
1c79356b 2974 break;
b0d623f7 2975 /* fall thru */
1c79356b 2976
b0d623f7
A
2977 case VM_FAULT_INTERRUPTED:
2978 vm_object_lock(new_object);
2979 VM_PAGE_FREE(new_page);
2980 vm_object_unlock(new_object);
2981
2982 vm_object_deallocate(new_object);
2983 vm_object_deallocate(src_object);
2984 *_result_object = VM_OBJECT_NULL;
2985 return(MACH_SEND_INTERRUPTED);
1c79356b 2986
b0d623f7
A
2987 case VM_FAULT_SUCCESS_NO_VM_PAGE:
2988 /* success but no VM page: fail */
2989 vm_object_paging_end(src_object);
2990 vm_object_unlock(src_object);
2991 /*FALLTHROUGH*/
2992 case VM_FAULT_MEMORY_ERROR:
2993 /*
2994 * A policy choice:
2995 * (a) ignore pages that we can't
2996 * copy
2997 * (b) return the null object if
2998 * any page fails [chosen]
2999 */
593a1d5f 3000
b0d623f7
A
3001 vm_object_lock(new_object);
3002 VM_PAGE_FREE(new_page);
3003 vm_object_unlock(new_object);
1c79356b 3004
b0d623f7
A
3005 vm_object_deallocate(new_object);
3006 vm_object_deallocate(src_object);
3007 *_result_object = VM_OBJECT_NULL;
3008 return(error_code ? error_code:
3009 KERN_MEMORY_ERROR);
1c79356b 3010
b0d623f7
A
3011 default:
3012 panic("vm_object_copy_slowly: unexpected error"
3013 " 0x%x from vm_fault_page()\n", result);
1c79356b
A
3014 }
3015 } while (result != VM_FAULT_SUCCESS);
3016 }
3017
3018 /*
3019 * Lose the extra reference, and return our object.
3020 */
1c79356b
A
3021 vm_object_deallocate(src_object);
3022 *_result_object = new_object;
3023 return(KERN_SUCCESS);
3024}
3025
3026/*
3027 * Routine: vm_object_copy_quickly
3028 *
3029 * Purpose:
3030 * Copy the specified range of the source virtual
3031 * memory object, if it can be done without waiting
3032 * for user-generated events.
3033 *
3034 * Results:
3035 * If the copy is successful, the copy is returned in
3036 * the arguments; otherwise, the arguments are not
3037 * affected.
3038 *
3039 * In/out conditions:
3040 * The object should be unlocked on entry and exit.
3041 */
3042
3043/*ARGSUSED*/
0b4e3aa0 3044__private_extern__ boolean_t
1c79356b
A
3045vm_object_copy_quickly(
3046 vm_object_t *_object, /* INOUT */
91447636
A
3047 __unused vm_object_offset_t offset, /* IN */
3048 __unused vm_object_size_t size, /* IN */
1c79356b
A
3049 boolean_t *_src_needs_copy, /* OUT */
3050 boolean_t *_dst_needs_copy) /* OUT */
3051{
3052 vm_object_t object = *_object;
3053 memory_object_copy_strategy_t copy_strategy;
3054
3055 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
3056 *_object, offset, size, 0, 0);
3057 if (object == VM_OBJECT_NULL) {
3058 *_src_needs_copy = FALSE;
3059 *_dst_needs_copy = FALSE;
3060 return(TRUE);
3061 }
3062
3063 vm_object_lock(object);
3064
3065 copy_strategy = object->copy_strategy;
3066
3067 switch (copy_strategy) {
3068 case MEMORY_OBJECT_COPY_SYMMETRIC:
3069
3070 /*
3071 * Symmetric copy strategy.
3072 * Make another reference to the object.
3073 * Leave object/offset unchanged.
3074 */
3075
2d21ac55 3076 vm_object_reference_locked(object);
1c79356b
A
3077 object->shadowed = TRUE;
3078 vm_object_unlock(object);
3079
3080 /*
3081 * Both source and destination must make
3082 * shadows, and the source must be made
3083 * read-only if not already.
3084 */
3085
3086 *_src_needs_copy = TRUE;
3087 *_dst_needs_copy = TRUE;
3088
3089 break;
3090
3091 case MEMORY_OBJECT_COPY_DELAY:
3092 vm_object_unlock(object);
3093 return(FALSE);
3094
3095 default:
3096 vm_object_unlock(object);
3097 return(FALSE);
3098 }
3099 return(TRUE);
3100}
3101
0b4e3aa0
A
3102static int copy_call_count = 0;
3103static int copy_call_sleep_count = 0;
3104static int copy_call_restart_count = 0;
1c79356b
A
3105
3106/*
3107 * Routine: vm_object_copy_call [internal]
3108 *
3109 * Description:
3110 * Copy the source object (src_object), using the
3111 * user-managed copy algorithm.
3112 *
3113 * In/out conditions:
3114 * The source object must be locked on entry. It
3115 * will be *unlocked* on exit.
3116 *
3117 * Results:
3118 * If the copy is successful, KERN_SUCCESS is returned.
3119 * A new object that represents the copied virtual
3120 * memory is returned in a parameter (*_result_object).
3121 * If the return value indicates an error, this parameter
3122 * is not valid.
3123 */
0b4e3aa0 3124static kern_return_t
1c79356b
A
3125vm_object_copy_call(
3126 vm_object_t src_object,
3127 vm_object_offset_t src_offset,
3128 vm_object_size_t size,
3129 vm_object_t *_result_object) /* OUT */
3130{
3131 kern_return_t kr;
3132 vm_object_t copy;
3133 boolean_t check_ready = FALSE;
2d21ac55 3134 uint32_t try_failed_count = 0;
1c79356b
A
3135
3136 /*
3137 * If a copy is already in progress, wait and retry.
3138 *
3139 * XXX
 3140 * Consider making this call interruptible, as Mike
3141 * intended it to be.
3142 *
3143 * XXXO
3144 * Need a counter or version or something to allow
3145 * us to use the copy that the currently requesting
3146 * thread is obtaining -- is it worth adding to the
 3147 * vm object structure? Depends on how common this case is.
3148 */
3149 copy_call_count++;
3150 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
9bccf70c 3151 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1c79356b 3152 THREAD_UNINT);
1c79356b
A
3153 copy_call_restart_count++;
3154 }
3155
3156 /*
3157 * Indicate (for the benefit of memory_object_create_copy)
3158 * that we want a copy for src_object. (Note that we cannot
3159 * do a real assert_wait before calling memory_object_copy,
3160 * so we simply set the flag.)
3161 */
3162
3163 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
3164 vm_object_unlock(src_object);
3165
3166 /*
3167 * Ask the memory manager to give us a memory object
3168 * which represents a copy of the src object.
3169 * The memory manager may give us a memory object
3170 * which we already have, or it may give us a
3171 * new memory object. This memory object will arrive
3172 * via memory_object_create_copy.
3173 */
3174
3175 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
3176 if (kr != KERN_SUCCESS) {
3177 return kr;
3178 }
3179
3180 /*
3181 * Wait for the copy to arrive.
3182 */
3183 vm_object_lock(src_object);
3184 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
9bccf70c 3185 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1c79356b 3186 THREAD_UNINT);
1c79356b
A
3187 copy_call_sleep_count++;
3188 }
3189Retry:
3190 assert(src_object->copy != VM_OBJECT_NULL);
3191 copy = src_object->copy;
3192 if (!vm_object_lock_try(copy)) {
3193 vm_object_unlock(src_object);
2d21ac55
A
3194
3195 try_failed_count++;
3196 mutex_pause(try_failed_count); /* wait a bit */
3197
1c79356b
A
3198 vm_object_lock(src_object);
3199 goto Retry;
3200 }
3201 if (copy->size < src_offset+size)
3202 copy->size = src_offset+size;
3203
3204 if (!copy->pager_ready)
3205 check_ready = TRUE;
3206
3207 /*
3208 * Return the copy.
3209 */
3210 *_result_object = copy;
3211 vm_object_unlock(copy);
3212 vm_object_unlock(src_object);
3213
3214 /* Wait for the copy to be ready. */
3215 if (check_ready == TRUE) {
3216 vm_object_lock(copy);
3217 while (!copy->pager_ready) {
9bccf70c 3218 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1c79356b
A
3219 }
3220 vm_object_unlock(copy);
3221 }
3222
3223 return KERN_SUCCESS;
3224}
3225
0b4e3aa0
A
3226static int copy_delayed_lock_collisions = 0;
3227static int copy_delayed_max_collisions = 0;
3228static int copy_delayed_lock_contention = 0;
3229static int copy_delayed_protect_iterate = 0;
1c79356b
A
3230
3231/*
3232 * Routine: vm_object_copy_delayed [internal]
3233 *
3234 * Description:
3235 * Copy the specified virtual memory object, using
3236 * the asymmetric copy-on-write algorithm.
3237 *
3238 * In/out conditions:
55e303ae
A
3239 * The src_object must be locked on entry. It will be unlocked
3240 * on exit - so the caller must also hold a reference to it.
1c79356b
A
3241 *
3242 * This routine will not block waiting for user-generated
3243 * events. It is not interruptible.
3244 */
0b4e3aa0 3245__private_extern__ vm_object_t
1c79356b
A
3246vm_object_copy_delayed(
3247 vm_object_t src_object,
3248 vm_object_offset_t src_offset,
2d21ac55
A
3249 vm_object_size_t size,
3250 boolean_t src_object_shared)
1c79356b
A
3251{
3252 vm_object_t new_copy = VM_OBJECT_NULL;
3253 vm_object_t old_copy;
3254 vm_page_t p;
55e303ae 3255 vm_object_size_t copy_size = src_offset + size;
1c79356b 3256
2d21ac55 3257
1c79356b
A
3258 int collisions = 0;
3259 /*
3260 * The user-level memory manager wants to see all of the changes
3261 * to this object, but it has promised not to make any changes on
3262 * its own.
3263 *
3264 * Perform an asymmetric copy-on-write, as follows:
3265 * Create a new object, called a "copy object" to hold
3266 * pages modified by the new mapping (i.e., the copy,
3267 * not the original mapping).
3268 * Record the original object as the backing object for
3269 * the copy object. If the original mapping does not
3270 * change a page, it may be used read-only by the copy.
3271 * Record the copy object in the original object.
3272 * When the original mapping causes a page to be modified,
3273 * it must be copied to a new page that is "pushed" to
3274 * the copy object.
3275 * Mark the new mapping (the copy object) copy-on-write.
3276 * This makes the copy object itself read-only, allowing
3277 * it to be reused if the original mapping makes no
3278 * changes, and simplifying the synchronization required
3279 * in the "push" operation described above.
3280 *
 3281 * The copy-on-write is said to be asymmetric because the original
 3282 * object is *not* marked copy-on-write. A copied page is pushed
 3283 * to the copy object, regardless of which party attempted to modify
3284 * the page.
3285 *
3286 * Repeated asymmetric copy operations may be done. If the
3287 * original object has not been changed since the last copy, its
3288 * copy object can be reused. Otherwise, a new copy object can be
3289 * inserted between the original object and its previous copy
 3290 * object. Since any copy object is read-only, this cannot affect
 3291 * the contents of the previous copy object.
3292 *
3293 * Note that a copy object is higher in the object tree than the
3294 * original object; therefore, use of the copy object recorded in
3295 * the original object must be done carefully, to avoid deadlock.
3296 */
3297
3298 Retry:
1c79356b 3299
55e303ae
A
3300 /*
3301 * Wait for paging in progress.
3302 */
b0d623f7
A
3303 if (!src_object->true_share &&
3304 (src_object->paging_in_progress != 0 ||
3305 src_object->activity_in_progress != 0)) {
2d21ac55
A
3306 if (src_object_shared == TRUE) {
3307 vm_object_unlock(src_object);
2d21ac55
A
3308 vm_object_lock(src_object);
3309 src_object_shared = FALSE;
b0d623f7 3310 goto Retry;
2d21ac55 3311 }
55e303ae 3312 vm_object_paging_wait(src_object, THREAD_UNINT);
2d21ac55 3313 }
1c79356b
A
3314 /*
3315 * See whether we can reuse the result of a previous
3316 * copy operation.
3317 */
3318
3319 old_copy = src_object->copy;
3320 if (old_copy != VM_OBJECT_NULL) {
2d21ac55
A
3321 int lock_granted;
3322
1c79356b
A
3323 /*
3324 * Try to get the locks (out of order)
3325 */
2d21ac55
A
3326 if (src_object_shared == TRUE)
3327 lock_granted = vm_object_lock_try_shared(old_copy);
3328 else
3329 lock_granted = vm_object_lock_try(old_copy);
3330
3331 if (!lock_granted) {
1c79356b 3332 vm_object_unlock(src_object);
1c79356b 3333
1c79356b
A
3334 if (collisions++ == 0)
3335 copy_delayed_lock_contention++;
2d21ac55
A
3336 mutex_pause(collisions);
3337
3338 /* Heisenberg Rules */
3339 copy_delayed_lock_collisions++;
1c79356b
A
3340
3341 if (collisions > copy_delayed_max_collisions)
3342 copy_delayed_max_collisions = collisions;
3343
2d21ac55
A
3344 if (src_object_shared == TRUE)
3345 vm_object_lock_shared(src_object);
3346 else
3347 vm_object_lock(src_object);
3348
1c79356b
A
3349 goto Retry;
3350 }
3351
3352 /*
3353 * Determine whether the old copy object has
3354 * been modified.
3355 */
3356
3357 if (old_copy->resident_page_count == 0 &&
3358 !old_copy->pager_created) {
3359 /*
3360 * It has not been modified.
3361 *
3362 * Return another reference to
55e303ae
A
3363 * the existing copy-object if
3364 * we can safely grow it (if
3365 * needed).
de355530 3366 */
1c79356b 3367
55e303ae 3368 if (old_copy->size < copy_size) {
2d21ac55
A
3369 if (src_object_shared == TRUE) {
3370 vm_object_unlock(old_copy);
3371 vm_object_unlock(src_object);
3372
3373 vm_object_lock(src_object);
3374 src_object_shared = FALSE;
3375 goto Retry;
3376 }
55e303ae
A
3377 /*
3378 * We can't perform a delayed copy if any of the
3379 * pages in the extended range are wired (because
3380 * we can't safely take write permission away from
3381 * wired pages). If the pages aren't wired, then
3382 * go ahead and protect them.
3383 */
3384 copy_delayed_protect_iterate++;
2d21ac55 3385
55e303ae
A
3386 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
3387 if (!p->fictitious &&
3388 p->offset >= old_copy->size &&
3389 p->offset < copy_size) {
b0d623f7 3390 if (VM_PAGE_WIRED(p)) {
55e303ae
A
3391 vm_object_unlock(old_copy);
3392 vm_object_unlock(src_object);
91447636
A
3393
3394 if (new_copy != VM_OBJECT_NULL) {
3395 vm_object_unlock(new_copy);
3396 vm_object_deallocate(new_copy);
3397 }
3398
55e303ae
A
3399 return VM_OBJECT_NULL;
3400 } else {
3401 pmap_page_protect(p->phys_page,
2d21ac55 3402 (VM_PROT_ALL & ~VM_PROT_WRITE));
55e303ae
A
3403 }
3404 }
3405 }
3406 old_copy->size = copy_size;
3407 }
2d21ac55
A
3408 if (src_object_shared == TRUE)
3409 vm_object_reference_shared(old_copy);
3410 else
3411 vm_object_reference_locked(old_copy);
d7e50217
A
3412 vm_object_unlock(old_copy);
3413 vm_object_unlock(src_object);
91447636
A
3414
3415 if (new_copy != VM_OBJECT_NULL) {
3416 vm_object_unlock(new_copy);
3417 vm_object_deallocate(new_copy);
3418 }
55e303ae 3419 return(old_copy);
d7e50217 3420 }
2d21ac55
A
3421
3422
de355530
A
3423
3424 /*
3425 * Adjust the size argument so that the newly-created
3426 * copy object will be large enough to back either the
55e303ae 3427 * old copy object or the new mapping.
de355530 3428 */
55e303ae
A
3429 if (old_copy->size > copy_size)
3430 copy_size = old_copy->size;
3431
3432 if (new_copy == VM_OBJECT_NULL) {
3433 vm_object_unlock(old_copy);
3434 vm_object_unlock(src_object);
3435 new_copy = vm_object_allocate(copy_size);
3436 vm_object_lock(src_object);
3437 vm_object_lock(new_copy);
2d21ac55
A
3438
3439 src_object_shared = FALSE;
55e303ae
A
3440 goto Retry;
3441 }
3442 new_copy->size = copy_size;
1c79356b
A
3443
3444 /*
3445 * The copy-object is always made large enough to
3446 * completely shadow the original object, since
3447 * it may have several users who want to shadow
3448 * the original object at different points.
3449 */
3450
3451 assert((old_copy->shadow == src_object) &&
3452 (old_copy->shadow_offset == (vm_object_offset_t) 0));
3453
55e303ae
A
3454 } else if (new_copy == VM_OBJECT_NULL) {
3455 vm_object_unlock(src_object);
3456 new_copy = vm_object_allocate(copy_size);
3457 vm_object_lock(src_object);
3458 vm_object_lock(new_copy);
2d21ac55
A
3459
3460 src_object_shared = FALSE;
55e303ae
A
3461 goto Retry;
3462 }
3463
3464 /*
3465 * We now have the src object locked, and the new copy object
3466 * allocated and locked (and potentially the old copy locked).
3467 * Before we go any further, make sure we can still perform
3468 * a delayed copy, as the situation may have changed.
3469 *
3470 * Specifically, we can't perform a delayed copy if any of the
3471 * pages in the range are wired (because we can't safely take
3472 * write permission away from wired pages). If the pages aren't
3473 * wired, then go ahead and protect them.
3474 */
3475 copy_delayed_protect_iterate++;
2d21ac55 3476
55e303ae
A
3477 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
3478 if (!p->fictitious && p->offset < copy_size) {
b0d623f7 3479 if (VM_PAGE_WIRED(p)) {
55e303ae
A
3480 if (old_copy)
3481 vm_object_unlock(old_copy);
3482 vm_object_unlock(src_object);
3483 vm_object_unlock(new_copy);
3484 vm_object_deallocate(new_copy);
3485 return VM_OBJECT_NULL;
3486 } else {
3487 pmap_page_protect(p->phys_page,
2d21ac55 3488 (VM_PROT_ALL & ~VM_PROT_WRITE));
55e303ae
A
3489 }
3490 }
3491 }
55e303ae 3492 if (old_copy != VM_OBJECT_NULL) {
1c79356b
A
3493 /*
3494 * Make the old copy-object shadow the new one.
3495 * It will receive no more pages from the original
3496 * object.
3497 */
3498
2d21ac55
A
3499 /* remove ref. from old_copy */
3500 vm_object_lock_assert_exclusive(src_object);
3501 src_object->ref_count--;
1c79356b 3502 assert(src_object->ref_count > 0);
2d21ac55 3503 vm_object_lock_assert_exclusive(old_copy);
1c79356b 3504 old_copy->shadow = new_copy;
2d21ac55 3505 vm_object_lock_assert_exclusive(new_copy);
1c79356b
A
3506 assert(new_copy->ref_count > 0);
3507 new_copy->ref_count++; /* for old_copy->shadow ref. */
3508
3509#if TASK_SWAPPER
3510 if (old_copy->res_count) {
3511 VM_OBJ_RES_INCR(new_copy);
3512 VM_OBJ_RES_DECR(src_object);
3513 }
3514#endif
3515
3516 vm_object_unlock(old_copy); /* done with old_copy */
1c79356b
A
3517 }
3518
3519 /*
3520 * Point the new copy at the existing object.
3521 */
2d21ac55 3522 vm_object_lock_assert_exclusive(new_copy);
1c79356b
A
3523 new_copy->shadow = src_object;
3524 new_copy->shadow_offset = 0;
3525 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2d21ac55
A
3526
3527 vm_object_lock_assert_exclusive(src_object);
3528 vm_object_reference_locked(src_object);
1c79356b 3529 src_object->copy = new_copy;
55e303ae 3530 vm_object_unlock(src_object);
1c79356b
A
3531 vm_object_unlock(new_copy);
3532
1c79356b
A
3533 XPR(XPR_VM_OBJECT,
3534 "vm_object_copy_delayed: used copy object %X for source %X\n",
b0d623f7 3535 new_copy, src_object, 0, 0, 0);
1c79356b 3536
2d21ac55 3537 return new_copy;
1c79356b
A
3538}
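/*
 * A sketch of how a caller drives vm_object_copy_delayed() and what the
 * object chain looks like on success.  It assumes the caller already
 * holds a reference on src_object (required, since the object comes
 * back unlocked); the offset/size values are whatever range the caller
 * is copying.
 */
static vm_object_t
vm_object_copy_delayed_sketch(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size)
{
	vm_object_t	copy;

	vm_object_lock(src_object);
	copy = vm_object_copy_delayed(src_object, src_offset, size, FALSE);
	/* src_object has been unlocked by the callee */

	if (copy == VM_OBJECT_NULL) {
		/* wired pages in the range prevented the delayed copy */
		return VM_OBJECT_NULL;
	}
	/*
	 * At this point (barring further copy operations by others):
	 *	copy->shadow		== src_object
	 *	copy->shadow_offset	== 0
	 *	src_object->copy	== copy
	 * The caller must still mark its own mapping needs_copy.
	 */
	return copy;
}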
3539
3540/*
3541 * Routine: vm_object_copy_strategically
3542 *
3543 * Purpose:
3544 * Perform a copy according to the source object's
3545 * declared strategy. This operation may block,
3546 * and may be interrupted.
3547 */
0b4e3aa0 3548__private_extern__ kern_return_t
1c79356b
A
3549vm_object_copy_strategically(
3550 register vm_object_t src_object,
3551 vm_object_offset_t src_offset,
3552 vm_object_size_t size,
3553 vm_object_t *dst_object, /* OUT */
3554 vm_object_offset_t *dst_offset, /* OUT */
3555 boolean_t *dst_needs_copy) /* OUT */
3556{
3557 boolean_t result;
3558 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2d21ac55 3559 boolean_t object_lock_shared = FALSE;
1c79356b
A
3560 memory_object_copy_strategy_t copy_strategy;
3561
3562 assert(src_object != VM_OBJECT_NULL);
3563
2d21ac55
A
3564 copy_strategy = src_object->copy_strategy;
3565
3566 if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
3567 vm_object_lock_shared(src_object);
3568 object_lock_shared = TRUE;
3569 } else
3570 vm_object_lock(src_object);
1c79356b
A
3571
3572 /*
3573 * The copy strategy is only valid if the memory manager
3574 * is "ready". Internal objects are always ready.
3575 */
3576
3577 while (!src_object->internal && !src_object->pager_ready) {
9bccf70c 3578 wait_result_t wait_result;
1c79356b 3579
2d21ac55
A
3580 if (object_lock_shared == TRUE) {
3581 vm_object_unlock(src_object);
3582 vm_object_lock(src_object);
3583 object_lock_shared = FALSE;
3584 continue;
3585 }
9bccf70c
A
3586 wait_result = vm_object_sleep( src_object,
3587 VM_OBJECT_EVENT_PAGER_READY,
3588 interruptible);
3589 if (wait_result != THREAD_AWAKENED) {
3590 vm_object_unlock(src_object);
1c79356b
A
3591 *dst_object = VM_OBJECT_NULL;
3592 *dst_offset = 0;
3593 *dst_needs_copy = FALSE;
3594 return(MACH_SEND_INTERRUPTED);
3595 }
1c79356b
A
3596 }
3597
1c79356b
A
3598 /*
3599 * Use the appropriate copy strategy.
3600 */
3601
3602 switch (copy_strategy) {
55e303ae
A
3603 case MEMORY_OBJECT_COPY_DELAY:
3604 *dst_object = vm_object_copy_delayed(src_object,
2d21ac55 3605 src_offset, size, object_lock_shared);
55e303ae
A
3606 if (*dst_object != VM_OBJECT_NULL) {
3607 *dst_offset = src_offset;
3608 *dst_needs_copy = TRUE;
3609 result = KERN_SUCCESS;
3610 break;
3611 }
3612 vm_object_lock(src_object);
3613 /* fall thru when delayed copy not allowed */
3614
1c79356b
A
3615 case MEMORY_OBJECT_COPY_NONE:
3616 result = vm_object_copy_slowly(src_object, src_offset, size,
3617 interruptible, dst_object);
3618 if (result == KERN_SUCCESS) {
3619 *dst_offset = 0;
3620 *dst_needs_copy = FALSE;
3621 }
3622 break;
3623
3624 case MEMORY_OBJECT_COPY_CALL:
3625 result = vm_object_copy_call(src_object, src_offset, size,
3626 dst_object);
3627 if (result == KERN_SUCCESS) {
3628 *dst_offset = src_offset;
3629 *dst_needs_copy = TRUE;
3630 }
3631 break;
3632
1c79356b 3633 case MEMORY_OBJECT_COPY_SYMMETRIC:
b0d623f7 3634 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0);
1c79356b
A
3635 vm_object_unlock(src_object);
3636 result = KERN_MEMORY_RESTART_COPY;
3637 break;
3638
3639 default:
3640 panic("copy_strategically: bad strategy");
3641 result = KERN_INVALID_ARGUMENT;
3642 }
3643 return(result);
3644}
3645
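/*
 * Example (illustrative sketch, not part of this file's build): a minimal
 * hypothetical caller of vm_object_copy_strategically() showing how each
 * return value documented above is meant to be handled.  The wrapper name
 * is a placeholder; real callers live in the vm_map copy paths.
 */
#if 0 /* illustrative example only */
static kern_return_t
example_copy_by_strategy(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size)
{
	vm_object_t		copy_object;
	vm_object_offset_t	copy_offset;
	boolean_t		needs_copy;
	kern_return_t		kr;

	kr = vm_object_copy_strategically(src_object, src_offset, size,
					  &copy_object, &copy_offset,
					  &needs_copy);
	switch (kr) {
	case KERN_SUCCESS:
		/*
		 * copy_object/copy_offset are valid; if needs_copy is TRUE
		 * the caller must still mark its mapping copy-on-write.
		 */
		break;
	case KERN_MEMORY_RESTART_COPY:
		/*
		 * COPY_SYMMETRIC source: the copy must be redone by the
		 * caller, typically as a symmetric copy at the map level.
		 */
		break;
	default:
		/* e.g. MACH_SEND_INTERRUPTED from the pager-ready wait */
		break;
	}
	return kr;
}
#endif
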
3646/*
3647 * vm_object_shadow:
3648 *
3649 * Create a new object which is backed by the
3650 * specified existing object range. The source
3651 * object reference is deallocated.
3652 *
3653 * The new object and offset into that object
3654 * are returned in the source parameters.
3655 */
3656boolean_t vm_object_shadow_check = FALSE;
3657
0b4e3aa0 3658__private_extern__ boolean_t
1c79356b
A
3659vm_object_shadow(
3660 vm_object_t *object, /* IN/OUT */
3661 vm_object_offset_t *offset, /* IN/OUT */
3662 vm_object_size_t length)
3663{
3664 register vm_object_t source;
3665 register vm_object_t result;
3666
3667 source = *object;
2d21ac55
A
3668#if 0
3669 /*
3670 * XXX FBDP
3671 * This assertion is valid but it gets triggered by Rosetta for example
3672 * due to a combination of vm_remap() that changes a VM object's
3673 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
3674 * that then sets "needs_copy" on its map entry. This creates a
3675 * mapping situation that VM should never see and doesn't know how to
3676 * handle.
3677 * It's not clear if this can create any real problem but we should
3678 * look into fixing this, probably by having vm_protect(VM_PROT_COPY)
3679 * do more than just set "needs_copy" to handle the copy-on-write...
3680 * In the meantime, let's disable the assertion.
3681 */
1c79356b 3682 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2d21ac55 3683#endif
1c79356b
A
3684
3685 /*
3686 * Determine if we really need a shadow.
3687 */
3688
3689 if (vm_object_shadow_check && source->ref_count == 1 &&
3690 (source->shadow == VM_OBJECT_NULL ||
3691 source->shadow->copy == VM_OBJECT_NULL))
3692 {
3693 source->shadowed = FALSE;
3694 return FALSE;
3695 }
3696
3697 /*
3698 * Allocate a new object with the given length
3699 */
3700
3701 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
3702 panic("vm_object_shadow: no object for shadowing");
3703
3704 /*
3705 * The new object shadows the source object, adding
3706 * a reference to it. Our caller changes his reference
3707 * to point to the new object, removing a reference to
3708 * the source object. Net result: no change of reference
3709 * count.
3710 */
3711 result->shadow = source;
3712
3713 /*
3714 * Store the offset into the source object,
3715 * and fix up the offset into the new object.
3716 */
3717
3718 result->shadow_offset = *offset;
3719
3720 /*
 3721 * Return the new object and offset.
3722 */
3723
3724 *offset = 0;
3725 *object = result;
3726 return TRUE;
3727}
3728
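/*
 * Example (illustrative sketch, not part of this file's build): the usual
 * copy-on-write "shadow push" built on vm_object_shadow().  The caller's
 * single reference on the old object is absorbed by the new shadow, so the
 * object/offset are simply replaced in place; the wrapper name is a
 * placeholder for whatever map-entry state a real caller updates.
 */
#if 0 /* illustrative example only */
static void
example_shadow_push(
	vm_object_t		*objp,		/* IN/OUT: mapped object  */
	vm_object_offset_t	*offsetp,	/* IN/OUT: offset into it */
	vm_object_size_t	length)
{
	if (!vm_object_shadow(objp, offsetp, length)) {
		/* no shadow was needed; *objp and *offsetp are unchanged */
		return;
	}
	/*
	 * *objp is now a fresh shadow object at offset 0, backed by the
	 * old object; our old reference was transferred into the shadow
	 * chain, so the caller just starts using the new object/offset.
	 */
}
#endif
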
3729/*
3730 * The relationship between vm_object structures and
0b4e3aa0 3731 * the memory_object requires careful synchronization.
1c79356b 3732 *
0b4e3aa0
A
3733 * All associations are created by memory_object_create_named
3734 * for external pagers and vm_object_pager_create for internal
3735 * objects as follows:
3736 *
3737 * pager: the memory_object itself, supplied by
1c79356b
A
3738 * the user requesting a mapping (or the kernel,
3739 * when initializing internal objects); the
3740 * kernel simulates holding send rights by keeping
3741 * a port reference;
0b4e3aa0 3742 *
1c79356b
A
3743 * pager_request:
3744 * the memory object control port,
3745 * created by the kernel; the kernel holds
3746 * receive (and ownership) rights to this
3747 * port, but no other references.
1c79356b
A
3748 *
3749 * When initialization is complete, the "initialized" field
3750 * is asserted. Other mappings using a particular memory object,
3751 * and any references to the vm_object gained through the
3752 * port association must wait for this initialization to occur.
3753 *
3754 * In order to allow the memory manager to set attributes before
3755 * requests (notably virtual copy operations, but also data or
3756 * unlock requests) are made, a "ready" attribute is made available.
3757 * Only the memory manager may affect the value of this attribute.
3758 * Its value does not affect critical kernel functions, such as
3759 * internal object initialization or destruction. [Furthermore,
3760 * memory objects created by the kernel are assumed to be ready
3761 * immediately; the default memory manager need not explicitly
3762 * set the "ready" attribute.]
3763 *
3764 * [Both the "initialized" and "ready" attribute wait conditions
3765 * use the "pager" field as the wait event.]
3766 *
3767 * The port associations can be broken down by any of the
3768 * following routines:
3769 * vm_object_terminate:
3770 * No references to the vm_object remain, and
3771 * the object cannot (or will not) be cached.
3772 * This is the normal case, and is done even
3773 * though one of the other cases has already been
3774 * done.
1c79356b
A
3775 * memory_object_destroy:
3776 * The memory manager has requested that the
0b4e3aa0
A
3777 * kernel relinquish references to the memory
3778 * object. [The memory manager may not want to
3779 * destroy the memory object, but may wish to
3780 * refuse or tear down existing memory mappings.]
3781 *
1c79356b
A
3782 * Each routine that breaks an association must break all of
3783 * them at once. At some later time, that routine must clear
0b4e3aa0 3784 * the pager field and release the memory object references.
1c79356b
A
3785 * [Furthermore, each routine must cope with the simultaneous
3786 * or previous operations of the others.]
3787 *
b0d623f7 3788 * In addition to the lock on the object, the vm_object_hash_lock
0b4e3aa0 3789 * governs the associations. References gained through the
b0d623f7 3790 * association require use of the hash lock.
1c79356b 3791 *
0b4e3aa0 3792 * Because the pager field may be cleared spontaneously, it
1c79356b
A
3793 * cannot be used to determine whether a memory object has
3794 * ever been associated with a particular vm_object. [This
2d21ac55
A
3795 * knowledge is important to the shadow object mechanism.]
3796 * For this reason, an additional "created" attribute is
3797 * provided.
3798 *
3799 * During various paging operations, the pager reference found in the
3800 * vm_object must be valid. To prevent this from being released,
3801 * (other than being removed, i.e., made null), routines may use
3802 * the vm_object_paging_begin/end routines [actually, macros].
3803 * The implementation uses the "paging_in_progress" and "wanted" fields.
3804 * [Operations that alter the validity of the pager values include the
3805 * termination routines and vm_object_collapse.]
3806 */
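/*
 * Example (illustrative sketch, not part of this file's build): the wait
 * protocol described above, as it is used later in this file by
 * vm_object_enter() and vm_object_copy_strategically().  A minimal sketch
 * only; the function name is a placeholder and the object is assumed to be
 * locked exclusive inside it.
 */
#if 0 /* illustrative example only */
static void
example_wait_for_pager(
	vm_object_t	object)
{
	vm_object_lock(object);

	/* wait for the first mapper to finish setting up the association */
	while (!object->pager_initialized) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_INITIALIZED,
				THREAD_UNINT);
	}

	/* external objects: also wait until the memory manager is "ready" */
	while (!object->internal && !object->pager_ready) {
		wait_result_t	wr;

		wr = vm_object_sleep(object,
				     VM_OBJECT_EVENT_PAGER_READY,
				     THREAD_ABORTSAFE);
		if (wr != THREAD_AWAKENED)
			break;		/* interrupted */
	}
	vm_object_unlock(object);
}
#endif
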
1c79356b 3807
1c79356b
A
3808
3809/*
3810 * Routine: vm_object_enter
3811 * Purpose:
3812 * Find a VM object corresponding to the given
3813 * pager; if no such object exists, create one,
3814 * and initialize the pager.
3815 */
3816vm_object_t
3817vm_object_enter(
0b4e3aa0 3818 memory_object_t pager,
1c79356b
A
3819 vm_object_size_t size,
3820 boolean_t internal,
3821 boolean_t init,
0b4e3aa0 3822 boolean_t named)
1c79356b
A
3823{
3824 register vm_object_t object;
3825 vm_object_t new_object;
3826 boolean_t must_init;
1c79356b 3827 vm_object_hash_entry_t entry, new_entry;
2d21ac55 3828 uint32_t try_failed_count = 0;
b0d623f7 3829 lck_mtx_t *lck;
1c79356b 3830
0b4e3aa0 3831 if (pager == MEMORY_OBJECT_NULL)
1c79356b
A
3832 return(vm_object_allocate(size));
3833
3834 new_object = VM_OBJECT_NULL;
3835 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3836 must_init = init;
3837
3838 /*
3839 * Look for an object associated with this port.
3840 */
2d21ac55 3841Retry:
b0d623f7 3842 lck = vm_object_hash_lock_spin(pager);
55e303ae 3843 do {
1c79356b
A
3844 entry = vm_object_hash_lookup(pager, FALSE);
3845
55e303ae
A
3846 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
3847 if (new_object == VM_OBJECT_NULL) {
3848 /*
3849 * We must unlock to create a new object;
3850 * if we do so, we must try the lookup again.
3851 */
b0d623f7 3852 vm_object_hash_unlock(lck);
55e303ae
A
3853 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
3854 new_entry = vm_object_hash_entry_alloc(pager);
3855 new_object = vm_object_allocate(size);
b0d623f7 3856 lck = vm_object_hash_lock_spin(pager);
55e303ae
A
3857 } else {
3858 /*
3859 * Lookup failed twice, and we have something
3860 * to insert; set the object.
3861 */
b0d623f7 3862 vm_object_hash_insert(new_entry, new_object);
55e303ae 3863 entry = new_entry;
55e303ae
A
3864 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3865 new_object = VM_OBJECT_NULL;
3866 must_init = TRUE;
3867 }
3868 } else if (entry->object == VM_OBJECT_NULL) {
3869 /*
3870 * If a previous object is being terminated,
3871 * we must wait for the termination message
3872 * to be queued (and lookup the entry again).
3873 */
1c79356b 3874 entry->waiting = TRUE;
55e303ae 3875 entry = VM_OBJECT_HASH_ENTRY_NULL;
1c79356b 3876 assert_wait((event_t) pager, THREAD_UNINT);
b0d623f7
A
3877 vm_object_hash_unlock(lck);
3878
91447636 3879 thread_block(THREAD_CONTINUE_NULL);
b0d623f7 3880 lck = vm_object_hash_lock_spin(pager);
1c79356b 3881 }
55e303ae 3882 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
1c79356b
A
3883
3884 object = entry->object;
3885 assert(object != VM_OBJECT_NULL);
3886
3887 if (!must_init) {
b0d623f7 3888 if ( !vm_object_lock_try(object)) {
2d21ac55 3889
b0d623f7 3890 vm_object_hash_unlock(lck);
2d21ac55
A
3891
3892 try_failed_count++;
3893 mutex_pause(try_failed_count); /* wait a bit */
2d21ac55
A
3894 goto Retry;
3895 }
1c79356b 3896 assert(!internal || object->internal);
b0d623f7 3897#if VM_OBJECT_CACHE
1c79356b 3898 if (object->ref_count == 0) {
b0d623f7
A
3899 if ( !vm_object_cache_lock_try()) {
3900
3901 vm_object_hash_unlock(lck);
3902 vm_object_unlock(object);
3903
3904 try_failed_count++;
3905 mutex_pause(try_failed_count); /* wait a bit */
3906 goto Retry;
3907 }
1c79356b 3908 XPR(XPR_VM_OBJECT_CACHE,
b0d623f7
A
3909 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
3910 object,
3911 vm_object_cached_list.next,
3912 vm_object_cached_list.prev, 0,0);
1c79356b
A
3913 queue_remove(&vm_object_cached_list, object,
3914 vm_object_t, cached_list);
3915 vm_object_cached_count--;
b0d623f7
A
3916
3917 vm_object_cache_unlock();
3918 }
3919#endif
3920 if (named) {
3921 assert(!object->named);
3922 object->named = TRUE;
1c79356b 3923 }
2d21ac55 3924 vm_object_lock_assert_exclusive(object);
1c79356b
A
3925 object->ref_count++;
3926 vm_object_res_reference(object);
b0d623f7
A
3927
3928 vm_object_hash_unlock(lck);
1c79356b
A
3929 vm_object_unlock(object);
3930
2d21ac55 3931 VM_STAT_INCR(hits);
b0d623f7
A
3932 } else
3933 vm_object_hash_unlock(lck);
3934
1c79356b
A
3935 assert(object->ref_count > 0);
3936
2d21ac55 3937 VM_STAT_INCR(lookups);
1c79356b 3938
1c79356b
A
3939 XPR(XPR_VM_OBJECT,
3940 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
b0d623f7 3941 pager, object, must_init, 0, 0);
1c79356b
A
3942
3943 /*
3944 * If we raced to create a vm_object but lost, let's
3945 * throw away ours.
3946 */
3947
3948 if (new_object != VM_OBJECT_NULL)
3949 vm_object_deallocate(new_object);
3950
3951 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
3952 vm_object_hash_entry_free(new_entry);
3953
3954 if (must_init) {
91447636 3955 memory_object_control_t control;
1c79356b
A
3956
3957 /*
3958 * Allocate request port.
3959 */
3960
91447636
A
3961 control = memory_object_control_allocate(object);
3962 assert (control != MEMORY_OBJECT_CONTROL_NULL);
1c79356b
A
3963
3964 vm_object_lock(object);
91447636 3965 assert(object != kernel_object);
1c79356b
A
3966
3967 /*
0b4e3aa0 3968 * Copy the reference we were given.
1c79356b
A
3969 */
3970
0b4e3aa0 3971 memory_object_reference(pager);
1c79356b
A
3972 object->pager_created = TRUE;
3973 object->pager = pager;
3974 object->internal = internal;
3975 object->pager_trusted = internal;
3976 if (!internal) {
3977 /* copy strategy invalid until set by memory manager */
3978 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
3979 }
91447636 3980 object->pager_control = control;
1c79356b
A
3981 object->pager_ready = FALSE;
3982
1c79356b
A
3983 vm_object_unlock(object);
3984
3985 /*
3986 * Let the pager know we're using it.
3987 */
3988
0b4e3aa0 3989 (void) memory_object_init(pager,
91447636 3990 object->pager_control,
0b4e3aa0 3991 PAGE_SIZE);
1c79356b
A
3992
3993 vm_object_lock(object);
0b4e3aa0
A
3994 if (named)
3995 object->named = TRUE;
1c79356b
A
3996 if (internal) {
3997 object->pager_ready = TRUE;
3998 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
3999 }
4000
4001 object->pager_initialized = TRUE;
4002 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
4003 } else {
4004 vm_object_lock(object);
4005 }
4006
4007 /*
4008 * [At this point, the object must be locked]
4009 */
4010
4011 /*
4012 * Wait for the work above to be done by the first
4013 * thread to map this object.
4014 */
4015
4016 while (!object->pager_initialized) {
9bccf70c 4017 vm_object_sleep(object,
1c79356b
A
4018 VM_OBJECT_EVENT_INITIALIZED,
4019 THREAD_UNINT);
1c79356b
A
4020 }
4021 vm_object_unlock(object);
4022
4023 XPR(XPR_VM_OBJECT,
4024 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
b0d623f7 4025 object, object->pager, internal, 0,0);
1c79356b
A
4026 return(object);
4027}
4028
4029/*
4030 * Routine: vm_object_pager_create
4031 * Purpose:
4032 * Create a memory object for an internal object.
4033 * In/out conditions:
4034 * The object is locked on entry and exit;
4035 * it may be unlocked within this call.
4036 * Limitations:
4037 * Only one thread may be performing a
4038 * vm_object_pager_create on an object at
4039 * a time. Presumably, only the pageout
4040 * daemon will be using this routine.
4041 */
4042
4043void
4044vm_object_pager_create(
4045 register vm_object_t object)
4046{
0b4e3aa0 4047 memory_object_t pager;
1c79356b 4048 vm_object_hash_entry_t entry;
b0d623f7 4049 lck_mtx_t *lck;
1c79356b
A
4050#if MACH_PAGEMAP
4051 vm_object_size_t size;
4052 vm_external_map_t map;
4053#endif /* MACH_PAGEMAP */
4054
4055 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
b0d623f7 4056 object, 0,0,0,0);
1c79356b 4057
91447636
A
4058 assert(object != kernel_object);
4059
1c79356b
A
4060 if (memory_manager_default_check() != KERN_SUCCESS)
4061 return;
4062
4063 /*
4064 * Prevent collapse or termination by holding a paging reference
4065 */
4066
4067 vm_object_paging_begin(object);
4068 if (object->pager_created) {
4069 /*
4070 * Someone else got to it first...
4071 * wait for them to finish initializing the ports
4072 */
4073 while (!object->pager_initialized) {
9bccf70c
A
4074 vm_object_sleep(object,
4075 VM_OBJECT_EVENT_INITIALIZED,
4076 THREAD_UNINT);
1c79356b
A
4077 }
4078 vm_object_paging_end(object);
4079 return;
4080 }
4081
4082 /*
4083 * Indicate that a memory object has been assigned
4084 * before dropping the lock, to prevent a race.
4085 */
4086
4087 object->pager_created = TRUE;
4088 object->paging_offset = 0;
4089
4090#if MACH_PAGEMAP
4091 size = object->size;
4092#endif /* MACH_PAGEMAP */
4093 vm_object_unlock(object);
4094
4095#if MACH_PAGEMAP
4096 map = vm_external_create(size);
4097 vm_object_lock(object);
4098 assert(object->size == size);
4099 object->existence_map = map;
4100 vm_object_unlock(object);
4101#endif /* MACH_PAGEMAP */
4102
b0d623f7
A
4103 if ((uint32_t) object->size != object->size) {
4104 panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n",
4105 (uint64_t) object->size);
4106 }
4107
1c79356b 4108 /*
0b4e3aa0 4109 * Create the [internal] pager, and associate it with this object.
1c79356b 4110 *
0b4e3aa0 4111 * We make the association here so that vm_object_enter()
1c79356b
A
4112 * can look up the object to complete initializing it. No
4113 * user will ever map this object.
4114 */
4115 {
0b4e3aa0 4116 memory_object_default_t dmm;
1c79356b 4117
0b4e3aa0 4118 /* acquire a reference for the default memory manager */
2d21ac55 4119 dmm = memory_manager_default_reference();
1c79356b 4120
1c79356b
A
4121 assert(object->temporary);
4122
0b4e3aa0 4123 /* create our new memory object */
b0d623f7
A
4124 assert((vm_size_t) object->size == object->size);
4125 (void) memory_object_create(dmm, (vm_size_t) object->size,
4126 &pager);
0b4e3aa0
A
4127
4128 memory_object_default_deallocate(dmm);
1c79356b
A
4129 }
4130
4131 entry = vm_object_hash_entry_alloc(pager);
4132
b0d623f7
A
4133 lck = vm_object_hash_lock_spin(pager);
4134 vm_object_hash_insert(entry, object);
4135 vm_object_hash_unlock(lck);
1c79356b
A
4136
4137 /*
0b4e3aa0 4138 * A reference was returned by
1c79356b
A
4139 * memory_object_create(), and it is
4140 * copied by vm_object_enter().
4141 */
4142
4143 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
4144 panic("vm_object_pager_create: mismatch");
4145
4146 /*
0b4e3aa0 4147 * Drop the reference we were passed.
1c79356b 4148 */
0b4e3aa0 4149 memory_object_deallocate(pager);
1c79356b
A
4150
4151 vm_object_lock(object);
4152
4153 /*
4154 * Release the paging reference
4155 */
4156 vm_object_paging_end(object);
4157}
4158
4159/*
4160 * Routine: vm_object_remove
4161 * Purpose:
4162 * Eliminate the pager/object association
4163 * for this pager.
4164 * Conditions:
4165 * The object cache must be locked.
4166 */
0b4e3aa0 4167__private_extern__ void
1c79356b
A
4168vm_object_remove(
4169 vm_object_t object)
4170{
0b4e3aa0 4171 memory_object_t pager;
1c79356b 4172
0b4e3aa0 4173 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
1c79356b
A
4174 vm_object_hash_entry_t entry;
4175
0b4e3aa0 4176 entry = vm_object_hash_lookup(pager, FALSE);
1c79356b
A
4177 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
4178 entry->object = VM_OBJECT_NULL;
4179 }
4180
1c79356b
A
4181}
4182
4183/*
4184 * Global variables for vm_object_collapse():
4185 *
4186 * Counts for normal collapses and bypasses.
4187 * Debugging variables, to watch or disable collapse.
4188 */
0b4e3aa0
A
4189static long object_collapses = 0;
4190static long object_bypasses = 0;
1c79356b 4191
0b4e3aa0
A
4192static boolean_t vm_object_collapse_allowed = TRUE;
4193static boolean_t vm_object_bypass_allowed = TRUE;
4194
2d21ac55 4195#if MACH_PAGEMAP
0b4e3aa0
A
4196static int vm_external_discarded;
4197static int vm_external_collapsed;
2d21ac55 4198#endif
1c79356b 4199
91447636
A
4200unsigned long vm_object_collapse_encrypted = 0;
4201
1c79356b 4202/*
0b4e3aa0
A
4203 * Routine: vm_object_do_collapse
4204 * Purpose:
4205 * Collapse an object with the object backing it.
4206 * Pages in the backing object are moved into the
4207 * parent, and the backing object is deallocated.
4208 * Conditions:
4209 * Both objects and the cache are locked; the page
4210 * queues are unlocked.
1c79356b
A
4211 *
4212 */
0b4e3aa0 4213static void
1c79356b
A
4214vm_object_do_collapse(
4215 vm_object_t object,
4216 vm_object_t backing_object)
4217{
4218 vm_page_t p, pp;
4219 vm_object_offset_t new_offset, backing_offset;
4220 vm_object_size_t size;
4221
b0d623f7
A
4222 vm_object_lock_assert_exclusive(object);
4223 vm_object_lock_assert_exclusive(backing_object);
4224
1c79356b
A
4225 backing_offset = object->shadow_offset;
4226 size = object->size;
4227
1c79356b
A
4228 /*
4229 * Move all in-memory pages from backing_object
4230 * to the parent. Pages that have been paged out
4231 * will be overwritten by any of the parent's
4232 * pages that shadow them.
4233 */
4234
4235 while (!queue_empty(&backing_object->memq)) {
4236
4237 p = (vm_page_t) queue_first(&backing_object->memq);
4238
4239 new_offset = (p->offset - backing_offset);
4240
4241 assert(!p->busy || p->absent);
91447636 4242
1c79356b
A
4243 /*
4244 * If the parent has a page here, or if
4245 * this page falls outside the parent,
4246 * dispose of it.
4247 *
4248 * Otherwise, move it as planned.
4249 */
4250
4251 if (p->offset < backing_offset || new_offset >= size) {
4252 VM_PAGE_FREE(p);
4253 } else {
91447636
A
4254 /*
4255 * ENCRYPTED SWAP:
4256 * The encryption key includes the "pager" and the
2d21ac55
A
4257 * "paging_offset". These will not change during the
4258 * object collapse, so we can just move an encrypted
4259 * page from one object to the other in this case.
4260 * We can't decrypt the page here, since we can't drop
91447636 4261 * the object lock.
91447636 4262 */
2d21ac55
A
4263 if (p->encrypted) {
4264 vm_object_collapse_encrypted++;
4265 }
1c79356b
A
4266 pp = vm_page_lookup(object, new_offset);
4267 if (pp == VM_PAGE_NULL) {
4268
4269 /*
4270 * Parent now has no page.
4271 * Move the backing object's page up.
4272 */
4273
2d21ac55 4274 vm_page_rename(p, object, new_offset, TRUE);
1c79356b
A
4275#if MACH_PAGEMAP
4276 } else if (pp->absent) {
4277
4278 /*
4279 * Parent has an absent page...
4280 * it's not being paged in, so
4281 * it must really be missing from
4282 * the parent.
4283 *
4284 * Throw out the absent page...
4285 * any faults looking for that
4286 * page will restart with the new
4287 * one.
4288 */
4289
4290 VM_PAGE_FREE(pp);
2d21ac55 4291 vm_page_rename(p, object, new_offset, TRUE);
1c79356b
A
4292#endif /* MACH_PAGEMAP */
4293 } else {
4294 assert(! pp->absent);
4295
4296 /*
4297 * Parent object has a real page.
4298 * Throw away the backing object's
4299 * page.
4300 */
4301 VM_PAGE_FREE(p);
4302 }
4303 }
4304 }
4305
55e303ae 4306#if !MACH_PAGEMAP
2d21ac55 4307 assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL))
55e303ae 4308 || (!backing_object->pager_created
2d21ac55 4309 && (backing_object->pager == MEMORY_OBJECT_NULL)));
55e303ae
A
4310#else
4311 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
4312#endif /* !MACH_PAGEMAP */
1c79356b 4313
0b4e3aa0 4314 if (backing_object->pager != MEMORY_OBJECT_NULL) {
1c79356b
A
4315 vm_object_hash_entry_t entry;
4316
4317 /*
4318 * Move the pager from backing_object to object.
4319 *
4320 * XXX We're only using part of the paging space
4321 * for keeps now... we ought to discard the
4322 * unused portion.
4323 */
4324
55e303ae 4325 assert(!object->paging_in_progress);
b0d623f7 4326 assert(!object->activity_in_progress);
1c79356b 4327 object->pager = backing_object->pager;
b0d623f7
A
4328
4329 if (backing_object->hashed) {
4330 lck_mtx_t *lck;
4331
4332 lck = vm_object_hash_lock_spin(backing_object->pager);
4333 entry = vm_object_hash_lookup(object->pager, FALSE);
4334 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
4335 entry->object = object;
4336 vm_object_hash_unlock(lck);
4337
4338 object->hashed = TRUE;
4339 }
1c79356b 4340 object->pager_created = backing_object->pager_created;
91447636 4341 object->pager_control = backing_object->pager_control;
1c79356b
A
4342 object->pager_ready = backing_object->pager_ready;
4343 object->pager_initialized = backing_object->pager_initialized;
1c79356b
A
4344 object->paging_offset =
4345 backing_object->paging_offset + backing_offset;
91447636
A
4346 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
4347 memory_object_control_collapse(object->pager_control,
0b4e3aa0 4348 object);
1c79356b
A
4349 }
4350 }
4351
1c79356b
A
4352#if MACH_PAGEMAP
4353 /*
 4354 * If the shadow offset is 0, then use the existence map from
4355 * the backing object if there is one. If the shadow offset is
4356 * not zero, toss it.
4357 *
4358 * XXX - If the shadow offset is not 0 then a bit copy is needed
 4359 * if the map is to be salvaged. For now, we just toss the
4360 * old map, giving the collapsed object no map. This means that
4361 * the pager is invoked for zero fill pages. If analysis shows
4362 * that this happens frequently and is a performance hit, then
4363 * this code should be fixed to salvage the map.
4364 */
4365 assert(object->existence_map == VM_EXTERNAL_NULL);
4366 if (backing_offset || (size != backing_object->size)) {
4367 vm_external_discarded++;
4368 vm_external_destroy(backing_object->existence_map,
4369 backing_object->size);
4370 }
4371 else {
4372 vm_external_collapsed++;
4373 object->existence_map = backing_object->existence_map;
4374 }
4375 backing_object->existence_map = VM_EXTERNAL_NULL;
4376#endif /* MACH_PAGEMAP */
4377
4378 /*
4379 * Object now shadows whatever backing_object did.
4380 * Note that the reference to backing_object->shadow
4381 * moves from within backing_object to within object.
4382 */
4383
91447636
A
4384 assert(!object->phys_contiguous);
4385 assert(!backing_object->phys_contiguous);
1c79356b 4386 object->shadow = backing_object->shadow;
91447636
A
4387 if (object->shadow) {
4388 object->shadow_offset += backing_object->shadow_offset;
4389 } else {
4390 /* no shadow, therefore no shadow offset... */
4391 object->shadow_offset = 0;
4392 }
1c79356b 4393 assert((object->shadow == VM_OBJECT_NULL) ||
55e303ae 4394 (object->shadow->copy != backing_object));
1c79356b
A
4395
4396 /*
4397 * Discard backing_object.
4398 *
4399 * Since the backing object has no pages, no
4400 * pager left, and no object references within it,
4401 * all that is necessary is to dispose of it.
4402 */
4403
4404 assert((backing_object->ref_count == 1) &&
4405 (backing_object->resident_page_count == 0) &&
b0d623f7
A
4406 (backing_object->paging_in_progress == 0) &&
4407 (backing_object->activity_in_progress == 0));
1c79356b 4408
1c79356b
A
4409 backing_object->alive = FALSE;
4410 vm_object_unlock(backing_object);
4411
4412 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
b0d623f7 4413 backing_object, 0,0,0,0);
1c79356b 4414
2d21ac55
A
4415 vm_object_lock_destroy(backing_object);
4416
91447636 4417 zfree(vm_object_zone, backing_object);
1c79356b
A
4418
4419 object_collapses++;
4420}
4421
0b4e3aa0 4422static void
1c79356b
A
4423vm_object_do_bypass(
4424 vm_object_t object,
4425 vm_object_t backing_object)
4426{
4427 /*
4428 * Make the parent shadow the next object
4429 * in the chain.
4430 */
4431
b0d623f7 4432 vm_object_lock_assert_exclusive(object);
2d21ac55
A
4433 vm_object_lock_assert_exclusive(backing_object);
4434
1c79356b
A
4435#if TASK_SWAPPER
4436 /*
4437 * Do object reference in-line to
4438 * conditionally increment shadow's
4439 * residence count. If object is not
4440 * resident, leave residence count
4441 * on shadow alone.
4442 */
4443 if (backing_object->shadow != VM_OBJECT_NULL) {
4444 vm_object_lock(backing_object->shadow);
2d21ac55 4445 vm_object_lock_assert_exclusive(backing_object->shadow);
1c79356b
A
4446 backing_object->shadow->ref_count++;
4447 if (object->res_count != 0)
4448 vm_object_res_reference(backing_object->shadow);
4449 vm_object_unlock(backing_object->shadow);
4450 }
4451#else /* TASK_SWAPPER */
4452 vm_object_reference(backing_object->shadow);
4453#endif /* TASK_SWAPPER */
4454
91447636
A
4455 assert(!object->phys_contiguous);
4456 assert(!backing_object->phys_contiguous);
1c79356b 4457 object->shadow = backing_object->shadow;
91447636
A
4458 if (object->shadow) {
4459 object->shadow_offset += backing_object->shadow_offset;
4460 } else {
4461 /* no shadow, therefore no shadow offset... */
4462 object->shadow_offset = 0;
4463 }
1c79356b
A
4464
4465 /*
4466 * Backing object might have had a copy pointer
4467 * to us. If it did, clear it.
4468 */
4469 if (backing_object->copy == object) {
4470 backing_object->copy = VM_OBJECT_NULL;
4471 }
4472
4473 /*
4474 * Drop the reference count on backing_object.
4475#if TASK_SWAPPER
4476 * Since its ref_count was at least 2, it
4477 * will not vanish; so we don't need to call
4478 * vm_object_deallocate.
593a1d5f 4479 * [with a caveat for "named" objects]
1c79356b
A
4480 *
4481 * The res_count on the backing object is
4482 * conditionally decremented. It's possible
4483 * (via vm_pageout_scan) to get here with
4484 * a "swapped" object, which has a 0 res_count,
4485 * in which case, the backing object res_count
4486 * is already down by one.
4487#else
4488 * Don't call vm_object_deallocate unless
4489 * ref_count drops to zero.
4490 *
4491 * The ref_count can drop to zero here if the
4492 * backing object could be bypassed but not
4493 * collapsed, such as when the backing object
4494 * is temporary and cachable.
4495#endif
4496 */
593a1d5f
A
4497 if (backing_object->ref_count > 2 ||
4498 (!backing_object->named && backing_object->ref_count > 1)) {
2d21ac55 4499 vm_object_lock_assert_exclusive(backing_object);
1c79356b
A
4500 backing_object->ref_count--;
4501#if TASK_SWAPPER
4502 if (object->res_count != 0)
4503 vm_object_res_deallocate(backing_object);
4504 assert(backing_object->ref_count > 0);
4505#endif /* TASK_SWAPPER */
4506 vm_object_unlock(backing_object);
4507 } else {
4508
4509 /*
4510 * Drop locks so that we can deallocate
4511 * the backing object.
4512 */
4513
4514#if TASK_SWAPPER
4515 if (object->res_count == 0) {
4516 /* XXX get a reference for the deallocate below */
4517 vm_object_res_reference(backing_object);
4518 }
4519#endif /* TASK_SWAPPER */
4520 vm_object_unlock(object);
4521 vm_object_unlock(backing_object);
4522 vm_object_deallocate(backing_object);
4523
4524 /*
4525 * Relock object. We don't have to reverify
4526 * its state since vm_object_collapse will
4527 * do that for us as it starts at the
4528 * top of its loop.
4529 */
4530
4531 vm_object_lock(object);
4532 }
4533
4534 object_bypasses++;
4535}
0b4e3aa0 4536
1c79356b
A
4537
4538/*
4539 * vm_object_collapse:
4540 *
4541 * Perform an object collapse or an object bypass if appropriate.
4542 * The real work of collapsing and bypassing is performed in
4543 * the routines vm_object_do_collapse and vm_object_do_bypass.
4544 *
4545 * Requires that the object be locked and the page queues be unlocked.
4546 *
4547 */
91447636
A
4548static unsigned long vm_object_collapse_calls = 0;
4549static unsigned long vm_object_collapse_objects = 0;
4550static unsigned long vm_object_collapse_do_collapse = 0;
4551static unsigned long vm_object_collapse_do_bypass = 0;
2d21ac55 4552static unsigned long vm_object_collapse_delays = 0;
0b4e3aa0 4553__private_extern__ void
1c79356b 4554vm_object_collapse(
55e303ae 4555 register vm_object_t object,
0c530ab8
A
4556 register vm_object_offset_t hint_offset,
4557 boolean_t can_bypass)
1c79356b
A
4558{
4559 register vm_object_t backing_object;
55e303ae
A
4560 register unsigned int rcount;
4561 register unsigned int size;
91447636 4562 vm_object_t original_object;
b0d623f7
A
4563 int object_lock_type;
4564 int backing_object_lock_type;
91447636
A
4565
4566 vm_object_collapse_calls++;
0b4e3aa0 4567
0c530ab8
A
4568 if (! vm_object_collapse_allowed &&
4569 ! (can_bypass && vm_object_bypass_allowed)) {
1c79356b
A
4570 return;
4571 }
4572
4573 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
b0d623f7 4574 object, 0,0,0,0);
1c79356b 4575
91447636
A
4576 if (object == VM_OBJECT_NULL)
4577 return;
4578
4579 original_object = object;
4580
b0d623f7
A
4581 /*
4582 * The top object was locked "exclusive" by the caller.
4583 * In the first pass, to determine if we can collapse the shadow chain,
4584 * take a "shared" lock on the shadow objects. If we can collapse,
4585 * we'll have to go down the chain again with exclusive locks.
4586 */
4587 object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4588 backing_object_lock_type = OBJECT_LOCK_SHARED;
4589
4590retry:
4591 object = original_object;
4592 vm_object_lock_assert_exclusive(object);
4593
1c79356b 4594 while (TRUE) {
91447636 4595 vm_object_collapse_objects++;
1c79356b
A
4596 /*
4597 * Verify that the conditions are right for either
4598 * collapse or bypass:
1c79356b 4599 */
1c79356b
A
4600
4601 /*
4602 * There is a backing object, and
4603 */
4604
91447636
A
4605 backing_object = object->shadow;
4606 if (backing_object == VM_OBJECT_NULL) {
4607 if (object != original_object) {
4608 vm_object_unlock(object);
4609 }
1c79356b 4610 return;
91447636 4611 }
b0d623f7
A
4612 if (backing_object_lock_type == OBJECT_LOCK_SHARED) {
4613 vm_object_lock_shared(backing_object);
4614 } else {
4615 vm_object_lock(backing_object);
4616 }
4617
91447636
A
4618 /*
4619 * No pages in the object are currently
4620 * being paged out, and
4621 */
b0d623f7
A
4622 if (object->paging_in_progress != 0 ||
4623 object->activity_in_progress != 0) {
91447636 4624 /* try and collapse the rest of the shadow chain */
91447636
A
4625 if (object != original_object) {
4626 vm_object_unlock(object);
4627 }
4628 object = backing_object;
b0d623f7 4629 object_lock_type = backing_object_lock_type;
91447636
A
4630 continue;
4631 }
4632
1c79356b
A
4633 /*
4634 * ...
4635 * The backing object is not read_only,
4636 * and no pages in the backing object are
4637 * currently being paged out.
4638 * The backing object is internal.
4639 *
4640 */
4641
4642 if (!backing_object->internal ||
b0d623f7
A
4643 backing_object->paging_in_progress != 0 ||
4644 backing_object->activity_in_progress != 0) {
91447636
A
4645 /* try and collapse the rest of the shadow chain */
4646 if (object != original_object) {
4647 vm_object_unlock(object);
4648 }
4649 object = backing_object;
b0d623f7 4650 object_lock_type = backing_object_lock_type;
91447636 4651 continue;
1c79356b
A
4652 }
4653
4654 /*
4655 * The backing object can't be a copy-object:
4656 * the shadow_offset for the copy-object must stay
4657 * as 0. Furthermore (for the 'we have all the
4658 * pages' case), if we bypass backing_object and
4659 * just shadow the next object in the chain, old
4660 * pages from that object would then have to be copied
4661 * BOTH into the (former) backing_object and into the
4662 * parent object.
4663 */
4664 if (backing_object->shadow != VM_OBJECT_NULL &&
55e303ae 4665 backing_object->shadow->copy == backing_object) {
91447636
A
4666 /* try and collapse the rest of the shadow chain */
4667 if (object != original_object) {
4668 vm_object_unlock(object);
4669 }
4670 object = backing_object;
b0d623f7 4671 object_lock_type = backing_object_lock_type;
91447636 4672 continue;
1c79356b
A
4673 }
4674
4675 /*
4676 * We can now try to either collapse the backing
4677 * object (if the parent is the only reference to
4678 * it) or (perhaps) remove the parent's reference
4679 * to it.
1c79356b 4680 *
0b4e3aa0
A
4681 * If there is exactly one reference to the backing
4682 * object, we may be able to collapse it into the
4683 * parent.
1c79356b 4684 *
55e303ae
A
4685 * If MACH_PAGEMAP is defined:
4686 * The parent must not have a pager created for it,
4687 * since collapsing a backing_object dumps new pages
4688 * into the parent that its pager doesn't know about
4689 * (and the collapse code can't merge the existence
4690 * maps).
4691 * Otherwise:
4692 * As long as one of the objects is still not known
4693 * to the pager, we can collapse them.
1c79356b 4694 */
1c79356b 4695 if (backing_object->ref_count == 1 &&
55e303ae
A
4696 (!object->pager_created
4697#if !MACH_PAGEMAP
91447636 4698 || !backing_object->pager_created
55e303ae
A
4699#endif /*!MACH_PAGEMAP */
4700 ) && vm_object_collapse_allowed) {
1c79356b 4701
1c79356b 4702 /*
b0d623f7 4703 * We need the exclusive lock on the VM objects.
1c79356b 4704 */
b0d623f7
A
4705 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
4706 /*
4707 * We have an object and its shadow locked
4708 * "shared". We can't just upgrade the locks
4709 * to "exclusive", as some other thread might
4710 * also have these objects locked "shared" and
4711 * attempt to upgrade one or the other to
4712 * "exclusive". The upgrades would block
4713 * forever waiting for the other "shared" locks
4714 * to get released.
4715 * So we have to release the locks and go
4716 * down the shadow chain again (since it could
4717 * have changed) with "exclusive" locking.
4718 */
1c79356b 4719 vm_object_unlock(backing_object);
b0d623f7
A
4720 if (object != original_object)
4721 vm_object_unlock(object);
4722 object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4723 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4724 goto retry;
1c79356b
A
4725 }
4726
b0d623f7
A
4727 XPR(XPR_VM_OBJECT,
4728 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
4729 backing_object, object,
4730 backing_object->pager,
4731 backing_object->pager_control, 0);
4732
1c79356b
A
4733 /*
4734 * Collapse the object with its backing
4735 * object, and try again with the object's
4736 * new backing object.
4737 */
4738
4739 vm_object_do_collapse(object, backing_object);
91447636 4740 vm_object_collapse_do_collapse++;
1c79356b
A
4741 continue;
4742 }
4743
1c79356b
A
4744 /*
4745 * Collapsing the backing object was not possible
4746 * or permitted, so let's try bypassing it.
4747 */
4748
0c530ab8 4749 if (! (can_bypass && vm_object_bypass_allowed)) {
91447636
A
4750 /* try and collapse the rest of the shadow chain */
4751 if (object != original_object) {
4752 vm_object_unlock(object);
4753 }
4754 object = backing_object;
b0d623f7 4755 object_lock_type = backing_object_lock_type;
91447636 4756 continue;
1c79356b
A
4757 }
4758
0b4e3aa0 4759
1c79356b 4760 /*
55e303ae
A
4761 * If the object doesn't have all its pages present,
4762 * we have to make sure no pages in the backing object
4763 * "show through" before bypassing it.
1c79356b 4764 */
55e303ae
A
4765 size = atop(object->size);
4766 rcount = object->resident_page_count;
4767 if (rcount != size) {
55e303ae
A
4768 vm_object_offset_t offset;
4769 vm_object_offset_t backing_offset;
4770 unsigned int backing_rcount;
4771 unsigned int lookups = 0;
4772
4773 /*
4774 * If the backing object has a pager but no pagemap,
4775 * then we cannot bypass it, because we don't know
4776 * what pages it has.
4777 */
4778 if (backing_object->pager_created
1c79356b 4779#if MACH_PAGEMAP
b0d623f7 4780 && (backing_object->existence_map == VM_EXTERNAL_NULL)
1c79356b 4781#endif /* MACH_PAGEMAP */
55e303ae 4782 ) {
91447636
A
4783 /* try and collapse the rest of the shadow chain */
4784 if (object != original_object) {
4785 vm_object_unlock(object);
4786 }
4787 object = backing_object;
b0d623f7 4788 object_lock_type = backing_object_lock_type;
91447636 4789 continue;
55e303ae 4790 }
1c79356b 4791
55e303ae
A
4792 /*
4793 * If the object has a pager but no pagemap,
4794 * then we cannot bypass it, because we don't know
4795 * what pages it has.
4796 */
4797 if (object->pager_created
0b4e3aa0 4798#if MACH_PAGEMAP
b0d623f7 4799 && (object->existence_map == VM_EXTERNAL_NULL)
0b4e3aa0 4800#endif /* MACH_PAGEMAP */
55e303ae 4801 ) {
91447636
A
4802 /* try and collapse the rest of the shadow chain */
4803 if (object != original_object) {
4804 vm_object_unlock(object);
4805 }
4806 object = backing_object;
b0d623f7 4807 object_lock_type = backing_object_lock_type;
91447636 4808 continue;
55e303ae 4809 }
0b4e3aa0 4810
55e303ae
A
4811 /*
4812 * If all of the pages in the backing object are
4813 * shadowed by the parent object, the parent
4814 * object no longer has to shadow the backing
4815 * object; it can shadow the next one in the
4816 * chain.
4817 *
4818 * If the backing object has existence info,
 4819 * we must examine its existence info
4820 * as well.
4821 *
4822 */
1c79356b 4823
55e303ae
A
4824 backing_offset = object->shadow_offset;
4825 backing_rcount = backing_object->resident_page_count;
1c79356b 4826
2d21ac55 4827#if MACH_PAGEMAP
55e303ae
A
4828#define EXISTS_IN_OBJECT(obj, off, rc) \
4829 (vm_external_state_get((obj)->existence_map, \
4830 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
4831 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
2d21ac55
A
4832#else
4833#define EXISTS_IN_OBJECT(obj, off, rc) \
4834 (((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
4835#endif /* MACH_PAGEMAP */
55e303ae
A
4836
4837 /*
4838 * Check the hint location first
4839 * (since it is often the quickest way out of here).
4840 */
4841 if (object->cow_hint != ~(vm_offset_t)0)
4842 hint_offset = (vm_object_offset_t)object->cow_hint;
4843 else
4844 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
4845 (hint_offset - 8 * PAGE_SIZE_64) : 0;
4846
4847 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
4848 backing_offset, backing_rcount) &&
4849 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
4850 /* dependency right at the hint */
b0d623f7 4851 object->cow_hint = (vm_offset_t) hint_offset; /* atomic */
91447636
A
4852 /* try and collapse the rest of the shadow chain */
4853 if (object != original_object) {
4854 vm_object_unlock(object);
4855 }
4856 object = backing_object;
b0d623f7 4857 object_lock_type = backing_object_lock_type;
91447636 4858 continue;
0b4e3aa0 4859 }
55e303ae
A
4860
4861 /*
4862 * If the object's window onto the backing_object
4863 * is large compared to the number of resident
4864 * pages in the backing object, it makes sense to
4865 * walk the backing_object's resident pages first.
4866 *
4867 * NOTE: Pages may be in both the existence map and
4868 * resident. So, we can't permanently decrement
4869 * the rcount here because the second loop may
 4870 * find the same pages in the backing object's
4871 * existence map that we found here and we would
4872 * double-decrement the rcount. We also may or
4873 * may not have found the
4874 */
2d21ac55
A
4875 if (backing_rcount &&
4876#if MACH_PAGEMAP
4877 size > ((backing_object->existence_map) ?
4878 backing_rcount : (backing_rcount >> 1))
4879#else
4880 size > (backing_rcount >> 1)
4881#endif /* MACH_PAGEMAP */
4882 ) {
55e303ae
A
4883 unsigned int rc = rcount;
4884 vm_page_t p;
4885
4886 backing_rcount = backing_object->resident_page_count;
4887 p = (vm_page_t)queue_first(&backing_object->memq);
4888 do {
4889 /* Until we get more than one lookup lock */
4890 if (lookups > 256) {
2d21ac55 4891 vm_object_collapse_delays++;
55e303ae 4892 lookups = 0;
2d21ac55 4893 mutex_pause(0);
55e303ae
A
4894 }
4895
4896 offset = (p->offset - backing_offset);
4897 if (offset < object->size &&
4898 offset != hint_offset &&
4899 !EXISTS_IN_OBJECT(object, offset, rc)) {
4900 /* found a dependency */
b0d623f7
A
4901 object->cow_hint = (vm_offset_t) offset; /* atomic */
4902
91447636 4903 break;
55e303ae 4904 }
91447636 4905 p = (vm_page_t) queue_next(&p->listq);
55e303ae
A
4906
4907 } while (--backing_rcount);
91447636
A
4908 if (backing_rcount != 0 ) {
4909 /* try and collapse the rest of the shadow chain */
4910 if (object != original_object) {
4911 vm_object_unlock(object);
4912 }
4913 object = backing_object;
b0d623f7 4914 object_lock_type = backing_object_lock_type;
91447636
A
4915 continue;
4916 }
0b4e3aa0 4917 }
55e303ae
A
4918
4919 /*
4920 * Walk through the offsets looking for pages in the
4921 * backing object that show through to the object.
4922 */
b0d623f7
A
4923 if (backing_rcount
4924#if MACH_PAGEMAP
4925 || backing_object->existence_map
2d21ac55 4926#endif /* MACH_PAGEMAP */
b0d623f7 4927 ) {
55e303ae
A
4928 offset = hint_offset;
4929
4930 while((offset =
4931 (offset + PAGE_SIZE_64 < object->size) ?
4932 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
4933
4934 /* Until we get more than one lookup lock */
4935 if (lookups > 256) {
2d21ac55 4936 vm_object_collapse_delays++;
55e303ae 4937 lookups = 0;
2d21ac55 4938 mutex_pause(0);
55e303ae
A
4939 }
4940
4941 if (EXISTS_IN_OBJECT(backing_object, offset +
4942 backing_offset, backing_rcount) &&
4943 !EXISTS_IN_OBJECT(object, offset, rcount)) {
4944 /* found a dependency */
b0d623f7 4945 object->cow_hint = (vm_offset_t) offset; /* atomic */
91447636 4946 break;
55e303ae
A
4947 }
4948 }
91447636
A
4949 if (offset != hint_offset) {
4950 /* try and collapse the rest of the shadow chain */
4951 if (object != original_object) {
4952 vm_object_unlock(object);
4953 }
4954 object = backing_object;
b0d623f7 4955 object_lock_type = backing_object_lock_type;
91447636
A
4956 continue;
4957 }
0b4e3aa0
A
4958 }
4959 }
1c79356b 4960
b0d623f7
A
4961 /*
4962 * We need "exclusive" locks on the 2 VM objects.
4963 */
4964 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
4965 vm_object_unlock(backing_object);
4966 if (object != original_object)
4967 vm_object_unlock(object);
4968 object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4969 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4970 goto retry;
4971 }
4972
55e303ae
A
4973 /* reset the offset hint for any objects deeper in the chain */
4974 object->cow_hint = (vm_offset_t)0;
1c79356b
A
4975
4976 /*
4977 * All interesting pages in the backing object
4978 * already live in the parent or its pager.
4979 * Thus we can bypass the backing object.
4980 */
4981
4982 vm_object_do_bypass(object, backing_object);
91447636 4983 vm_object_collapse_do_bypass++;
1c79356b
A
4984
4985 /*
4986 * Try again with this object's new backing object.
4987 */
4988
4989 continue;
4990 }
91447636
A
4991
4992 if (object != original_object) {
4993 vm_object_unlock(object);
4994 }
1c79356b
A
4995}
4996
4997/*
4998 * Routine: vm_object_page_remove: [internal]
4999 * Purpose:
5000 * Removes all physical pages in the specified
5001 * object range from the object's list of pages.
5002 *
5003 * In/out conditions:
5004 * The object must be locked.
5005 * The object must not have paging_in_progress, usually
5006 * guaranteed by not having a pager.
5007 */
5008unsigned int vm_object_page_remove_lookup = 0;
5009unsigned int vm_object_page_remove_iterate = 0;
5010
0b4e3aa0 5011__private_extern__ void
1c79356b
A
5012vm_object_page_remove(
5013 register vm_object_t object,
5014 register vm_object_offset_t start,
5015 register vm_object_offset_t end)
5016{
5017 register vm_page_t p, next;
5018
5019 /*
5020 * One and two page removals are most popular.
5021 * The factor of 16 here is somewhat arbitrary.
5022 * It balances vm_object_lookup vs iteration.
5023 */
5024
55e303ae 5025 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
1c79356b
A
5026 vm_object_page_remove_lookup++;
5027
5028 for (; start < end; start += PAGE_SIZE_64) {
5029 p = vm_page_lookup(object, start);
5030 if (p != VM_PAGE_NULL) {
5031 assert(!p->cleaning && !p->pageout);
2d21ac55 5032 if (!p->fictitious && p->pmapped)
91447636 5033 pmap_disconnect(p->phys_page);
1c79356b
A
5034 VM_PAGE_FREE(p);
5035 }
5036 }
5037 } else {
5038 vm_object_page_remove_iterate++;
5039
5040 p = (vm_page_t) queue_first(&object->memq);
5041 while (!queue_end(&object->memq, (queue_entry_t) p)) {
5042 next = (vm_page_t) queue_next(&p->listq);
5043 if ((start <= p->offset) && (p->offset < end)) {
5044 assert(!p->cleaning && !p->pageout);
2d21ac55 5045 if (!p->fictitious && p->pmapped)
91447636 5046 pmap_disconnect(p->phys_page);
1c79356b
A
5047 VM_PAGE_FREE(p);
5048 }
5049 p = next;
5050 }
5051 }
5052}
5053
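/*
 * Worked example of the threshold above (illustrative figures): with 4K
 * pages, removing a 64KB range covers atop_64(64KB) = 16 pages, so the
 * vm_page_lookup() path is taken only when 16 < resident_page_count/16,
 * i.e. (with integer division) when the object has at least 272 resident
 * pages; for smaller objects a single walk of the memq list is cheaper.
 */
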
0b4e3aa0 5054
1c79356b
A
5055/*
5056 * Routine: vm_object_coalesce
5057 * Function: Coalesces two objects backing up adjoining
5058 * regions of memory into a single object.
5059 *
5060 * returns TRUE if objects were combined.
5061 *
5062 * NOTE: Only works at the moment if the second object is NULL -
5063 * if it's not, which object do we lock first?
5064 *
5065 * Parameters:
5066 * prev_object First object to coalesce
5067 * prev_offset Offset into prev_object
 5068 * next_object Second object to coalesce
5069 * next_offset Offset into next_object
5070 *
5071 * prev_size Size of reference to prev_object
5072 * next_size Size of reference to next_object
5073 *
5074 * Conditions:
5075 * The object(s) must *not* be locked. The map must be locked
5076 * to preserve the reference to the object(s).
5077 */
0b4e3aa0 5078static int vm_object_coalesce_count = 0;
1c79356b 5079
0b4e3aa0 5080__private_extern__ boolean_t
1c79356b
A
5081vm_object_coalesce(
5082 register vm_object_t prev_object,
5083 vm_object_t next_object,
5084 vm_object_offset_t prev_offset,
91447636 5085 __unused vm_object_offset_t next_offset,
1c79356b
A
5086 vm_object_size_t prev_size,
5087 vm_object_size_t next_size)
5088{
5089 vm_object_size_t newsize;
5090
5091#ifdef lint
5092 next_offset++;
5093#endif /* lint */
5094
5095 if (next_object != VM_OBJECT_NULL) {
5096 return(FALSE);
5097 }
5098
5099 if (prev_object == VM_OBJECT_NULL) {
5100 return(TRUE);
5101 }
5102
5103 XPR(XPR_VM_OBJECT,
5104 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
b0d623f7 5105 prev_object, prev_offset, prev_size, next_size, 0);
1c79356b
A
5106
5107 vm_object_lock(prev_object);
5108
5109 /*
5110 * Try to collapse the object first
5111 */
0c530ab8 5112 vm_object_collapse(prev_object, prev_offset, TRUE);
1c79356b
A
5113
5114 /*
5115 * Can't coalesce if pages not mapped to
5116 * prev_entry may be in use any way:
5117 * . more than one reference
5118 * . paged out
5119 * . shadows another object
5120 * . has a copy elsewhere
2d21ac55 5121 * . is purgeable
1c79356b
A
5122 * . paging references (pages might be in page-list)
5123 */
5124
5125 if ((prev_object->ref_count > 1) ||
5126 prev_object->pager_created ||
5127 (prev_object->shadow != VM_OBJECT_NULL) ||
5128 (prev_object->copy != VM_OBJECT_NULL) ||
5129 (prev_object->true_share != FALSE) ||
2d21ac55 5130 (prev_object->purgable != VM_PURGABLE_DENY) ||
b0d623f7
A
5131 (prev_object->paging_in_progress != 0) ||
5132 (prev_object->activity_in_progress != 0)) {
1c79356b
A
5133 vm_object_unlock(prev_object);
5134 return(FALSE);
5135 }
5136
5137 vm_object_coalesce_count++;
5138
5139 /*
5140 * Remove any pages that may still be in the object from
5141 * a previous deallocation.
5142 */
5143 vm_object_page_remove(prev_object,
5144 prev_offset + prev_size,
5145 prev_offset + prev_size + next_size);
5146
5147 /*
5148 * Extend the object if necessary.
5149 */
5150 newsize = prev_offset + prev_size + next_size;
5151 if (newsize > prev_object->size) {
5152#if MACH_PAGEMAP
5153 /*
5154 * We cannot extend an object that has existence info,
5155 * since the existence info might then fail to cover
5156 * the entire object.
5157 *
5158 * This assertion must be true because the object
5159 * has no pager, and we only create existence info
5160 * for objects with pagers.
5161 */
5162 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
5163#endif /* MACH_PAGEMAP */
5164 prev_object->size = newsize;
5165 }
5166
5167 vm_object_unlock(prev_object);
5168 return(TRUE);
5169}
5170
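/*
 * Example (illustrative sketch, not part of this file's build): how a map
 * entry-extension path can try to grow an existing anonymous object in
 * place with vm_object_coalesce().  The wrapper and parameter names are
 * placeholders; per the restrictions above, the second object must be
 * VM_OBJECT_NULL and the map lock (not an object lock) is what keeps
 * prev_object alive across the call.
 */
#if 0 /* illustrative example only */
static boolean_t
example_extend_mapping(
	vm_object_t		prev_object,	/* object behind the entry   */
	vm_object_offset_t	prev_offset,	/* entry's offset into it    */
	vm_object_size_t	prev_size,	/* current size of the entry */
	vm_object_size_t	next_size)	/* bytes being appended      */
{
	/*
	 * On TRUE, prev_object has been extended (if necessary) to cover
	 * prev_offset + prev_size + next_size, and the appended range can
	 * be mapped from it at offset prev_offset + prev_size.
	 */
	return vm_object_coalesce(prev_object,
				  VM_OBJECT_NULL,
				  prev_offset,
				  (vm_object_offset_t) 0,
				  prev_size,
				  next_size);
}
#endif
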
5171/*
5172 * Attach a set of physical pages to an object, so that they can
5173 * be mapped by mapping the object. Typically used to map IO memory.
5174 *
5175 * The mapping function and its private data are used to obtain the
5176 * physical addresses for each page to be mapped.
5177 */
5178void
5179vm_object_page_map(
5180 vm_object_t object,
5181 vm_object_offset_t offset,
5182 vm_object_size_t size,
5183 vm_object_offset_t (*map_fn)(void *map_fn_data,
5184 vm_object_offset_t offset),
5185 void *map_fn_data) /* private to map_fn */
5186{
b0d623f7 5187 int64_t num_pages;
1c79356b
A
5188 int i;
5189 vm_page_t m;
5190 vm_page_t old_page;
5191 vm_object_offset_t addr;
5192
55e303ae 5193 num_pages = atop_64(size);
1c79356b
A
5194
5195 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
5196
5197 addr = (*map_fn)(map_fn_data, offset);
5198
5199 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
5200 vm_page_more_fictitious();
5201
5202 vm_object_lock(object);
5203 if ((old_page = vm_page_lookup(object, offset))
5204 != VM_PAGE_NULL)
5205 {
b0d623f7 5206 VM_PAGE_FREE(old_page);
1c79356b
A
5207 }
5208
b0d623f7 5209 assert((ppnum_t) addr == addr);
0b4c1975 5210 vm_page_init(m, (ppnum_t) addr, FALSE);
b0d623f7
A
5211 /*
5212 * private normally requires lock_queues but since we
5213 * are initializing the page, its not necessary here
5214 */
1c79356b
A
 5215 m->private = TRUE; /* don't free page */
5216 m->wire_count = 1;
5217 vm_page_insert(m, object, offset);
5218
5219 PAGE_WAKEUP_DONE(m);
5220 vm_object_unlock(object);
5221 }
5222}
5223
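/*
 * Example (illustrative sketch, not part of this file's build): a map_fn
 * for a physically contiguous aperture.  Note that the loop above casts the
 * returned value to ppnum_t, so map_fn must return a physical page number,
 * not a byte address.  The structure and function names are placeholders.
 */
#if 0 /* illustrative example only */
struct example_aperture {
	ppnum_t		base_page;	/* first physical page of the range */
};

static vm_object_offset_t
example_aperture_map_fn(
	void			*map_fn_data,
	vm_object_offset_t	offset)
{
	struct example_aperture	*ap = (struct example_aperture *) map_fn_data;

	return (vm_object_offset_t) (ap->base_page + atop_64(offset));
}

static void
example_map_aperture(
	vm_object_t		object,
	vm_object_size_t	size,
	struct example_aperture	*ap)
{
	vm_object_page_map(object, 0, size, example_aperture_map_fn, ap);
}
#endif
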
5224#include <mach_kdb.h>
5225
5226#if MACH_KDB
5227#include <ddb/db_output.h>
5228#include <vm/vm_print.h>
5229
5230#define printf kdbprintf
5231
5232extern boolean_t vm_object_cached(
5233 vm_object_t object);
5234
5235extern void print_bitstring(
5236 char byte);
5237
5238boolean_t vm_object_print_pages = FALSE;
5239
5240void
5241print_bitstring(
5242 char byte)
5243{
5244 printf("%c%c%c%c%c%c%c%c",
5245 ((byte & (1 << 0)) ? '1' : '0'),
5246 ((byte & (1 << 1)) ? '1' : '0'),
5247 ((byte & (1 << 2)) ? '1' : '0'),
5248 ((byte & (1 << 3)) ? '1' : '0'),
5249 ((byte & (1 << 4)) ? '1' : '0'),
5250 ((byte & (1 << 5)) ? '1' : '0'),
5251 ((byte & (1 << 6)) ? '1' : '0'),
5252 ((byte & (1 << 7)) ? '1' : '0'));
5253}
5254
5255boolean_t
5256vm_object_cached(
b0d623f7 5257 __unused register vm_object_t object)
1c79356b 5258{
b0d623f7 5259#if VM_OBJECT_CACHE
1c79356b
A
5260 register vm_object_t o;
5261
5262 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
5263 if (object == o) {
5264 return TRUE;
5265 }
5266 }
b0d623f7 5267#endif
1c79356b
A
5268 return FALSE;
5269}
5270
5271#if MACH_PAGEMAP
5272/*
5273 * vm_external_print: [ debug ]
5274 */
5275void
5276vm_external_print(
91447636 5277 vm_external_map_t emap,
b0d623f7 5278 vm_object_size_t size)
1c79356b 5279{
91447636 5280 if (emap == VM_EXTERNAL_NULL) {
1c79356b
A
5281 printf("0 ");
5282 } else {
b0d623f7
A
5283 vm_object_size_t existence_size = stob(size);
5284 printf("{ size=%lld, map=[", (uint64_t) existence_size);
1c79356b 5285 if (existence_size > 0) {
91447636 5286 print_bitstring(emap[0]);
1c79356b
A
5287 }
5288 if (existence_size > 1) {
91447636 5289 print_bitstring(emap[1]);
1c79356b
A
5290 }
5291 if (existence_size > 2) {
5292 printf("...");
91447636 5293 print_bitstring(emap[existence_size-1]);
1c79356b
A
5294 }
5295 printf("] }\n");
5296 }
5297 return;
5298}
5299#endif /* MACH_PAGEMAP */
5300
5301int
5302vm_follow_object(
5303 vm_object_t object)
5304{
0b4e3aa0
A
5305 int count = 0;
5306 int orig_db_indent = db_indent;
1c79356b 5307
0b4e3aa0
A
5308 while (TRUE) {
5309 if (object == VM_OBJECT_NULL) {
5310 db_indent = orig_db_indent;
5311 return count;
5312 }
1c79356b 5313
0b4e3aa0 5314 count += 1;
1c79356b 5315
0b4e3aa0
A
5316 iprintf("object 0x%x", object);
5317 printf(", shadow=0x%x", object->shadow);
5318 printf(", copy=0x%x", object->copy);
5319 printf(", pager=0x%x", object->pager);
5320 printf(", ref=%d\n", object->ref_count);
5321
5322 db_indent += 2;
5323 object = object->shadow;
5324 }
1c79356b 5325
1c79356b
A
5326}
5327
5328/*
5329 * vm_object_print: [ debug ]
5330 */
5331void
2d21ac55
A
5332vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
5333 __unused db_expr_t arg_count, __unused char *modif)
1c79356b 5334{
91447636 5335 vm_object_t object;
1c79356b 5336 register vm_page_t p;
91447636 5337 const char *s;
1c79356b
A
5338
5339 register int count;
5340
91447636 5341 object = (vm_object_t) (long) db_addr;
1c79356b
A
5342 if (object == VM_OBJECT_NULL)
5343 return;
5344
5345 iprintf("object 0x%x\n", object);
5346
5347 db_indent += 2;
5348
5349 iprintf("size=0x%x", object->size);
91447636 5350 printf(", memq_hint=%p", object->memq_hint);
1c79356b
A
5351 printf(", ref_count=%d\n", object->ref_count);
5352 iprintf("");
5353#if TASK_SWAPPER
5354 printf("res_count=%d, ", object->res_count);
5355#endif /* TASK_SWAPPER */
5356 printf("resident_page_count=%d\n", object->resident_page_count);
5357
5358 iprintf("shadow=0x%x", object->shadow);
5359 if (object->shadow) {
5360 register int i = 0;
5361 vm_object_t shadow = object;
91447636 5362 while((shadow = shadow->shadow))
1c79356b
A
5363 i++;
5364 printf(" (depth %d)", i);
5365 }
5366 printf(", copy=0x%x", object->copy);
5367 printf(", shadow_offset=0x%x", object->shadow_offset);
5368 printf(", last_alloc=0x%x\n", object->last_alloc);
5369
5370 iprintf("pager=0x%x", object->pager);
5371 printf(", paging_offset=0x%x", object->paging_offset);
91447636 5372 printf(", pager_control=0x%x\n", object->pager_control);
1c79356b
A
5373
5374 iprintf("copy_strategy=%d[", object->copy_strategy);
5375 switch (object->copy_strategy) {
5376 case MEMORY_OBJECT_COPY_NONE:
5377 printf("copy_none");
5378 break;
5379
5380 case MEMORY_OBJECT_COPY_CALL:
5381 printf("copy_call");
5382 break;
5383
5384 case MEMORY_OBJECT_COPY_DELAY:
5385 printf("copy_delay");
5386 break;
5387
5388 case MEMORY_OBJECT_COPY_SYMMETRIC:
5389 printf("copy_symmetric");
5390 break;
5391
5392 case MEMORY_OBJECT_COPY_INVALID:
5393 printf("copy_invalid");
5394 break;
5395
5396 default:
5397 printf("?");
5398 }
5399 printf("]");
1c79356b
A
5400
5401 iprintf("all_wanted=0x%x<", object->all_wanted);
5402 s = "";
5403 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
5404 printf("%sinit", s);
5405 s = ",";
5406 }
5407 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
5408 printf("%sready", s);
5409 s = ",";
5410 }
5411 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
5412 printf("%spaging", s);
5413 s = ",";
5414 }
1c79356b
A
5415 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
5416 printf("%slock", s);
5417 s = ",";
5418 }
5419 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
5420 printf("%suncaching", s);
5421 s = ",";
5422 }
5423 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
5424 printf("%scopy_call", s);
5425 s = ",";
5426 }
5427 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
5428 printf("%scaching", s);
5429 s = ",";
5430 }
5431 printf(">");
5432 printf(", paging_in_progress=%d\n", object->paging_in_progress);
b0d623f7 5433 printf(", activity_in_progress=%d\n", object->activity_in_progress);
1c79356b
A
5434
5435 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
5436 (object->pager_created ? "" : "!"),
5437 (object->pager_initialized ? "" : "!"),
5438 (object->pager_ready ? "" : "!"),
5439 (object->can_persist ? "" : "!"),
5440 (object->pager_trusted ? "" : "!"),
5441 (object->pageout ? "" : "!"),
5442 (object->internal ? "internal" : "external"),
5443 (object->temporary ? "temporary" : "permanent"));
2d21ac55 5444 iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n",
1c79356b 5445 (object->alive ? "" : "!"),
2d21ac55
A
5446 ((object->purgable != VM_PURGABLE_DENY) ? "" : "!"),
5447 ((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"),
5448 ((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"),
1c79356b
A
5449 (object->shadowed ? "" : "!"),
5450 (vm_object_cached(object) ? "" : "!"),
5451 (object->private ? "" : "!"));
5452 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
5453 (object->advisory_pageout ? "" : "!"),
5454 (object->silent_overwrite ? "" : "!"));
5455
5456#if MACH_PAGEMAP
5457 iprintf("existence_map=");
5458 vm_external_print(object->existence_map, object->size);
5459#endif /* MACH_PAGEMAP */
5460#if MACH_ASSERT
5461 iprintf("paging_object=0x%x\n", object->paging_object);
5462#endif /* MACH_ASSERT */
5463
5464 if (vm_object_print_pages) {
5465 count = 0;
5466 p = (vm_page_t) queue_first(&object->memq);
5467 while (!queue_end(&object->memq, (queue_entry_t) p)) {
5468 if (count == 0) {
5469 iprintf("memory:=");
5470 } else if (count == 2) {
5471 printf("\n");
5472 iprintf(" ...");
5473 count = 0;
5474 } else {
5475 printf(",");
5476 }
5477 count++;
5478
91447636 5479 printf("(off=0x%llX,page=%p)", p->offset, p);
1c79356b
A
5480 p = (vm_page_t) queue_next(&p->listq);
5481 }
5482 if (count != 0) {
5483 printf("\n");
5484 }
5485 }
5486 db_indent -= 2;
5487}
5488
5489
5490/*
5491 * vm_object_find [ debug ]
5492 *
5493 * Find all tasks which reference the given vm_object.
5494 */
5495
5496boolean_t vm_object_find(vm_object_t object);
5497boolean_t vm_object_print_verbose = FALSE;
5498
5499boolean_t
5500vm_object_find(
5501 vm_object_t object)
5502{
5503 task_t task;
5504 vm_map_t map;
5505 vm_map_entry_t entry;
1c79356b
A
5506 boolean_t found = FALSE;
5507
2d21ac55 5508 queue_iterate(&tasks, task, task_t, tasks) {
1c79356b
A
5509 map = task->map;
5510 for (entry = vm_map_first_entry(map);
5511 entry && entry != vm_map_to_entry(map);
5512 entry = entry->vme_next) {
5513
5514 vm_object_t obj;
5515
5516 /*
5517 * For the time being skip submaps,
5518 * only the kernel can have submaps,
5519 * and unless we are interested in
5520 * kernel objects, we can simply skip
5521 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
5522 * for a full solution.
5523 */
5524 if (entry->is_sub_map)
5525 continue;
5526 if (entry)
5527 obj = entry->object.vm_object;
5528 else
5529 continue;
5530
5531 while (obj != VM_OBJECT_NULL) {
5532 if (obj == object) {
5533 if (!found) {
5534 printf("TASK\t\tMAP\t\tENTRY\n");
5535 found = TRUE;
5536 }
5537 printf("0x%x\t0x%x\t0x%x\n",
5538 task, map, entry);
5539 }
5540 obj = obj->shadow;
5541 }
5542 }
5543 }
5544
5545 return(found);
5546}
5547
5548#endif /* MACH_KDB */
5549
0b4e3aa0
A
5550kern_return_t
5551vm_object_populate_with_private(
55e303ae 5552 vm_object_t object,
0b4e3aa0 5553 vm_object_offset_t offset,
55e303ae
A
5554 ppnum_t phys_page,
5555 vm_size_t size)
0b4e3aa0 5556{
55e303ae 5557 ppnum_t base_page;
0b4e3aa0
A
5558 vm_object_offset_t base_offset;
5559
5560
5561 if(!object->private)
5562 return KERN_FAILURE;
5563
55e303ae 5564 base_page = phys_page;
0b4e3aa0
A
5565
5566 vm_object_lock(object);
5567 if(!object->phys_contiguous) {
5568 vm_page_t m;
55e303ae 5569 if((base_offset = trunc_page_64(offset)) != offset) {
0b4e3aa0
A
5570 vm_object_unlock(object);
5571 return KERN_FAILURE;
5572 }
5573 base_offset += object->paging_offset;
5574 while(size) {
5575 m = vm_page_lookup(object, base_offset);
5576 if(m != VM_PAGE_NULL) {
5577 if(m->fictitious) {
b0d623f7
A
5578 if (m->phys_page != vm_page_guard_addr) {
5579
2d21ac55 5580 vm_page_lockspin_queues();
2d21ac55 5581 m->private = TRUE;
b0d623f7
A
5582 vm_page_unlock_queues();
5583
5584 m->fictitious = FALSE;
2d21ac55
A
5585 m->phys_page = base_page;
5586 if(!m->busy) {
5587 m->busy = TRUE;
5588 }
5589 if(!m->absent) {
5590 m->absent = TRUE;
5591 }
5592 m->list_req_pending = TRUE;
0b4e3aa0 5593 }
55e303ae 5594 } else if (m->phys_page != base_page) {
2d21ac55
A
5595 if (m->pmapped) {
5596 /*
5597 * pmap call to clear old mapping
5598 */
5599 pmap_disconnect(m->phys_page);
5600 }
55e303ae 5601 m->phys_page = base_page;
0b4e3aa0 5602 }
91447636
A
5603
5604 /*
5605 * ENCRYPTED SWAP:
5606 * We're not pointing to the same
5607 * physical page any longer and the
5608 * contents of the new one are not
5609 * supposed to be encrypted.
5610 * XXX What happens to the original
 5611 * physical page? Is it lost?
5612 */
5613 m->encrypted = FALSE;
5614
0b4e3aa0 5615 } else {
b0d623f7 5616 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
0b4e3aa0 5617 vm_page_more_fictitious();
b0d623f7
A
5618
5619 /*
5620 * private normally requires lock_queues but since we
 5621 * are initializing the page, it's not necessary here
5622 */
0b4e3aa0 5623 m->private = TRUE;
b0d623f7 5624 m->fictitious = FALSE;
55e303ae 5625 m->phys_page = base_page;
0b4e3aa0
A
5626 m->list_req_pending = TRUE;
5627 m->absent = TRUE;
5628 m->unusual = TRUE;
b0d623f7 5629
0b4e3aa0
A
5630 vm_page_insert(m, object, base_offset);
5631 }
55e303ae 5632 base_page++; /* Go to the next physical page */
0b4e3aa0
A
5633 base_offset += PAGE_SIZE;
5634 size -= PAGE_SIZE;
5635 }
5636 } else {
5637 /* NOTE: we should check the original settings here */
5638 /* if we have a size > zero a pmap call should be made */
5639 /* to disable the range */
5640
5641 /* pmap_? */
5642
5643 /* shadows on contiguous memory are not allowed */
5644 /* we therefore can use the offset field */
935ed37a 5645 object->shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
0b4e3aa0
A
5646 object->size = size;
5647 }
5648 vm_object_unlock(object);
5649 return KERN_SUCCESS;
5650}
5651
1c79356b
A
5652/*
5653 * memory_object_free_from_cache:
5654 *
5655 * Walk the vm_object cache list, removing and freeing vm_objects
0c530ab8 5656 * which are backed by the pager identified by the caller (pager_ops).
1c79356b
A
 5657 * Remove up to "count" objects, if there are that many available
5658 * in the cache.
0b4e3aa0 5659 *
1c79356b
A
5660 * Walk the list at most once, return the number of vm_objects
5661 * actually freed.
1c79356b
A
5662 */
5663
0b4e3aa0 5664__private_extern__ kern_return_t
1c79356b 5665memory_object_free_from_cache(
91447636 5666 __unused host_t host,
b0d623f7 5667 __unused memory_object_pager_ops_t pager_ops,
1c79356b
A
5668 int *count)
5669{
b0d623f7 5670#if VM_OBJECT_CACHE
1c79356b 5671 int object_released = 0;
1c79356b
A
5672
5673 register vm_object_t object = VM_OBJECT_NULL;
5674 vm_object_t shadow;
5675
5676/*
5677 if(host == HOST_NULL)
5678 return(KERN_INVALID_ARGUMENT);
5679*/
5680
5681 try_again:
5682 vm_object_cache_lock();
5683
5684 queue_iterate(&vm_object_cached_list, object,
5685 vm_object_t, cached_list) {
0c530ab8
A
5686 if (object->pager &&
5687 (pager_ops == object->pager->mo_pager_ops)) {
1c79356b
A
5688 vm_object_lock(object);
5689 queue_remove(&vm_object_cached_list, object,
5690 vm_object_t, cached_list);
5691 vm_object_cached_count--;
5692
b0d623f7 5693 vm_object_cache_unlock();
1c79356b
A
5694 /*
5695 * Since this object is in the cache, we know
0b4e3aa0
A
5696 * that it is initialized and has only a pager's
5697 * (implicit) reference. Take a reference to avoid
5698 * recursive deallocations.
1c79356b
A
5699 */
5700
5701 assert(object->pager_initialized);
5702 assert(object->ref_count == 0);
2d21ac55 5703 vm_object_lock_assert_exclusive(object);
1c79356b
A
5704 object->ref_count++;
5705
5706 /*
5707 * Terminate the object.
5708 * If the object had a shadow, we let
5709 * vm_object_deallocate deallocate it.
5710 * "pageout" objects have a shadow, but
5711 * maintain a "paging reference" rather
5712 * than a normal reference.
5713 * (We are careful here to limit recursion.)
5714 */
5715 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
b0d623f7 5716
1c79356b
A
5717 if ((vm_object_terminate(object) == KERN_SUCCESS)
5718 && (shadow != VM_OBJECT_NULL)) {
5719 vm_object_deallocate(shadow);
5720 }
5721
5722 if(object_released++ == *count)
5723 return KERN_SUCCESS;
5724 goto try_again;
5725 }
5726 }
5727 vm_object_cache_unlock();
5728 *count = object_released;
b0d623f7
A
5729#else
5730 *count = 0;
5731#endif
1c79356b
A
5732 return KERN_SUCCESS;
5733}
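
/*
 * Illustrative sketch (hypothetical; not from the original source): a
 * minimal caller of memory_object_free_from_cache().  "my_pager_ops" is a
 * placeholder for a real memory_object_pager_ops_t; per the description
 * above, *count limits how many cached objects may be released and is
 * updated with the number actually freed.
 */
static int
free_cached_objects_example(memory_object_pager_ops_t my_pager_ops)
{
	int count = 8;		/* release at most 8 cached objects */

	(void) memory_object_free_from_cache(HOST_NULL, my_pager_ops, &count);
	return count;		/* number of vm_objects actually freed */
}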
5734
0b4e3aa0 5735
1c79356b
A
5736
5737kern_return_t
0b4e3aa0
A
5738memory_object_create_named(
5739 memory_object_t pager,
5740 memory_object_offset_t size,
5741 memory_object_control_t *control)
1c79356b 5742{
0b4e3aa0
A
5743 vm_object_t object;
5744 vm_object_hash_entry_t entry;
b0d623f7 5745 lck_mtx_t *lck;
1c79356b 5746
0b4e3aa0
A
5747 *control = MEMORY_OBJECT_CONTROL_NULL;
5748 if (pager == MEMORY_OBJECT_NULL)
5749 return KERN_INVALID_ARGUMENT;
1c79356b 5750
b0d623f7 5751 lck = vm_object_hash_lock_spin(pager);
0b4e3aa0 5752 entry = vm_object_hash_lookup(pager, FALSE);
b0d623f7 5753
0b4e3aa0
A
5754 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
5755 (entry->object != VM_OBJECT_NULL)) {
5756 if (entry->object->named == TRUE)
5757 panic("memory_object_create_named: caller already holds the right"); }
b0d623f7 5758 vm_object_hash_unlock(lck);
1c79356b 5759
b0d623f7 5760 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) {
0b4e3aa0
A
5761 return(KERN_INVALID_OBJECT);
5762 }
5763
5764 /* wait for object (if any) to be ready */
5765 if (object != VM_OBJECT_NULL) {
5766 vm_object_lock(object);
5767 object->named = TRUE;
5768 while (!object->pager_ready) {
9bccf70c
A
5769 vm_object_sleep(object,
5770 VM_OBJECT_EVENT_PAGER_READY,
5771 THREAD_UNINT);
0b4e3aa0 5772 }
91447636 5773 *control = object->pager_control;
0b4e3aa0
A
5774 vm_object_unlock(object);
5775 }
5776 return (KERN_SUCCESS);
5777}
1c79356b 5778
1c79356b 5779
0b4e3aa0
A
5780/*
5781 * Routine: memory_object_recover_named [user interface]
5782 * Purpose:
5783 * Attempt to recover a named reference for a VM object.
5784 * VM will verify that the object has not already started
5785 * down the termination path, and if it has, will optionally
5786 * wait for that to finish.
5787 * Returns:
5788 * KERN_SUCCESS - we recovered a named reference on the object
5789 * KERN_FAILURE - we could not recover a reference (object dead)
5790 * KERN_INVALID_ARGUMENT - bad memory object control
5791 */
5792kern_return_t
5793memory_object_recover_named(
5794 memory_object_control_t control,
5795 boolean_t wait_on_terminating)
5796{
5797 vm_object_t object;
1c79356b 5798
0b4e3aa0
A
5799 object = memory_object_control_to_vm_object(control);
5800 if (object == VM_OBJECT_NULL) {
0b4e3aa0
A
5801 return (KERN_INVALID_ARGUMENT);
5802 }
0b4e3aa0
A
5803restart:
5804 vm_object_lock(object);
1c79356b 5805
0b4e3aa0 5806 if (object->terminating && wait_on_terminating) {
0b4e3aa0
A
5807 vm_object_wait(object,
5808 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
5809 THREAD_UNINT);
0b4e3aa0
A
5810 goto restart;
5811 }
5812
5813 if (!object->alive) {
0b4e3aa0
A
5814 vm_object_unlock(object);
5815 return KERN_FAILURE;
1c79356b
A
5816 }
5817
0b4e3aa0 5818 if (object->named == TRUE) {
0b4e3aa0
A
5819 vm_object_unlock(object);
5820 return KERN_SUCCESS;
5821 }
b0d623f7
A
5822#if VM_OBJECT_CACHE
5823 if ((object->ref_count == 0) && (!object->terminating)) {
5824 if (!vm_object_cache_lock_try()) {
5825 vm_object_unlock(object);
5826 goto restart;
5827 }
0b4e3aa0
A
5828 queue_remove(&vm_object_cached_list, object,
5829 vm_object_t, cached_list);
b0d623f7
A
5830 vm_object_cached_count--;
5831 XPR(XPR_VM_OBJECT_CACHE,
5832 "memory_object_recover_named: removing %X, head (%X, %X)\n",
5833 object,
5834 vm_object_cached_list.next,
5835 vm_object_cached_list.prev, 0,0);
5836
5837 vm_object_cache_unlock();
0b4e3aa0 5838 }
b0d623f7 5839#endif
0b4e3aa0 5840 object->named = TRUE;
2d21ac55 5841 vm_object_lock_assert_exclusive(object);
0b4e3aa0
A
5842 object->ref_count++;
5843 vm_object_res_reference(object);
5844 while (!object->pager_ready) {
9bccf70c
A
5845 vm_object_sleep(object,
5846 VM_OBJECT_EVENT_PAGER_READY,
5847 THREAD_UNINT);
0b4e3aa0
A
5848 }
5849 vm_object_unlock(object);
5850 return (KERN_SUCCESS);
1c79356b
A
5851}
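
/*
 * Illustrative sketch (hypothetical; not from the original source):
 * recovering a named reference through a memory object control port and
 * acting on the return codes documented above.
 */
static kern_return_t
recover_named_example(memory_object_control_t control)
{
	kern_return_t kr;

	/* wait out a concurrent termination instead of failing immediately */
	kr = memory_object_recover_named(control, TRUE);
	if (kr == KERN_FAILURE) {
		/* object is already dead: caller must rebuild its state */
	}
	return kr;
}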
5852
0b4e3aa0
A
5853
5854/*
5855 * vm_object_release_name:
5856 *
5857 * Enforces name semantic on memory_object reference count decrement
5858 * This routine should not be called unless the caller holds a name
5859 * reference gained through the memory_object_create_named.
5860 *
5861 * If the TERMINATE_IDLE flag is set, the call will return if the
5862 * reference count is not 1. i.e. idle with the only remaining reference
5863 * being the name.
5864 * If the decision is made to proceed the name field flag is set to
5865 * false and the reference count is decremented. If the RESPECT_CACHE
5866 * flag is set and the reference count has gone to zero, the
5867 * memory_object is checked to see if it is cacheable otherwise when
5868 * the reference count is zero, it is simply terminated.
5869 */
5870
5871__private_extern__ kern_return_t
5872vm_object_release_name(
5873 vm_object_t object,
5874 int flags)
1c79356b 5875{
0b4e3aa0
A
5876 vm_object_t shadow;
5877 boolean_t original_object = TRUE;
1c79356b 5878
0b4e3aa0 5879 while (object != VM_OBJECT_NULL) {
1c79356b 5880
0b4e3aa0 5881 vm_object_lock(object);
b0d623f7 5882
0b4e3aa0 5883 assert(object->alive);
b0d623f7 5884 if (original_object)
0b4e3aa0
A
5885 assert(object->named);
5886 assert(object->ref_count > 0);
5887
5888 /*
5889 * We have to wait for initialization before
5890 * destroying or caching the object.
5891 */
5892
5893 if (object->pager_created && !object->pager_initialized) {
5894 assert(!object->can_persist);
5895 vm_object_assert_wait(object,
5896 VM_OBJECT_EVENT_INITIALIZED,
5897 THREAD_UNINT);
5898 vm_object_unlock(object);
9bccf70c 5899 thread_block(THREAD_CONTINUE_NULL);
0b4e3aa0 5900 continue;
1c79356b
A
5901 }
5902
0b4e3aa0
A
5903 if (((object->ref_count > 1)
5904 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
5905 || (object->terminating)) {
5906 vm_object_unlock(object);
0b4e3aa0
A
5907 return KERN_FAILURE;
5908 } else {
5909 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
5910 vm_object_unlock(object);
0b4e3aa0 5911 return KERN_SUCCESS;
1c79356b 5912 }
0b4e3aa0
A
5913 }
5914
5915 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
5916 (object->ref_count == 1)) {
b0d623f7 5917 if (original_object)
0b4e3aa0 5918 object->named = FALSE;
1c79356b 5919 vm_object_unlock(object);
0b4e3aa0
A
5920 /* let vm_object_deallocate push this thing into */
 5921 /* the cache, if that is where it is bound */
5922 vm_object_deallocate(object);
5923 return KERN_SUCCESS;
5924 }
5925 VM_OBJ_RES_DECR(object);
5926 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
b0d623f7
A
5927
5928 if (object->ref_count == 1) {
5929 if (vm_object_terminate(object) != KERN_SUCCESS) {
5930 if (original_object) {
0b4e3aa0
A
5931 return KERN_FAILURE;
5932 } else {
5933 return KERN_SUCCESS;
5934 }
5935 }
5936 if (shadow != VM_OBJECT_NULL) {
5937 original_object = FALSE;
5938 object = shadow;
5939 continue;
5940 }
5941 return KERN_SUCCESS;
5942 } else {
2d21ac55 5943 vm_object_lock_assert_exclusive(object);
0b4e3aa0
A
5944 object->ref_count--;
5945 assert(object->ref_count > 0);
5946 if(original_object)
5947 object->named = FALSE;
5948 vm_object_unlock(object);
0b4e3aa0 5949 return KERN_SUCCESS;
1c79356b 5950 }
1c79356b 5951 }
91447636
A
5952 /*NOTREACHED*/
5953 assert(0);
5954 return KERN_FAILURE;
1c79356b
A
5955}
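
/*
 * Illustrative sketch (hypothetical; not from the original source):
 * releasing a name reference obtained via memory_object_create_named(),
 * terminating the object only when the name is the sole remaining
 * reference and honoring its cacheability.
 */
static void
release_name_example(vm_object_t object)
{
	kern_return_t kr;

	kr = vm_object_release_name(object,
		MEMORY_OBJECT_TERMINATE_IDLE | MEMORY_OBJECT_RESPECT_CACHE);
	if (kr == KERN_FAILURE) {
		/* object still busy or terminating; name reference retained */
	}
}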
5956
0b4e3aa0
A
5957
5958__private_extern__ kern_return_t
5959vm_object_lock_request(
5960 vm_object_t object,
5961 vm_object_offset_t offset,
5962 vm_object_size_t size,
5963 memory_object_return_t should_return,
5964 int flags,
5965 vm_prot_t prot)
1c79356b 5966{
91447636
A
5967 __unused boolean_t should_flush;
5968
5969 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
1c79356b 5970
0b4e3aa0
A
5971 XPR(XPR_MEMORY_OBJECT,
5972 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
b0d623f7 5973 object, offset, size,
0b4e3aa0 5974 (((should_return&1)<<1)|should_flush), prot);
1c79356b 5975
0b4e3aa0
A
5976 /*
5977 * Check for bogus arguments.
5978 */
5979 if (object == VM_OBJECT_NULL)
5980 return (KERN_INVALID_ARGUMENT);
1c79356b 5981
0b4e3aa0
A
5982 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
5983 return (KERN_INVALID_ARGUMENT);
1c79356b 5984
55e303ae 5985 size = round_page_64(size);
0b4e3aa0
A
5986
5987 /*
5988 * Lock the object, and acquire a paging reference to
5989 * prevent the memory_object reference from being released.
5990 */
5991 vm_object_lock(object);
5992 vm_object_paging_begin(object);
0b4e3aa0
A
5993
5994 (void)vm_object_update(object,
91447636 5995 offset, size, NULL, NULL, should_return, flags, prot);
0b4e3aa0
A
5996
5997 vm_object_paging_end(object);
5998 vm_object_unlock(object);
5999
6000 return (KERN_SUCCESS);
6001}
6002
91447636 6003/*
2d21ac55 6004 * Empty a purgeable object by grabbing the physical pages assigned to it and
91447636
A
6005 * putting them on the free queue without writing them to backing store, etc.
6006 * When the pages are next touched they will be demand zero-fill pages. We
6007 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
6008 * skip referenced/dirty pages, pages on the active queue, etc. We're more
2d21ac55 6009 * than happy to grab these since this is a purgeable object. We mark the
91447636
A
6010 * object as "empty" after reaping its pages.
6011 *
b0d623f7
A
6012 * On entry the object must be locked and it must be
6013 * purgeable with no delayed copies pending.
91447636 6014 */
b0d623f7 6015void
91447636
A
6016vm_object_purge(vm_object_t object)
6017{
b0d623f7 6018 vm_object_lock_assert_exclusive(object);
0b4e3aa0 6019
b0d623f7
A
6020 if (object->purgable == VM_PURGABLE_DENY)
6021 return;
91447636
A
6022
6023 assert(object->copy == VM_OBJECT_NULL);
6024 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
593a1d5f 6025
b0d623f7
A
6026 if(object->purgable == VM_PURGABLE_VOLATILE) {
6027 unsigned int delta;
6028 assert(object->resident_page_count >=
6029 object->wired_page_count);
6030 delta = (object->resident_page_count -
6031 object->wired_page_count);
6032 if (delta != 0) {
6033 assert(vm_page_purgeable_count >=
6034 delta);
6035 OSAddAtomic(-delta,
6036 (SInt32 *)&vm_page_purgeable_count);
91447636 6037 }
b0d623f7
A
6038 if (object->wired_page_count != 0) {
6039 assert(vm_page_purgeable_wired_count >=
6040 object->wired_page_count);
6041 OSAddAtomic(-object->wired_page_count,
6042 (SInt32 *)&vm_page_purgeable_wired_count);
91447636
A
6043 }
6044 }
b0d623f7
A
6045 object->purgable = VM_PURGABLE_EMPTY;
6046
6047 vm_object_reap_pages(object, REAP_PURGEABLE);
91447636 6048}
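
/*
 * Illustrative sketch (hypothetical caller; not from the original source):
 * the entry conditions documented above -- exclusive object lock held,
 * object purgeable, no delayed copies -- expressed as code.
 */
static void
purge_object_example(vm_object_t object)
{
	vm_object_lock(object);			/* exclusive lock required */
	if (object->purgable != VM_PURGABLE_DENY &&
	    object->copy == VM_OBJECT_NULL) {
		vm_object_purge(object);	/* reap pages, mark object EMPTY */
	}
	vm_object_unlock(object);
}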
b0d623f7 6049
91447636
A
6050
6051/*
2d21ac55
A
6052 * vm_object_purgeable_control() allows the caller to control and investigate the
6053 * state of a purgeable object. A purgeable object is created via a call to
6054 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgeable object will
6055 * never be coalesced with any other object -- even other purgeable objects --
6056 * and will thus always remain a distinct object. A purgeable object has
91447636 6057 * special semantics when its reference count is exactly 1. If its reference
2d21ac55 6058 * count is greater than 1, then a purgeable object will behave like a normal
91447636
A
6059 * object and attempts to use this interface will result in an error return
6060 * of KERN_INVALID_ARGUMENT.
6061 *
2d21ac55 6062 * A purgeable object may be put into a "volatile" state which will make the
91447636
A
 6063 * object's pages eligible for being reclaimed without paging to backing
6064 * store if the system runs low on memory. If the pages in a volatile
2d21ac55
A
6065 * purgeable object are reclaimed, the purgeable object is said to have been
6066 * "emptied." When a purgeable object is emptied the system will reclaim as
91447636
A
6067 * many pages from the object as it can in a convenient manner (pages already
6068 * en route to backing store or busy for other reasons are left as is). When
2d21ac55 6069 * a purgeable object is made volatile, its pages will generally be reclaimed
91447636
A
6070 * before other pages in the application's working set. This semantic is
6071 * generally used by applications which can recreate the data in the object
6072 * faster than it can be paged in. One such example might be media assets
6073 * which can be reread from a much faster RAID volume.
6074 *
2d21ac55 6075 * A purgeable object may be designated as "non-volatile" which means it will
91447636
A
6076 * behave like all other objects in the system with pages being written to and
6077 * read from backing store as needed to satisfy system memory needs. If the
6078 * object was emptied before the object was made non-volatile, that fact will
2d21ac55 6079 * be returned as the old state of the purgeable object (see
91447636
A
6080 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
6081 * were reclaimed as part of emptying the object will be refaulted in as
6082 * zero-fill on demand. It is up to the application to note that an object
 6083 * was emptied and recreate the object's contents if necessary. When a
2d21ac55
A
6084 * purgeable object is made non-volatile, its pages will generally not be paged
6085 * out to backing store in the immediate future. A purgeable object may also
91447636
A
6086 * be manually emptied.
6087 *
6088 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
2d21ac55 6089 * volatile purgeable object may be queried at any time. This information may
91447636
A
6090 * be used as a control input to let the application know when the system is
6091 * experiencing memory pressure and is reclaiming memory.
6092 *
2d21ac55 6093 * The specified address may be any address within the purgeable object. If
91447636
A
6094 * the specified address does not represent any object in the target task's
6095 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
2d21ac55 6096 * object containing the specified address is not a purgeable object, then
91447636
A
6097 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
6098 * returned.
6099 *
6100 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
6101 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
2d21ac55
A
6102 * state is used to set the new state of the purgeable object and return its
6103 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable
91447636
A
6104 * object is returned in the parameter state.
6105 *
6106 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
6107 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
6108 * the non-volatile, volatile and volatile/empty states described above.
2d21ac55 6109 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
91447636
A
6110 * immediately reclaim as many pages in the object as can be conveniently
6111 * collected (some may have already been written to backing store or be
6112 * otherwise busy).
6113 *
2d21ac55
A
6114 * The process of making a purgeable object non-volatile and determining its
6115 * previous state is atomic. Thus, if a purgeable object is made
91447636 6116 * VM_PURGABLE_NONVOLATILE and the old state is returned as
2d21ac55 6117 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
91447636
A
6118 * completely intact and will remain so until the object is made volatile
6119 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
6120 * was reclaimed while it was in a volatile state and its previous contents
6121 * have been lost.
6122 */
6123/*
6124 * The object must be locked.
6125 */
6126kern_return_t
6127vm_object_purgable_control(
6128 vm_object_t object,
6129 vm_purgable_t control,
6130 int *state)
6131{
6132 int old_state;
2d21ac55 6133 int new_state;
91447636
A
6134
6135 if (object == VM_OBJECT_NULL) {
6136 /*
2d21ac55 6137 * Object must already be present or it can't be purgeable.
91447636
A
6138 */
6139 return KERN_INVALID_ARGUMENT;
6140 }
6141
6142 /*
2d21ac55 6143 * Get current state of the purgeable object.
91447636 6144 */
2d21ac55
A
6145 old_state = object->purgable;
6146 if (old_state == VM_PURGABLE_DENY)
91447636
A
6147 return KERN_INVALID_ARGUMENT;
6148
2d21ac55 6149 /* purgeable cant have delayed copies - now or in the future */
91447636
A
6150 assert(object->copy == VM_OBJECT_NULL);
6151 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6152
6153 /*
6154 * Execute the desired operation.
6155 */
6156 if (control == VM_PURGABLE_GET_STATE) {
6157 *state = old_state;
6158 return KERN_SUCCESS;
6159 }
6160
b0d623f7
A
6161 if ((*state) & VM_PURGABLE_DEBUG_EMPTY) {
6162 object->volatile_empty = TRUE;
6163 }
6164 if ((*state) & VM_PURGABLE_DEBUG_FAULT) {
6165 object->volatile_fault = TRUE;
6166 }
6167
2d21ac55 6168 new_state = *state & VM_PURGABLE_STATE_MASK;
b0d623f7
A
6169 if (new_state == VM_PURGABLE_VOLATILE &&
6170 object->volatile_empty) {
6171 new_state = VM_PURGABLE_EMPTY;
6172 }
6173
2d21ac55
A
6174 switch (new_state) {
6175 case VM_PURGABLE_DENY:
91447636 6176 case VM_PURGABLE_NONVOLATILE:
2d21ac55
A
6177 object->purgable = new_state;
6178
b0d623f7
A
6179 if (old_state == VM_PURGABLE_VOLATILE) {
6180 unsigned int delta;
6181
6182 assert(object->resident_page_count >=
6183 object->wired_page_count);
6184 delta = (object->resident_page_count -
6185 object->wired_page_count);
6186
6187 assert(vm_page_purgeable_count >= delta);
6188
6189 if (delta != 0) {
6190 OSAddAtomic(-delta,
6191 (SInt32 *)&vm_page_purgeable_count);
6192 }
6193 if (object->wired_page_count != 0) {
6194 assert(vm_page_purgeable_wired_count >=
6195 object->wired_page_count);
6196 OSAddAtomic(-object->wired_page_count,
6197 (SInt32 *)&vm_page_purgeable_wired_count);
6198 }
6199
2d21ac55 6200 vm_page_lock_queues();
b0d623f7
A
6201
6202 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
6203 purgeable_q_t queue = vm_purgeable_object_remove(object);
6204 assert(queue);
6205
6206 vm_purgeable_token_delete_first(queue);
6207 assert(queue->debug_count_objects>=0);
6208
2d21ac55 6209 vm_page_unlock_queues();
91447636 6210 }
91447636
A
6211 break;
6212
6213 case VM_PURGABLE_VOLATILE:
b0d623f7
A
6214 if (object->volatile_fault) {
6215 vm_page_t p;
6216 int refmod;
6217
6218 queue_iterate(&object->memq, p, vm_page_t, listq) {
6219 if (p->busy ||
6220 VM_PAGE_WIRED(p) ||
6221 p->fictitious) {
6222 continue;
6223 }
6224 refmod = pmap_disconnect(p->phys_page);
6225 if ((refmod & VM_MEM_MODIFIED) &&
6226 !p->dirty) {
6227 p->dirty = TRUE;
6228 }
6229 }
6230 }
6231
593a1d5f
A
6232 if (old_state == VM_PURGABLE_EMPTY &&
6233 object->resident_page_count == 0)
2d21ac55 6234 break;
b0d623f7 6235
2d21ac55
A
6236 purgeable_q_t queue;
6237
6238 /* find the correct queue */
6239 if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
593a1d5f 6240 queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
2d21ac55
A
6241 else {
6242 if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
6243 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
6244 else
6245 queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
91447636 6246 }
2d21ac55 6247
593a1d5f
A
6248 if (old_state == VM_PURGABLE_NONVOLATILE ||
6249 old_state == VM_PURGABLE_EMPTY) {
b0d623f7
A
6250 unsigned int delta;
6251
2d21ac55
A
6252 /* try to add token... this can fail */
6253 vm_page_lock_queues();
91447636 6254
2d21ac55
A
6255 kern_return_t result = vm_purgeable_token_add(queue);
6256 if (result != KERN_SUCCESS) {
6257 vm_page_unlock_queues();
6258 return result;
91447636 6259 }
2d21ac55
A
6260 vm_page_unlock_queues();
6261
b0d623f7
A
6262 assert(object->resident_page_count >=
6263 object->wired_page_count);
6264 delta = (object->resident_page_count -
6265 object->wired_page_count);
6266
6267 if (delta != 0) {
6268 OSAddAtomic(delta,
6269 &vm_page_purgeable_count);
6270 }
6271 if (object->wired_page_count != 0) {
6272 OSAddAtomic(object->wired_page_count,
6273 &vm_page_purgeable_wired_count);
6274 }
6275
2d21ac55
A
6276 object->purgable = new_state;
6277
6278 /* object should not be on a queue */
6279 assert(object->objq.next == NULL && object->objq.prev == NULL);
91447636 6280 }
2d21ac55
A
6281 else if (old_state == VM_PURGABLE_VOLATILE) {
6282 /*
6283 * if reassigning priorities / purgeable groups, we don't change the
6284 * token queue. So moving priorities will not make pages stay around longer.
6285 * Reasoning is that the algorithm gives most priority to the most important
 6286 * object. If a new token is added, the most important object's priority is boosted.
6287 * This biases the system already for purgeable queues that move a lot.
 6288 * It doesn't seem more biasing is necessary in this case, where no new object is added.
6289 */
6290 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
6291
6292 purgeable_q_t old_queue=vm_purgeable_object_remove(object);
6293 assert(old_queue);
6294
6295 if (old_queue != queue) {
6296 kern_return_t result;
6297
6298 /* Changing queue. Have to move token. */
6299 vm_page_lock_queues();
6300 vm_purgeable_token_delete_first(old_queue);
6301 result = vm_purgeable_token_add(queue);
6302 vm_page_unlock_queues();
91447636 6303
2d21ac55
A
6304 assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */
6305 }
6306 };
6307 vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT );
6308
6309 assert(queue->debug_count_objects>=0);
6310
91447636
A
6311 break;
6312
6313
6314 case VM_PURGABLE_EMPTY:
b0d623f7
A
6315 if (object->volatile_fault) {
6316 vm_page_t p;
6317 int refmod;
6318
6319 queue_iterate(&object->memq, p, vm_page_t, listq) {
6320 if (p->busy ||
6321 VM_PAGE_WIRED(p) ||
6322 p->fictitious) {
6323 continue;
6324 }
6325 refmod = pmap_disconnect(p->phys_page);
6326 if ((refmod & VM_MEM_MODIFIED) &&
6327 !p->dirty) {
6328 p->dirty = TRUE;
6329 }
2d21ac55 6330 }
b0d623f7
A
6331 }
6332
6333 if (old_state != new_state) {
6334 assert(old_state == VM_PURGABLE_NONVOLATILE ||
6335 old_state == VM_PURGABLE_VOLATILE);
6336 if (old_state == VM_PURGABLE_VOLATILE) {
6337 purgeable_q_t old_queue;
2d21ac55 6338
b0d623f7
A
6339 /* object should be on a queue */
6340 assert(object->objq.next != NULL &&
6341 object->objq.prev != NULL);
6342 old_queue = vm_purgeable_object_remove(object);
6343 assert(old_queue);
2d21ac55 6344 vm_page_lock_queues();
b0d623f7
A
6345 vm_purgeable_token_delete_first(old_queue);
6346 vm_page_unlock_queues();
2d21ac55
A
6347 }
6348 (void) vm_object_purge(object);
91447636 6349 }
91447636
A
6350 break;
6351
6352 }
6353 *state = old_state;
6354
6355 return KERN_SUCCESS;
6356}
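
/*
 * Illustrative example (hypothetical user-level code; not part of this
 * file): the purgeable semantics described above, driven through the Mach
 * user interfaces vm_allocate() and vm_purgable_control().  Error handling
 * is omitted.
 *
 *	#include <mach/mach.h>
 *
 *	vm_address_t	addr = 0;
 *	int		state;
 *
 *	// create a purgeable region (never coalesced with other objects)
 *	vm_allocate(mach_task_self(), &addr, 16 * vm_page_size,
 *	    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
 *
 *	// make it volatile: pages become reclaimable under memory pressure
 *	state = VM_PURGABLE_VOLATILE;
 *	vm_purgable_control(mach_task_self(), addr,
 *	    VM_PURGABLE_SET_STATE, &state);
 *
 *	// later, take it back and learn whether it was emptied meanwhile
 *	state = VM_PURGABLE_NONVOLATILE;
 *	vm_purgable_control(mach_task_self(), addr,
 *	    VM_PURGABLE_SET_STATE, &state);
 *	if (state == VM_PURGABLE_EMPTY) {
 *		// contents were reclaimed; the application must recreate them
 *	}
 */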
0b4e3aa0
A
6357
6358#if TASK_SWAPPER
6359/*
6360 * vm_object_res_deallocate
6361 *
6362 * (recursively) decrement residence counts on vm objects and their shadows.
6363 * Called from vm_object_deallocate and when swapping out an object.
6364 *
6365 * The object is locked, and remains locked throughout the function,
6366 * even as we iterate down the shadow chain. Locks on intermediate objects
6367 * will be dropped, but not the original object.
6368 *
6369 * NOTE: this function used to use recursion, rather than iteration.
6370 */
6371
6372__private_extern__ void
6373vm_object_res_deallocate(
6374 vm_object_t object)
6375{
6376 vm_object_t orig_object = object;
6377 /*
6378 * Object is locked so it can be called directly
6379 * from vm_object_deallocate. Original object is never
6380 * unlocked.
6381 */
6382 assert(object->res_count > 0);
6383 while (--object->res_count == 0) {
6384 assert(object->ref_count >= object->res_count);
6385 vm_object_deactivate_all_pages(object);
6386 /* iterate on shadow, if present */
6387 if (object->shadow != VM_OBJECT_NULL) {
6388 vm_object_t tmp_object = object->shadow;
6389 vm_object_lock(tmp_object);
6390 if (object != orig_object)
6391 vm_object_unlock(object);
6392 object = tmp_object;
6393 assert(object->res_count > 0);
6394 } else
6395 break;
6396 }
6397 if (object != orig_object)
1c79356b 6398 vm_object_unlock(object);
0b4e3aa0
A
6399}
6400
6401/*
6402 * vm_object_res_reference
6403 *
6404 * Internal function to increment residence count on a vm object
6405 * and its shadows. It is called only from vm_object_reference, and
6406 * when swapping in a vm object, via vm_map_swap.
6407 *
6408 * The object is locked, and remains locked throughout the function,
6409 * even as we iterate down the shadow chain. Locks on intermediate objects
6410 * will be dropped, but not the original object.
6411 *
6412 * NOTE: this function used to use recursion, rather than iteration.
6413 */
6414
6415__private_extern__ void
6416vm_object_res_reference(
6417 vm_object_t object)
6418{
6419 vm_object_t orig_object = object;
6420 /*
6421 * Object is locked, so this can be called directly
6422 * from vm_object_reference. This lock is never released.
6423 */
6424 while ((++object->res_count == 1) &&
6425 (object->shadow != VM_OBJECT_NULL)) {
6426 vm_object_t tmp_object = object->shadow;
6427
6428 assert(object->ref_count >= object->res_count);
6429 vm_object_lock(tmp_object);
6430 if (object != orig_object)
6431 vm_object_unlock(object);
6432 object = tmp_object;
1c79356b 6433 }
0b4e3aa0
A
6434 if (object != orig_object)
6435 vm_object_unlock(object);
6436 assert(orig_object->ref_count >= orig_object->res_count);
1c79356b 6437}
0b4e3aa0
A
6438#endif /* TASK_SWAPPER */
6439
6440/*
6441 * vm_object_reference:
6442 *
6443 * Gets another reference to the given object.
6444 */
6445#ifdef vm_object_reference
6446#undef vm_object_reference
6447#endif
6448__private_extern__ void
6449vm_object_reference(
6450 register vm_object_t object)
6451{
6452 if (object == VM_OBJECT_NULL)
6453 return;
6454
6455 vm_object_lock(object);
6456 assert(object->ref_count > 0);
6457 vm_object_reference_locked(object);
6458 vm_object_unlock(object);
6459}
6460
1c79356b
A
6461#ifdef MACH_BSD
6462/*
6463 * Scale the vm_object_cache
6464 * This is required to make sure that the vm_object_cache is big
6465 * enough to effectively cache the mapped file.
6466 * This is really important with UBC as all the regular file vnodes
 6467 * have a memory object associated with them. Having this cache too
6468 * small results in rapid reclaim of vnodes and hurts performance a LOT!
6469 *
 6470 * This is also needed as the number of vnodes can be dynamically scaled.
6471 */
6472kern_return_t
91447636
A
6473adjust_vm_object_cache(
6474 __unused vm_size_t oval,
b0d623f7 6475 __unused vm_size_t nval)
1c79356b 6476{
b0d623f7 6477#if VM_OBJECT_CACHE
1c79356b
A
6478 vm_object_cached_max = nval;
6479 vm_object_cache_trim(FALSE);
b0d623f7 6480#endif
1c79356b
A
6481 return (KERN_SUCCESS);
6482}
6483#endif /* MACH_BSD */
6484
91447636
A
6485
6486/*
6487 * vm_object_transpose
6488 *
6489 * This routine takes two VM objects of the same size and exchanges
6490 * their backing store.
6491 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
6492 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
6493 *
6494 * The VM objects must not be locked by caller.
6495 */
b0d623f7 6496unsigned int vm_object_transpose_count = 0;
91447636
A
6497kern_return_t
6498vm_object_transpose(
6499 vm_object_t object1,
6500 vm_object_t object2,
6501 vm_object_size_t transpose_size)
6502{
6503 vm_object_t tmp_object;
6504 kern_return_t retval;
6505 boolean_t object1_locked, object2_locked;
91447636
A
6506 vm_page_t page;
6507 vm_object_offset_t page_offset;
b0d623f7
A
6508 lck_mtx_t *hash_lck;
6509 vm_object_hash_entry_t hash_entry;
91447636
A
6510
6511 tmp_object = VM_OBJECT_NULL;
6512 object1_locked = FALSE; object2_locked = FALSE;
91447636
A
6513
6514 if (object1 == object2 ||
6515 object1 == VM_OBJECT_NULL ||
6516 object2 == VM_OBJECT_NULL) {
6517 /*
6518 * If the 2 VM objects are the same, there's
6519 * no point in exchanging their backing store.
6520 */
6521 retval = KERN_INVALID_VALUE;
6522 goto done;
6523 }
6524
b0d623f7
A
6525 /*
6526 * Since we need to lock both objects at the same time,
6527 * make sure we always lock them in the same order to
6528 * avoid deadlocks.
6529 */
6530 if (object1 > object2) {
6531 tmp_object = object1;
6532 object1 = object2;
6533 object2 = tmp_object;
6534 }
6535
6536 /*
6537 * Allocate a temporary VM object to hold object1's contents
6538 * while we copy object2 to object1.
6539 */
6540 tmp_object = vm_object_allocate(transpose_size);
6541 vm_object_lock(tmp_object);
6542 tmp_object->can_persist = FALSE;
6543
6544
6545 /*
6546 * Grab control of the 1st VM object.
6547 */
91447636
A
6548 vm_object_lock(object1);
6549 object1_locked = TRUE;
2d21ac55
A
6550 if (!object1->alive || object1->terminating ||
6551 object1->copy || object1->shadow || object1->shadowed ||
6552 object1->purgable != VM_PURGABLE_DENY) {
91447636
A
6553 /*
6554 * We don't deal with copy or shadow objects (yet).
6555 */
6556 retval = KERN_INVALID_VALUE;
6557 goto done;
6558 }
6559 /*
b0d623f7
A
6560 * We're about to mess with the object's backing store and
6561 * taking a "paging_in_progress" reference wouldn't be enough
91447636
A
6562 * to prevent any paging activity on this object, so the caller should
6563 * have "quiesced" the objects beforehand, via a UPL operation with
6564 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
6565 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
b0d623f7
A
6566 *
6567 * Wait for any paging operation to complete (but only paging, not
 6568 * other kinds of activity not linked to the pager). After we're
 6569 * satisfied that there's no more paging in progress, we keep the
6570 * object locked, to guarantee that no one tries to access its pager.
91447636 6571 */
b0d623f7 6572 vm_object_paging_only_wait(object1, THREAD_UNINT);
91447636
A
6573
6574 /*
6575 * Same as above for the 2nd object...
6576 */
6577 vm_object_lock(object2);
6578 object2_locked = TRUE;
2d21ac55
A
6579 if (! object2->alive || object2->terminating ||
6580 object2->copy || object2->shadow || object2->shadowed ||
6581 object2->purgable != VM_PURGABLE_DENY) {
91447636
A
6582 retval = KERN_INVALID_VALUE;
6583 goto done;
6584 }
b0d623f7 6585 vm_object_paging_only_wait(object2, THREAD_UNINT);
91447636 6586
91447636
A
6587
6588 if (object1->size != object2->size ||
6589 object1->size != transpose_size) {
6590 /*
6591 * If the 2 objects don't have the same size, we can't
6592 * exchange their backing stores or one would overflow.
6593 * If their size doesn't match the caller's
6594 * "transpose_size", we can't do it either because the
6595 * transpose operation will affect the entire span of
6596 * the objects.
6597 */
6598 retval = KERN_INVALID_VALUE;
6599 goto done;
6600 }
6601
6602
6603 /*
6604 * Transpose the lists of resident pages.
2d21ac55 6605 * This also updates the resident_page_count and the memq_hint.
91447636
A
6606 */
6607 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
6608 /*
6609 * No pages in object1, just transfer pages
6610 * from object2 to object1. No need to go through
6611 * an intermediate object.
6612 */
6613 while (!queue_empty(&object2->memq)) {
6614 page = (vm_page_t) queue_first(&object2->memq);
2d21ac55 6615 vm_page_rename(page, object1, page->offset, FALSE);
91447636
A
6616 }
6617 assert(queue_empty(&object2->memq));
6618 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
6619 /*
6620 * No pages in object2, just transfer pages
6621 * from object1 to object2. No need to go through
6622 * an intermediate object.
6623 */
6624 while (!queue_empty(&object1->memq)) {
6625 page = (vm_page_t) queue_first(&object1->memq);
2d21ac55 6626 vm_page_rename(page, object2, page->offset, FALSE);
91447636
A
6627 }
6628 assert(queue_empty(&object1->memq));
6629 } else {
6630 /* transfer object1's pages to tmp_object */
91447636
A
6631 while (!queue_empty(&object1->memq)) {
6632 page = (vm_page_t) queue_first(&object1->memq);
6633 page_offset = page->offset;
b0d623f7 6634 vm_page_remove(page, TRUE);
91447636
A
6635 page->offset = page_offset;
6636 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
6637 }
91447636
A
6638 assert(queue_empty(&object1->memq));
6639 /* transfer object2's pages to object1 */
6640 while (!queue_empty(&object2->memq)) {
6641 page = (vm_page_t) queue_first(&object2->memq);
2d21ac55 6642 vm_page_rename(page, object1, page->offset, FALSE);
91447636
A
6643 }
6644 assert(queue_empty(&object2->memq));
6645 /* transfer tmp_object's pages to object1 */
6646 while (!queue_empty(&tmp_object->memq)) {
6647 page = (vm_page_t) queue_first(&tmp_object->memq);
6648 queue_remove(&tmp_object->memq, page,
6649 vm_page_t, listq);
6650 vm_page_insert(page, object2, page->offset);
6651 }
6652 assert(queue_empty(&tmp_object->memq));
6653 }
6654
91447636
A
6655#define __TRANSPOSE_FIELD(field) \
6656MACRO_BEGIN \
6657 tmp_object->field = object1->field; \
6658 object1->field = object2->field; \
6659 object2->field = tmp_object->field; \
6660MACRO_END
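
/*
 * For illustration (not part of the original source):
 * __TRANSPOSE_FIELD(pager) expands to a plain three-way swap through the
 * temporary object:
 *
 *	tmp_object->pager = object1->pager;
 *	object1->pager    = object2->pager;
 *	object2->pager    = tmp_object->pager;
 */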
6661
b0d623f7 6662 /* "Lock" refers to the object not its contents */
2d21ac55
A
6663 /* "size" should be identical */
6664 assert(object1->size == object2->size);
b0d623f7 6665 /* "memq_hint" was updated above when transposing pages */
2d21ac55
A
6666 /* "ref_count" refers to the object not its contents */
6667#if TASK_SWAPPER
6668 /* "res_count" refers to the object not its contents */
6669#endif
6670 /* "resident_page_count" was updated above when transposing pages */
b0d623f7
A
6671 /* "wired_page_count" was updated above when transposing pages */
6672 /* "reusable_page_count" was updated above when transposing pages */
2d21ac55 6673 /* there should be no "copy" */
91447636
A
6674 assert(!object1->copy);
6675 assert(!object2->copy);
2d21ac55 6676 /* there should be no "shadow" */
91447636
A
6677 assert(!object1->shadow);
6678 assert(!object2->shadow);
91447636
A
6679 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
6680 __TRANSPOSE_FIELD(pager);
6681 __TRANSPOSE_FIELD(paging_offset);
91447636
A
6682 __TRANSPOSE_FIELD(pager_control);
6683 /* update the memory_objects' pointers back to the VM objects */
6684 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
6685 memory_object_control_collapse(object1->pager_control,
6686 object1);
6687 }
6688 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
6689 memory_object_control_collapse(object2->pager_control,
6690 object2);
6691 }
2d21ac55
A
6692 __TRANSPOSE_FIELD(copy_strategy);
6693 /* "paging_in_progress" refers to the object not its contents */
b0d623f7
A
6694 assert(!object1->paging_in_progress);
6695 assert(!object2->paging_in_progress);
6696 assert(object1->activity_in_progress);
6697 assert(object2->activity_in_progress);
2d21ac55 6698 /* "all_wanted" refers to the object not its contents */
91447636
A
6699 __TRANSPOSE_FIELD(pager_created);
6700 __TRANSPOSE_FIELD(pager_initialized);
6701 __TRANSPOSE_FIELD(pager_ready);
6702 __TRANSPOSE_FIELD(pager_trusted);
2d21ac55 6703 __TRANSPOSE_FIELD(can_persist);
91447636
A
6704 __TRANSPOSE_FIELD(internal);
6705 __TRANSPOSE_FIELD(temporary);
6706 __TRANSPOSE_FIELD(private);
6707 __TRANSPOSE_FIELD(pageout);
2d21ac55
A
6708 /* "alive" should be set */
6709 assert(object1->alive);
6710 assert(object2->alive);
6711 /* "purgeable" should be non-purgeable */
6712 assert(object1->purgable == VM_PURGABLE_DENY);
6713 assert(object2->purgable == VM_PURGABLE_DENY);
6714 /* "shadowed" refers to the the object not its contents */
6715 __TRANSPOSE_FIELD(silent_overwrite);
6716 __TRANSPOSE_FIELD(advisory_pageout);
91447636 6717 __TRANSPOSE_FIELD(true_share);
2d21ac55
A
6718 /* "terminating" should not be set */
6719 assert(!object1->terminating);
6720 assert(!object2->terminating);
6721 __TRANSPOSE_FIELD(named);
6722 /* "shadow_severed" refers to the object not its contents */
91447636
A
6723 __TRANSPOSE_FIELD(phys_contiguous);
6724 __TRANSPOSE_FIELD(nophyscache);
b0d623f7
A
6725 /* "cached_list.next" points to transposed object */
6726 object1->cached_list.next = (queue_entry_t) object2;
6727 object2->cached_list.next = (queue_entry_t) object1;
6728 /* "cached_list.prev" should be NULL */
2d21ac55 6729 assert(object1->cached_list.prev == NULL);
2d21ac55 6730 assert(object2->cached_list.prev == NULL);
2d21ac55
A
6731 /* "msr_q" is linked to the object not its contents */
6732 assert(queue_empty(&object1->msr_q));
6733 assert(queue_empty(&object2->msr_q));
91447636
A
6734 __TRANSPOSE_FIELD(last_alloc);
6735 __TRANSPOSE_FIELD(sequential);
2d21ac55
A
6736 __TRANSPOSE_FIELD(pages_created);
6737 __TRANSPOSE_FIELD(pages_used);
6738#if MACH_PAGEMAP
91447636 6739 __TRANSPOSE_FIELD(existence_map);
2d21ac55 6740#endif
91447636 6741 __TRANSPOSE_FIELD(cow_hint);
2d21ac55
A
6742#if MACH_ASSERT
6743 __TRANSPOSE_FIELD(paging_object);
6744#endif
91447636 6745 __TRANSPOSE_FIELD(wimg_bits);
2d21ac55 6746 __TRANSPOSE_FIELD(code_signed);
b0d623f7
A
6747 if (object1->hashed) {
6748 hash_lck = vm_object_hash_lock_spin(object2->pager);
6749 hash_entry = vm_object_hash_lookup(object2->pager, FALSE);
6750 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
6751 hash_entry->object = object2;
6752 vm_object_hash_unlock(hash_lck);
6753 }
6754 if (object2->hashed) {
6755 hash_lck = vm_object_hash_lock_spin(object1->pager);
6756 hash_entry = vm_object_hash_lookup(object1->pager, FALSE);
6757 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
6758 hash_entry->object = object1;
6759 vm_object_hash_unlock(hash_lck);
6760 }
6761 __TRANSPOSE_FIELD(hashed);
6762 object1->transposed = TRUE;
6763 object2->transposed = TRUE;
6764 __TRANSPOSE_FIELD(mapping_in_progress);
6765 __TRANSPOSE_FIELD(volatile_empty);
6766 __TRANSPOSE_FIELD(volatile_fault);
6767 __TRANSPOSE_FIELD(all_reusable);
6768 assert(object1->blocked_access);
6769 assert(object2->blocked_access);
6770 assert(object1->__object2_unused_bits == 0);
6771 assert(object2->__object2_unused_bits == 0);
6772#if UPL_DEBUG
2d21ac55
A
6773 /* "uplq" refers to the object not its contents (see upl_transpose()) */
6774#endif
b0d623f7
A
6775 assert(object1->objq.next == NULL);
6776 assert(object1->objq.prev == NULL);
6777 assert(object2->objq.next == NULL);
6778 assert(object2->objq.prev == NULL);
91447636
A
6779
6780#undef __TRANSPOSE_FIELD
6781
6782 retval = KERN_SUCCESS;
6783
6784done:
6785 /*
6786 * Cleanup.
6787 */
6788 if (tmp_object != VM_OBJECT_NULL) {
91447636
A
6789 vm_object_unlock(tmp_object);
6790 /*
6791 * Re-initialize the temporary object to avoid
6792 * deallocating a real pager.
6793 */
6794 _vm_object_allocate(transpose_size, tmp_object);
6795 vm_object_deallocate(tmp_object);
6796 tmp_object = VM_OBJECT_NULL;
6797 }
6798
6799 if (object1_locked) {
6800 vm_object_unlock(object1);
6801 object1_locked = FALSE;
6802 }
6803 if (object2_locked) {
6804 vm_object_unlock(object2);
6805 object2_locked = FALSE;
6806 }
b0d623f7
A
6807
6808 vm_object_transpose_count++;
91447636
A
6809
6810 return retval;
6811}
0c530ab8
A
6812
6813
2d21ac55 6814/*
b0d623f7 6815 * vm_object_cluster_size
2d21ac55
A
6816 *
6817 * Determine how big a cluster we should issue an I/O for...
6818 *
6819 * Inputs: *start == offset of page needed
6820 * *length == maximum cluster pager can handle
6821 * Outputs: *start == beginning offset of cluster
6822 * *length == length of cluster to try
6823 *
6824 * The original *start will be encompassed by the cluster
6825 *
6826 */
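
/*
 * Worked example (assumes 4K pages; not part of the original source): for
 * a random-access fault at *start == 0x30000 with a computed pre_heat_size
 * of 8 pages (0x8000), the cluster is centered on the faulting page:
 *
 *	head_size    = trunc_page(0x8000 / 2)      = 0x4000
 *	target_start = 0x30000 - 0x4000            = 0x2C000
 *	tail_size    = 0x8000 - 0x4000 - PAGE_SIZE = 0x3000
 *
 * so the routine proposes *start = 0x2C000 and *length = 0x8000, further
 * clipped below by resident pages, the lo/hi offsets and EOF.
 */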
6827extern int speculative_reads_disabled;
b0d623f7
A
6828#if CONFIG_EMBEDDED
6829unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
6830unsigned int preheat_pages_min = 8;
6831unsigned int preheat_pages_mult = 4;
6832#else
6833unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
6834unsigned int preheat_pages_min = 8;
6835unsigned int preheat_pages_mult = 4;
6836#endif
2d21ac55 6837
b0d623f7
A
6838uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];
6839uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1];
2d21ac55 6840
2d21ac55
A
6841
6842__private_extern__ void
6843vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
b0d623f7 6844 vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming)
2d21ac55
A
6845{
6846 vm_size_t pre_heat_size;
6847 vm_size_t tail_size;
6848 vm_size_t head_size;
6849 vm_size_t max_length;
6850 vm_size_t cluster_size;
6851 vm_object_offset_t object_size;
6852 vm_object_offset_t orig_start;
6853 vm_object_offset_t target_start;
6854 vm_object_offset_t offset;
6855 vm_behavior_t behavior;
6856 boolean_t look_behind = TRUE;
6857 boolean_t look_ahead = TRUE;
b0d623f7 6858 uint32_t throttle_limit;
2d21ac55
A
6859 int sequential_run;
6860 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
b0d623f7
A
6861 unsigned int max_ph_size;
6862 unsigned int min_ph_size;
6863 unsigned int ph_mult;
2d21ac55
A
6864
6865 assert( !(*length & PAGE_MASK));
6866 assert( !(*start & PAGE_MASK_64));
6867
b0d623f7
A
6868 if ( (ph_mult = preheat_pages_mult) < 1 )
6869 ph_mult = 1;
6870 if ( (min_ph_size = preheat_pages_min) < 1 )
6871 min_ph_size = 1;
6872 if ( (max_ph_size = preheat_pages_max) > MAX_UPL_TRANSFER )
6873 max_ph_size = MAX_UPL_TRANSFER;
6874
6875 if ( (max_length = *length) > (max_ph_size * PAGE_SIZE) )
6876 max_length = (max_ph_size * PAGE_SIZE);
6877
2d21ac55
A
6878 /*
6879 * we'll always return a cluster size of at least
6880 * 1 page, since the original fault must always
6881 * be processed
6882 */
6883 *length = PAGE_SIZE;
b0d623f7 6884 *io_streaming = 0;
2d21ac55
A
6885
6886 if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
6887 /*
6888 * no cluster... just fault the page in
6889 */
6890 return;
6891 }
6892 orig_start = *start;
6893 target_start = orig_start;
b0d623f7 6894 cluster_size = round_page(fault_info->cluster_size);
2d21ac55
A
6895 behavior = fault_info->behavior;
6896
6897 vm_object_lock(object);
6898
6899 if (object->internal)
6900 object_size = object->size;
6901 else if (object->pager != MEMORY_OBJECT_NULL)
6902 vnode_pager_get_object_size(object->pager, &object_size);
6903 else
6904 goto out; /* pager is gone for this object, nothing more to do */
6905
6906 object_size = round_page_64(object_size);
6907
6908 if (orig_start >= object_size) {
6909 /*
6910 * fault occurred beyond the EOF...
6911 * we need to punt w/o changing the
6912 * starting offset
6913 */
6914 goto out;
6915 }
6916 if (object->pages_used > object->pages_created) {
6917 /*
6918 * must have wrapped our 32 bit counters
6919 * so reset
6920 */
6921 object->pages_used = object->pages_created = 0;
6922 }
6923 if ((sequential_run = object->sequential)) {
6924 if (sequential_run < 0) {
6925 sequential_behavior = VM_BEHAVIOR_RSEQNTL;
6926 sequential_run = 0 - sequential_run;
6927 } else {
6928 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
6929 }
b0d623f7 6930
2d21ac55
A
6931 }
6932 switch(behavior) {
6933
6934 default:
6935 behavior = VM_BEHAVIOR_DEFAULT;
6936
6937 case VM_BEHAVIOR_DEFAULT:
6938 if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
6939 goto out;
6940
b0d623f7 6941 if (sequential_run >= (3 * PAGE_SIZE)) {
2d21ac55
A
6942 pre_heat_size = sequential_run + PAGE_SIZE;
6943
b0d623f7 6944 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL)
2d21ac55
A
6945 look_behind = FALSE;
6946 else
6947 look_ahead = FALSE;
b0d623f7
A
6948
6949 *io_streaming = 1;
2d21ac55 6950 } else {
2d21ac55 6951
b0d623f7 6952 if (object->pages_created < 32 * ph_mult) {
2d21ac55
A
6953 /*
6954 * prime the pump
6955 */
b0d623f7 6956 pre_heat_size = PAGE_SIZE * 8 * ph_mult;
2d21ac55
A
6957 break;
6958 }
b0d623f7
A
6959 /*
6960 * Linear growth in PH size: The maximum size is max_length...
 6961 * this calculation will result in a size that is neither a
6962 * power of 2 nor a multiple of PAGE_SIZE... so round
6963 * it up to the nearest PAGE_SIZE boundary
6964 */
6965 pre_heat_size = (ph_mult * (max_length * object->pages_used) / object->pages_created);
6966
6967 if (pre_heat_size < PAGE_SIZE * min_ph_size)
6968 pre_heat_size = PAGE_SIZE * min_ph_size;
6969 else
6970 pre_heat_size = round_page(pre_heat_size);
2d21ac55
A
6971 }
6972 break;
6973
6974 case VM_BEHAVIOR_RANDOM:
6975 if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
6976 goto out;
6977 break;
6978
6979 case VM_BEHAVIOR_SEQUENTIAL:
6980 if ((pre_heat_size = cluster_size) == 0)
6981 pre_heat_size = sequential_run + PAGE_SIZE;
6982 look_behind = FALSE;
b0d623f7 6983 *io_streaming = 1;
2d21ac55
A
6984
6985 break;
6986
6987 case VM_BEHAVIOR_RSEQNTL:
6988 if ((pre_heat_size = cluster_size) == 0)
6989 pre_heat_size = sequential_run + PAGE_SIZE;
6990 look_ahead = FALSE;
b0d623f7 6991 *io_streaming = 1;
2d21ac55
A
6992
6993 break;
6994
6995 }
b0d623f7
A
6996 throttle_limit = (uint32_t) max_length;
6997 assert(throttle_limit == max_length);
6998
6999 if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) {
7000 if (max_length > throttle_limit)
7001 max_length = throttle_limit;
7002 }
2d21ac55
A
7003 if (pre_heat_size > max_length)
7004 pre_heat_size = max_length;
7005
b0d623f7
A
7006 if (behavior == VM_BEHAVIOR_DEFAULT) {
7007 if (vm_page_free_count < vm_page_throttle_limit)
7008 pre_heat_size = trunc_page(pre_heat_size / 8);
7009 else if (vm_page_free_count < vm_page_free_target)
7010 pre_heat_size = trunc_page(pre_heat_size / 2);
2d21ac55 7011
b0d623f7
A
7012 if (pre_heat_size <= PAGE_SIZE)
7013 goto out;
7014 }
2d21ac55 7015 if (look_ahead == TRUE) {
b0d623f7
A
7016 if (look_behind == TRUE) {
7017 /*
 7018 * if we get here it's due to a random access...
7019 * so we want to center the original fault address
7020 * within the cluster we will issue... make sure
7021 * to calculate 'head_size' as a multiple of PAGE_SIZE...
7022 * 'pre_heat_size' is a multiple of PAGE_SIZE but not
7023 * necessarily an even number of pages so we need to truncate
7024 * the result to a PAGE_SIZE boundary
7025 */
7026 head_size = trunc_page(pre_heat_size / 2);
2d21ac55 7027
b0d623f7
A
7028 if (target_start > head_size)
7029 target_start -= head_size;
7030 else
7031 target_start = 0;
2d21ac55 7032
b0d623f7
A
7033 /*
7034 * 'target_start' at this point represents the beginning offset
7035 * of the cluster we are considering... 'orig_start' will be in
7036 * the center of this cluster if we didn't have to clip the start
7037 * due to running into the start of the file
7038 */
7039 }
7040 if ((target_start + pre_heat_size) > object_size)
7041 pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start));
7042 /*
 7043 * at this point calculate the number of pages beyond the original fault
7044 * address that we want to consider... this is guaranteed not to extend beyond
7045 * the current EOF...
7046 */
7047 assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start));
7048 tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE;
2d21ac55
A
7049 } else {
7050 if (pre_heat_size > target_start)
b0d623f7 7051 pre_heat_size = (vm_size_t) target_start; /* XXX: 32-bit vs 64-bit ? Joe ? */
2d21ac55
A
7052 tail_size = 0;
7053 }
b0d623f7
A
7054 assert( !(target_start & PAGE_MASK_64));
7055 assert( !(pre_heat_size & PAGE_MASK));
7056
2d21ac55
A
7057 pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;
7058
7059 if (pre_heat_size <= PAGE_SIZE)
7060 goto out;
7061
7062 if (look_behind == TRUE) {
7063 /*
7064 * take a look at the pages before the original
b0d623f7
A
7065 * faulting offset... recalculate this in case
7066 * we had to clip 'pre_heat_size' above to keep
7067 * from running past the EOF.
2d21ac55
A
7068 */
7069 head_size = pre_heat_size - tail_size - PAGE_SIZE;
7070
7071 for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
7072 /*
7073 * don't poke below the lowest offset
7074 */
7075 if (offset < fault_info->lo_offset)
7076 break;
7077 /*
7078 * for external objects and internal objects w/o an existence map
7079 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
7080 */
7081#if MACH_PAGEMAP
7082 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
7083 /*
7084 * we know for a fact that the pager can't provide the page
7085 * so don't include it or any pages beyond it in this cluster
7086 */
7087 break;
7088 }
7089#endif
7090 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
7091 /*
7092 * don't bridge resident pages
7093 */
7094 break;
7095 }
7096 *start = offset;
7097 *length += PAGE_SIZE;
7098 }
7099 }
7100 if (look_ahead == TRUE) {
7101 for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
7102 /*
7103 * don't poke above the highest offset
7104 */
7105 if (offset >= fault_info->hi_offset)
7106 break;
b0d623f7
A
7107 assert(offset < object_size);
7108
2d21ac55
A
7109 /*
7110 * for external objects and internal objects w/o an existence map
7111 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
7112 */
7113#if MACH_PAGEMAP
7114 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
7115 /*
7116 * we know for a fact that the pager can't provide the page
7117 * so don't include it or any pages beyond it in this cluster
7118 */
7119 break;
7120 }
7121#endif
7122 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
7123 /*
7124 * don't bridge resident pages
7125 */
7126 break;
7127 }
7128 *length += PAGE_SIZE;
7129 }
7130 }
7131out:
b0d623f7
A
7132 if (*length > max_length)
7133 *length = max_length;
7134
2d21ac55
A
7135 pre_heat_cluster[*length / PAGE_SIZE]++;
7136
7137 vm_object_unlock(object);
7138}
7139
7140
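/*
 * Editor's note (not part of the original source): on return from the
 * cluster sizing routine above, *start is the page-aligned offset of
 * the first page in the cluster (never below fault_info->lo_offset),
 * *length is the cluster size in bytes (capped at max_length, and its
 * growth stops at fault_info->hi_offset and at EOF), and *io_streaming
 * is set to 1 for the sequential behaviors so the pager can treat the
 * I/O as streaming.  Cluster growth also stops at pages that are
 * already resident or that the existence map says the pager cannot
 * provide.
 */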
7141/*
7142 * Allow manipulation of individual page state. This is actually part of
7143 * the UPL regimen but takes place on the VM object rather than on a UPL
7144 */
0c530ab8
A
7145
7146kern_return_t
7147vm_object_page_op(
7148 vm_object_t object,
7149 vm_object_offset_t offset,
7150 int ops,
7151 ppnum_t *phys_entry,
7152 int *flags)
7153{
7154 vm_page_t dst_page;
7155
7156 vm_object_lock(object);
7157
7158 if(ops & UPL_POP_PHYSICAL) {
7159 if(object->phys_contiguous) {
7160 if (phys_entry) {
7161 *phys_entry = (ppnum_t)
935ed37a 7162 (object->shadow_offset >> PAGE_SHIFT);
0c530ab8
A
7163 }
7164 vm_object_unlock(object);
7165 return KERN_SUCCESS;
7166 } else {
7167 vm_object_unlock(object);
7168 return KERN_INVALID_OBJECT;
7169 }
7170 }
7171 if(object->phys_contiguous) {
7172 vm_object_unlock(object);
7173 return KERN_INVALID_OBJECT;
7174 }
7175
7176 while(TRUE) {
7177 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
7178 vm_object_unlock(object);
7179 return KERN_FAILURE;
7180 }
7181
7182 /* Sync up on getting the busy bit */
7183 if((dst_page->busy || dst_page->cleaning) &&
7184 (((ops & UPL_POP_SET) &&
7185 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
7186 /* someone else is playing with the page, we will */
7187 /* have to wait */
7188 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
7189 continue;
7190 }
7191
7192 if (ops & UPL_POP_DUMP) {
2d21ac55 7193 if (dst_page->pmapped == TRUE)
0c530ab8 7194 pmap_disconnect(dst_page->phys_page);
0c530ab8 7195
b0d623f7 7196 VM_PAGE_FREE(dst_page);
0c530ab8
A
7197 break;
7198 }
7199
7200 if (flags) {
7201 *flags = 0;
7202
7203 /* Get the condition of flags before requested ops */
7204 /* are undertaken */
7205
7206 if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
7207 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
7208 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
7209 if(dst_page->absent) *flags |= UPL_POP_ABSENT;
7210 if(dst_page->busy) *flags |= UPL_POP_BUSY;
7211 }
7212
7213 /* The caller should have made a call either concurrent with */
7214 /* or prior to this call to set UPL_POP_BUSY */
7215 if(ops & UPL_POP_SET) {
7216 /* The protection granted with this assert will */
7217 /* not be complete. If the caller violates the */
7218 /* convention and attempts to change page state */
7219 /* without first setting busy we may not see it */
7220 /* because the page may already be busy. However */
7221 /* if such violations occur we will assert sooner */
7222 /* or later. */
7223 assert(dst_page->busy || (ops & UPL_POP_BUSY));
7224 if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
7225 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
7226 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
7227 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
7228 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
7229 }
7230
7231 if(ops & UPL_POP_CLR) {
7232 assert(dst_page->busy);
7233 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
7234 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
7235 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
7236 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
7237 if (ops & UPL_POP_BUSY) {
7238 dst_page->busy = FALSE;
7239 PAGE_WAKEUP(dst_page);
7240 }
7241 }
7242
7243 if (dst_page->encrypted) {
7244 /*
7245 * ENCRYPTED SWAP:
7246 * We need to decrypt this encrypted page before the
7247 * caller can access its contents.
7248 * But if the caller really wants to access the page's
7249 * contents, they have to keep the page "busy".
7250 * Otherwise, the page could get recycled or re-encrypted
7251 * at any time.
7252 */
7253 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
7254 dst_page->busy) {
7255 /*
7256 * The page is stable enough to be accessed by
7257 * the caller, so make sure its contents are
7258 * not encrypted.
7259 */
7260 vm_page_decrypt(dst_page, 0);
7261 } else {
7262 /*
7263 * The page is not busy, so don't bother
7264 * decrypting it, since anything could
7265 * happen to it between now and when the
7266 * caller wants to access it.
7267 * We should not give the caller access
7268 * to this page.
7269 */
7270 assert(!phys_entry);
7271 }
7272 }
7273
7274 if (phys_entry) {
7275 /*
7276 * The physical page number will remain valid
7277 * only if the page is kept busy.
7278 * ENCRYPTED SWAP: make sure we don't let the
7279 * caller access an encrypted page.
7280 */
7281 assert(dst_page->busy);
7282 assert(!dst_page->encrypted);
7283 *phys_entry = dst_page->phys_page;
7284 }
7285
7286 break;
7287 }
7288
7289 vm_object_unlock(object);
7290 return KERN_SUCCESS;
7291
7292}
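/*
 * Editor's sketch (not part of the original source): a minimal,
 * hypothetical use of vm_object_page_op() following the convention
 * the asserts above describe -- set UPL_POP_BUSY while the page's
 * physical page number is in use, then clear it when done.
 * 'example_probe_page' is a placeholder name.
 */
static kern_return_t
example_probe_page(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	ppnum_t		phys;
	kern_return_t	kr;

	/* mark the page busy; 'phys' stays valid only while the page is busy */
	kr = vm_object_page_op(object, offset,
			       UPL_POP_SET | UPL_POP_BUSY, &phys, NULL);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... inspect the page via 'phys' while it remains busy ... */

	/* clear the busy bit again; this also wakes any waiters */
	return vm_object_page_op(object, offset,
				 UPL_POP_CLR | UPL_POP_BUSY, NULL, NULL);
}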
7293
7294/*
7295 * vm_object_range_op offers a performance enhancement over
7296 * vm_object_page_op for page_op operations that do not require
7297 * page-level state to be returned from the call. Page_op was created to
7298 * provide a low-cost alternative to page manipulation via UPLs when only
7299 * a single page was involved. The range_op call extends the _op family
7300 * of functions to work on multiple pages; because no page-level state is
7301 * handled, the caller avoids the overhead of the UPL structures.
7302 */
7303
7304kern_return_t
7305vm_object_range_op(
7306 vm_object_t object,
7307 vm_object_offset_t offset_beg,
7308 vm_object_offset_t offset_end,
7309 int ops,
b0d623f7 7310 uint32_t *range)
0c530ab8
A
7311{
7312 vm_object_offset_t offset;
7313 vm_page_t dst_page;
7314
b0d623f7
A
7315 if (offset_end - offset_beg > (uint32_t) -1) {
7316 /* range is too big and would overflow "*range" */
7317 return KERN_INVALID_ARGUMENT;
7318 }
0c530ab8
A
7319 if (object->resident_page_count == 0) {
7320 if (range) {
b0d623f7 7321 if (ops & UPL_ROP_PRESENT) {
0c530ab8 7322 *range = 0;
b0d623f7
A
7323 } else {
7324 *range = (uint32_t) (offset_end - offset_beg);
7325 assert(*range == (offset_end - offset_beg));
7326 }
0c530ab8
A
7327 }
7328 return KERN_SUCCESS;
7329 }
7330 vm_object_lock(object);
7331
7332 if (object->phys_contiguous) {
7333 vm_object_unlock(object);
7334 return KERN_INVALID_OBJECT;
7335 }
7336
2d21ac55 7337 offset = offset_beg & ~PAGE_MASK_64;
0c530ab8
A
7338
7339 while (offset < offset_end) {
7340 dst_page = vm_page_lookup(object, offset);
7341 if (dst_page != VM_PAGE_NULL) {
7342 if (ops & UPL_ROP_DUMP) {
7343 if (dst_page->busy || dst_page->cleaning) {
7344 /*
7345 * someone else is playing with the
7346 * page, we will have to wait
7347 */
2d21ac55 7348 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
0c530ab8
A
7349 /*
7350 * need to look the page up again since its
7351 * state may have changed while we slept...
7352 * it might even belong to a different object
7353 * at this point
7354 */
7355 continue;
7356 }
2d21ac55 7357 if (dst_page->pmapped == TRUE)
0c530ab8 7358 pmap_disconnect(dst_page->phys_page);
0c530ab8 7359
b0d623f7 7360 VM_PAGE_FREE(dst_page);
2d21ac55 7361
b0d623f7 7362 } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent)
0c530ab8
A
7363 break;
7364 } else if (ops & UPL_ROP_PRESENT)
7365 break;
7366
7367 offset += PAGE_SIZE;
7368 }
7369 vm_object_unlock(object);
7370
2d21ac55
A
7371 if (range) {
7372 if (offset > offset_end)
7373 offset = offset_end;
b0d623f7
A
7374 if(offset > offset_beg) {
7375 *range = (uint32_t) (offset - offset_beg);
7376 assert(*range == (offset - offset_beg));
7377 } else {
7378 *range = 0;
7379 }
2d21ac55 7380 }
0c530ab8
A
7381 return KERN_SUCCESS;
7382}
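/*
 * Editor's sketch (not part of the original source): measuring how
 * much of a range is already resident with vm_object_range_op().
 * With UPL_ROP_PRESENT the scan stops at the first non-resident page,
 * so '*range' comes back as the byte length of the leading resident
 * run.  'example_leading_resident_bytes' is a placeholder name.
 */
static uint32_t
example_leading_resident_bytes(
	vm_object_t		object,
	vm_object_offset_t	offset_beg,
	vm_object_offset_t	offset_end)
{
	uint32_t	range = 0;

	if (vm_object_range_op(object, offset_beg, offset_end,
			       UPL_ROP_PRESENT, &range) != KERN_SUCCESS)
		return 0;

	return range;
}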
2d21ac55
A
7383
7384
7385uint32_t scan_object_collision = 0;
7386
7387void
7388vm_object_lock(vm_object_t object)
7389{
7390 if (object == vm_pageout_scan_wants_object) {
7391 scan_object_collision++;
7392 mutex_pause(2);
7393 }
7394 lck_rw_lock_exclusive(&object->Lock);
7395}
7396
7397boolean_t
b0d623f7 7398vm_object_lock_avoid(vm_object_t object)
2d21ac55
A
7399{
7400 if (object == vm_pageout_scan_wants_object) {
7401 scan_object_collision++;
b0d623f7 7402 return TRUE;
2d21ac55 7403 }
b0d623f7
A
7404 return FALSE;
7405}
7406
7407boolean_t
7408_vm_object_lock_try(vm_object_t object)
7409{
2d21ac55
A
7410 return (lck_rw_try_lock_exclusive(&object->Lock));
7411}
7412
b0d623f7
A
7413boolean_t
7414vm_object_lock_try(vm_object_t object)
7415{
0b4c1975
A
7416 // called from hibernate path so check before blocking
7417 if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled()) {
b0d623f7
A
7418 mutex_pause(2);
7419 }
7420 return _vm_object_lock_try(object);
7421}
2d21ac55
A
7422void
7423vm_object_lock_shared(vm_object_t object)
7424{
b0d623f7 7425 if (vm_object_lock_avoid(object)) {
2d21ac55
A
7426 mutex_pause(2);
7427 }
7428 lck_rw_lock_shared(&object->Lock);
7429}
7430
7431boolean_t
7432vm_object_lock_try_shared(vm_object_t object)
7433{
b0d623f7 7434 if (vm_object_lock_avoid(object)) {
2d21ac55
A
7435 mutex_pause(2);
7436 }
7437 return (lck_rw_try_lock_shared(&object->Lock));
7438}
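/*
 * Editor's sketch (not part of the original source): how the lock
 * wrappers above are typically used.  Readers can take the lock
 * shared; writers need it exclusive.  Both paths yield briefly via
 * mutex_pause() when the pageout scan wants this object, as the
 * wrappers above arrange.  'example_touch_object' is a placeholder.
 */
static void
example_touch_object(
	vm_object_t	object,
	boolean_t	read_only)
{
	if (read_only) {
		/* shared (read) access; may be held concurrently by other readers */
		vm_object_lock_shared(object);
	} else {
		/* exclusive (write) access; may block until the object is free */
		vm_object_lock(object);
	}

	/* ... examine or modify the object ... */

	vm_object_unlock(object);
}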