1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_object.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Virtual memory object module.
63 */
64
65 #include <debug.h>
66 #include <mach_pagemap.h>
67 #include <task_swapper.h>
68
69 #include <mach/mach_types.h>
70 #include <mach/memory_object.h>
71 #include <mach/memory_object_default.h>
72 #include <mach/memory_object_control_server.h>
73 #include <mach/vm_param.h>
74
75 #include <ipc/ipc_types.h>
76 #include <ipc/ipc_port.h>
77
78 #include <kern/kern_types.h>
79 #include <kern/assert.h>
80 #include <kern/lock.h>
81 #include <kern/queue.h>
82 #include <kern/xpr.h>
83 #include <kern/zalloc.h>
84 #include <kern/host.h>
85 #include <kern/host_statistics.h>
86 #include <kern/processor.h>
87 #include <kern/misc_protos.h>
88
89 #include <vm/memory_object.h>
90 #include <vm/vm_fault.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_object.h>
93 #include <vm/vm_page.h>
94 #include <vm/vm_pageout.h>
95 #include <vm/vm_protos.h>
96 #include <vm/vm_purgeable_internal.h>
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 /*
103 * Virtual memory objects maintain the actual data
104 * associated with allocated virtual memory. A given
105 * page of memory exists within exactly one object.
106 *
107 * An object is only deallocated when all "references"
108 * are given up.
109 *
110 * Associated with each object is a list of all resident
111 * memory pages belonging to that object; this list is
112 * maintained by the "vm_page" module, but locked by the object's
113 * lock.
114 *
115 * Each object also records the memory object reference
116 * that is used by the kernel to request and write
117 * back data (the memory object, field "pager"), etc...
118 *
119 * Virtual memory objects are allocated to provide
120 * zero-filled memory (vm_allocate) or map a user-defined
121 * memory object into a virtual address space (vm_map).
122 *
123 * Virtual memory objects that refer to a user-defined
124 * memory object are called "permanent", because all changes
125 * made in virtual memory are reflected back to the
126 * memory manager, which may then store them permanently.
127 * Other virtual memory objects are called "temporary",
128 * meaning that changes need be written back only when
129 * necessary to reclaim pages, and that storage associated
130 * with the object can be discarded once it is no longer
131 * mapped.
132 *
133 * A permanent memory object may be mapped into more
134 * than one virtual address space. Moreover, two threads
135 * may attempt to make the first mapping of a memory
136 * object concurrently. Only one thread is allowed to
137 * complete this mapping; all others wait until the
138 * "pager_initialized" field is asserted, indicating
139 * that the first thread has initialized all of the
140 * necessary fields in the virtual memory object structure.
141 *
142 * The kernel relies on a *default memory manager* to
143 * provide backing storage for the zero-filled virtual
144 * memory objects. The pager memory objects associated
145 * with these temporary virtual memory objects are only
146 * requested from the default memory manager when it
147 * becomes necessary. Virtual memory objects
148 * that depend on the default memory manager are called
149 * "internal". The "pager_created" field is provided to
150 * indicate whether such a pager has ever been created.
151 *
152 * The kernel may also create virtual memory objects to
153 * hold changed pages after a copy-on-write operation.
154 * In this case, the virtual memory object (and its
155 * backing storage -- its memory object) only contains
156 * those pages that have been changed. The "shadow"
157 * field refers to the virtual memory object that contains
158 * the remainder of the contents. The "shadow_offset"
159 * field indicates where in the "shadow" these contents begin.
160 * The "copy" field refers to a virtual memory object
161 * to which changed pages must be copied before changing
162 * this object, in order to implement another form
163 * of copy-on-write optimization.
164 *
165 * The virtual memory object structure also records
166 * the attributes associated with its memory object.
167 * The "pager_ready", "can_persist" and "copy_strategy"
168 * fields represent those attributes. The "cached_list"
169 * field is used in the implementation of the persistence
170 * attribute.
171 *
172 * ZZZ Continue this comment.
173 */
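/*
 * For example, after a copy-on-write fault on a file mapping, the
 * arrangement described above might look roughly like this:
 *
 *	new (temporary, internal) object
 *	    memq          : only the pages changed since the copy
 *	    shadow        : the original, permanent object
 *	    shadow_offset : where this object's contents begin
 *	                    within the shadow
 *
 * A page lookup that misses in the new object falls through to the
 * shadow object at the recorded offset.
 */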
174
175 /* Forward declarations for internal functions. */
176 static kern_return_t vm_object_terminate(
177 vm_object_t object);
178
179 extern void vm_object_remove(
180 vm_object_t object);
181
182 static kern_return_t vm_object_copy_call(
183 vm_object_t src_object,
184 vm_object_offset_t src_offset,
185 vm_object_size_t size,
186 vm_object_t *_result_object);
187
188 static void vm_object_do_collapse(
189 vm_object_t object,
190 vm_object_t backing_object);
191
192 static void vm_object_do_bypass(
193 vm_object_t object,
194 vm_object_t backing_object);
195
196 static void vm_object_release_pager(
197 memory_object_t pager,
198 boolean_t hashed);
199
200 static zone_t vm_object_zone; /* vm backing store zone */
201
202 /*
203 * All wired-down kernel memory belongs to a single virtual
204 * memory object (kernel_object) to avoid wasting data structures.
205 */
206 static struct vm_object kernel_object_store;
207 vm_object_t kernel_object;
208
209
210 /*
211 * The submap object is used as a placeholder for vm_map_submap
212 * operations. The object is declared in vm_map.c because it
213 * is exported by the vm_map module. The storage is declared
214 * here because it must be initialized here.
215 */
216 static struct vm_object vm_submap_object_store;
217
218 /*
219 * Virtual memory objects are initialized from
220 * a template (see vm_object_allocate).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
224 * (see _vm_object_allocate()).
225 */
226 static struct vm_object vm_object_template;
227
228 unsigned int vm_page_purged_wired = 0;
229 unsigned int vm_page_purged_busy = 0;
230 unsigned int vm_page_purged_others = 0;
231
232 #if VM_OBJECT_CACHE
233 /*
234 * Virtual memory objects that are not referenced by
235 * any address maps, but that are allowed to persist
236 * (an attribute specified by the associated memory manager),
237 * are kept in a queue (vm_object_cached_list).
238 *
239 * When an object from this queue is referenced again,
240 * for example to make another address space mapping,
241 * it must be removed from the queue. That is, the
242 * queue contains *only* objects with zero references.
243 *
244 * The kernel may choose to terminate objects from this
245 * queue in order to reclaim storage. The current policy
246 * is to permit a fixed maximum number of unreferenced
247 * objects (vm_object_cached_max).
248 *
249 * A spin lock (accessed by routines
250 * vm_object_cache_{lock,lock_try,unlock}) governs the
251 * object cache. It must be held when objects are
252 * added to or removed from the cache (in vm_object_terminate).
253 * The routines that acquire a reference to a virtual
254 * memory object based on one of the memory object ports
255 * must also lock the cache.
256 *
257 * Ideally, the object cache should be more isolated
258 * from the reference mechanism, so that the lock need
259 * not be held to make simple references.
260 */
261 static vm_object_t vm_object_cache_trim(
262 boolean_t called_from_vm_object_deallocate);
263
264 static queue_head_t vm_object_cached_list;
265 static int vm_object_cached_count=0;
266 static int vm_object_cached_high; /* highest # cached objects */
267 static int vm_object_cached_max = 512; /* may be patched*/
268
269 static lck_mtx_t vm_object_cached_lock_data;
270 static lck_mtx_ext_t vm_object_cached_lock_data_ext;
271
272 #define vm_object_cache_lock() \
273 lck_mtx_lock(&vm_object_cached_lock_data)
274 #define vm_object_cache_lock_try() \
275 lck_mtx_try_lock(&vm_object_cached_lock_data)
276 #define vm_object_cache_lock_spin() \
277 lck_mtx_lock_spin(&vm_object_cached_lock_data)
278 #define vm_object_cache_unlock() \
279 lck_mtx_unlock(&vm_object_cached_lock_data)
280
281 #endif /* VM_OBJECT_CACHE */
282
283
284 static void vm_object_deactivate_all_pages(
285 vm_object_t object);
286
287
288 #define VM_OBJECT_HASH_COUNT 1024
289 #define VM_OBJECT_HASH_LOCK_COUNT 512
290
291 static lck_mtx_t vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT];
292 static lck_mtx_ext_t vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT];
293
294 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
295 static struct zone *vm_object_hash_zone;
296
297 struct vm_object_hash_entry {
298 queue_chain_t hash_link; /* hash chain link */
299 memory_object_t pager; /* pager we represent */
300 vm_object_t object; /* corresponding object */
301 boolean_t waiting; /* someone waiting for
302 * termination */
303 };
304
305 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
306 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
307
308 #define VM_OBJECT_HASH_SHIFT 5
309 #define vm_object_hash(pager) \
310 ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT))
311
312 #define vm_object_lock_hash(pager) \
313 ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT))
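/*
 * For example, with a hypothetical pager pointer 0xffffff8012345678,
 * the shift drops the low VM_OBJECT_HASH_SHIFT bits (which carry
 * little entropy for aligned allocations) and the modulo selects a
 * bucket:
 *
 *	vm_object_hash(pager)      == (0xffffff8012345678 >> 5) % 1024 == 691
 *	vm_object_lock_hash(pager) == (0xffffff8012345678 >> 5) % 512  == 179
 *
 * Since there are fewer locks than hash buckets, two hash chains
 * share each lck_mtx.
 */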
314
315 void vm_object_hash_entry_free(
316 vm_object_hash_entry_t entry);
317
318 static void vm_object_reap(vm_object_t object);
319 static void vm_object_reap_async(vm_object_t object);
320 static void vm_object_reaper_thread(void);
321
322 static lck_mtx_t vm_object_reaper_lock_data;
323 static lck_mtx_ext_t vm_object_reaper_lock_data_ext;
324
325 static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */
326 unsigned int vm_object_reap_count = 0;
327 unsigned int vm_object_reap_count_async = 0;
328
329 #define vm_object_reaper_lock() \
330 lck_mtx_lock(&vm_object_reaper_lock_data)
331 #define vm_object_reaper_lock_spin() \
332 lck_mtx_lock_spin(&vm_object_reaper_lock_data)
333 #define vm_object_reaper_unlock() \
334 lck_mtx_unlock(&vm_object_reaper_lock_data)
335
336
337
338 static lck_mtx_t *
339 vm_object_hash_lock_spin(
340 memory_object_t pager)
341 {
342 int index;
343
344 index = vm_object_lock_hash(pager);
345
346 lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]);
347
348 return (&vm_object_hashed_lock_data[index]);
349 }
350
351 static void
352 vm_object_hash_unlock(lck_mtx_t *lck)
353 {
354 lck_mtx_unlock(lck);
355 }
356
357
358 /*
359 * vm_object_hash_lookup looks up a pager in the hashtable
360 * and returns the corresponding entry, with optional removal.
361 */
362 static vm_object_hash_entry_t
363 vm_object_hash_lookup(
364 memory_object_t pager,
365 boolean_t remove_entry)
366 {
367 queue_t bucket;
368 vm_object_hash_entry_t entry;
369
370 bucket = &vm_object_hashtable[vm_object_hash(pager)];
371
372 entry = (vm_object_hash_entry_t)queue_first(bucket);
373 while (!queue_end(bucket, (queue_entry_t)entry)) {
374 if (entry->pager == pager) {
375 if (remove_entry) {
376 queue_remove(bucket, entry,
377 vm_object_hash_entry_t, hash_link);
378 }
379 return(entry);
380 }
381 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
382 }
383 return(VM_OBJECT_HASH_ENTRY_NULL);
384 }
385
386 /*
387 * vm_object_hash_insert enters the specified
388 * pager / cache object association in the hashtable.
389 */
390
391 static void
392 vm_object_hash_insert(
393 vm_object_hash_entry_t entry,
394 vm_object_t object)
395 {
396 queue_t bucket;
397
398 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
399
400 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
401
402 entry->object = object;
403 object->hashed = TRUE;
404 }
405
406 static vm_object_hash_entry_t
407 vm_object_hash_entry_alloc(
408 memory_object_t pager)
409 {
410 vm_object_hash_entry_t entry;
411
412 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
413 entry->pager = pager;
414 entry->object = VM_OBJECT_NULL;
415 entry->waiting = FALSE;
416
417 return(entry);
418 }
419
420 void
421 vm_object_hash_entry_free(
422 vm_object_hash_entry_t entry)
423 {
424 zfree(vm_object_hash_zone, entry);
425 }
426
427 /*
428 * vm_object_allocate:
429 *
430 * Returns a new object with the given size.
431 */
432
433 __private_extern__ void
434 _vm_object_allocate(
435 vm_object_size_t size,
436 vm_object_t object)
437 {
438 XPR(XPR_VM_OBJECT,
439 "vm_object_allocate, object 0x%X size 0x%X\n",
440 object, size, 0,0,0);
441
442 *object = vm_object_template;
443 queue_init(&object->memq);
444 queue_init(&object->msr_q);
445 #if UPL_DEBUG
446 queue_init(&object->uplq);
447 #endif /* UPL_DEBUG */
448 vm_object_lock_init(object);
449 object->size = size;
450 }
451
452 __private_extern__ vm_object_t
453 vm_object_allocate(
454 vm_object_size_t size)
455 {
456 register vm_object_t object;
457
458 object = (vm_object_t) zalloc(vm_object_zone);
459
460 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
461
462 if (object != VM_OBJECT_NULL)
463 _vm_object_allocate(size, object);
464
465 return object;
466 }
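/*
 * A typical caller pairs vm_object_allocate() with vm_object_deallocate();
 * roughly:
 *
 *	vm_object_t	object;
 *
 *	object = vm_object_allocate((vm_object_size_t)(16 * PAGE_SIZE));
 *	if (object == VM_OBJECT_NULL)
 *		return KERN_RESOURCE_SHORTAGE;
 *	...
 *	vm_object_deallocate(object);	-- drops the single reference
 *					   taken by vm_object_allocate()
 */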
467
468
469 lck_grp_t vm_object_lck_grp;
470 lck_grp_attr_t vm_object_lck_grp_attr;
471 lck_attr_t vm_object_lck_attr;
472 lck_attr_t kernel_object_lck_attr;
473
474 /*
475 * vm_object_bootstrap:
476 *
477 * Initialize the VM objects module.
478 */
479 __private_extern__ void
480 vm_object_bootstrap(void)
481 {
482 register int i;
483
484 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
485 round_page(512*1024),
486 round_page(12*1024),
487 "vm objects");
488
489 vm_object_init_lck_grp();
490
491 #if VM_OBJECT_CACHE
492 queue_init(&vm_object_cached_list);
493
494 lck_mtx_init_ext(&vm_object_cached_lock_data,
495 &vm_object_cached_lock_data_ext,
496 &vm_object_lck_grp,
497 &vm_object_lck_attr);
498 #endif
499 queue_init(&vm_object_reaper_queue);
500
501 for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) {
502 lck_mtx_init_ext(&vm_object_hashed_lock_data[i],
503 &vm_object_hashed_lock_data_ext[i],
504 &vm_object_lck_grp,
505 &vm_object_lck_attr);
506 }
507 lck_mtx_init_ext(&vm_object_reaper_lock_data,
508 &vm_object_reaper_lock_data_ext,
509 &vm_object_lck_grp,
510 &vm_object_lck_attr);
511
512 vm_object_hash_zone =
513 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
514 round_page(512*1024),
515 round_page(12*1024),
516 "vm object hash entries");
517
518 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
519 queue_init(&vm_object_hashtable[i]);
520
521
522 /*
523 * Fill in a template object, for quick initialization
524 */
525
526 /* memq; Lock; init after allocation */
527 vm_object_template.memq.prev = NULL;
528 vm_object_template.memq.next = NULL;
529 #if 0
530 /*
531 * We can't call vm_object_lock_init() here because that will
532 * allocate some memory and VM is not fully initialized yet.
533 * The lock will be initialized for each allocated object in
534 * _vm_object_allocate(), so we don't need to initialize it in
535 * the vm_object_template.
536 */
537 vm_object_lock_init(&vm_object_template);
538 #endif
539 vm_object_template.size = 0;
540 vm_object_template.memq_hint = VM_PAGE_NULL;
541 vm_object_template.ref_count = 1;
542 #if TASK_SWAPPER
543 vm_object_template.res_count = 1;
544 #endif /* TASK_SWAPPER */
545 vm_object_template.resident_page_count = 0;
546 vm_object_template.wired_page_count = 0;
547 vm_object_template.reusable_page_count = 0;
548 vm_object_template.copy = VM_OBJECT_NULL;
549 vm_object_template.shadow = VM_OBJECT_NULL;
550 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
551 vm_object_template.pager = MEMORY_OBJECT_NULL;
552 vm_object_template.paging_offset = 0;
553 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
554 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
555 vm_object_template.paging_in_progress = 0;
556 vm_object_template.activity_in_progress = 0;
557
558 /* Begin bitfields */
559 vm_object_template.all_wanted = 0; /* all bits FALSE */
560 vm_object_template.pager_created = FALSE;
561 vm_object_template.pager_initialized = FALSE;
562 vm_object_template.pager_ready = FALSE;
563 vm_object_template.pager_trusted = FALSE;
564 vm_object_template.can_persist = FALSE;
565 vm_object_template.internal = TRUE;
566 vm_object_template.temporary = TRUE;
567 vm_object_template.private = FALSE;
568 vm_object_template.pageout = FALSE;
569 vm_object_template.alive = TRUE;
570 vm_object_template.purgable = VM_PURGABLE_DENY;
571 vm_object_template.shadowed = FALSE;
572 vm_object_template.silent_overwrite = FALSE;
573 vm_object_template.advisory_pageout = FALSE;
574 vm_object_template.true_share = FALSE;
575 vm_object_template.terminating = FALSE;
576 vm_object_template.named = FALSE;
577 vm_object_template.shadow_severed = FALSE;
578 vm_object_template.phys_contiguous = FALSE;
579 vm_object_template.nophyscache = FALSE;
580 /* End bitfields */
581
582 vm_object_template.cached_list.prev = NULL;
583 vm_object_template.cached_list.next = NULL;
584 vm_object_template.msr_q.prev = NULL;
585 vm_object_template.msr_q.next = NULL;
586
587 vm_object_template.last_alloc = (vm_object_offset_t) 0;
588 vm_object_template.sequential = (vm_object_offset_t) 0;
589 vm_object_template.pages_created = 0;
590 vm_object_template.pages_used = 0;
591
592 #if MACH_PAGEMAP
593 vm_object_template.existence_map = VM_EXTERNAL_NULL;
594 #endif /* MACH_PAGEMAP */
595 vm_object_template.cow_hint = ~(vm_offset_t)0;
596 #if MACH_ASSERT
597 vm_object_template.paging_object = VM_OBJECT_NULL;
598 #endif /* MACH_ASSERT */
599
600 /* cache bitfields */
601 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
602 vm_object_template.code_signed = FALSE;
603 vm_object_template.hashed = FALSE;
604 vm_object_template.transposed = FALSE;
605 vm_object_template.mapping_in_progress = FALSE;
606 vm_object_template.volatile_empty = FALSE;
607 vm_object_template.volatile_fault = FALSE;
608 vm_object_template.all_reusable = FALSE;
609 vm_object_template.blocked_access = FALSE;
610 vm_object_template.__object2_unused_bits = 0;
611 #if UPL_DEBUG
612 vm_object_template.uplq.prev = NULL;
613 vm_object_template.uplq.next = NULL;
614 #endif /* UPL_DEBUG */
615 #ifdef VM_PIP_DEBUG
616 bzero(&vm_object_template.pip_holders,
617 sizeof (vm_object_template.pip_holders));
618 #endif /* VM_PIP_DEBUG */
619
620 vm_object_template.objq.next=NULL;
621 vm_object_template.objq.prev=NULL;
622
623
624 /*
625 * Initialize the "kernel object"
626 */
627
628 kernel_object = &kernel_object_store;
629
630 /*
631 * Note that in the following size specifications, we need to add 1 because
632 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
633 */
634
635 #ifdef ppc
636 _vm_object_allocate(vm_last_addr + 1,
637 kernel_object);
638 #else
639 _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
640 kernel_object);
641 #endif
642 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
643
644 /*
645 * Initialize the "submap object". Make it as large as the
646 * kernel object so that no limit is imposed on submap sizes.
647 */
648
649 vm_submap_object = &vm_submap_object_store;
650 #ifdef ppc
651 _vm_object_allocate(vm_last_addr + 1,
652 vm_submap_object);
653 #else
654 _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
655 vm_submap_object);
656 #endif
657 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
658
659 /*
660 * Create an "extra" reference to this object so that we never
661 * try to deallocate it; zfree doesn't like to be called with
662 * non-zone memory.
663 */
664 vm_object_reference(vm_submap_object);
665
666 #if MACH_PAGEMAP
667 vm_external_module_initialize();
668 #endif /* MACH_PAGEMAP */
669 }
670
671 void
672 vm_object_reaper_init(void)
673 {
674 kern_return_t kr;
675 thread_t thread;
676
677 kr = kernel_thread_start_priority(
678 (thread_continue_t) vm_object_reaper_thread,
679 NULL,
680 BASEPRI_PREEMPT - 1,
681 &thread);
682 if (kr != KERN_SUCCESS) {
683 panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
684 }
685 thread_deallocate(thread);
686 }
687
688 __private_extern__ void
689 vm_object_init(void)
690 {
691 /*
692 * Finish initializing the kernel object.
693 */
694 }
695
696
697 __private_extern__ void
698 vm_object_init_lck_grp(void)
699 {
700 /*
701 * initialize the vm_object lock world
702 */
703 lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
704 lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
705 lck_attr_setdefault(&vm_object_lck_attr);
706 lck_attr_setdefault(&kernel_object_lck_attr);
707 lck_attr_cleardebug(&kernel_object_lck_attr);
708 }
709
710 #if VM_OBJECT_CACHE
711 #define MIGHT_NOT_CACHE_SHADOWS 1
712 #if MIGHT_NOT_CACHE_SHADOWS
713 static int cache_shadows = TRUE;
714 #endif /* MIGHT_NOT_CACHE_SHADOWS */
715 #endif
716
717 /*
718 * vm_object_deallocate:
719 *
720 * Release a reference to the specified object,
721 * gained either through a vm_object_allocate
722 * or a vm_object_reference call. When all references
723 * are gone, storage associated with this object
724 * may be relinquished.
725 *
726 * No object may be locked.
727 */
728 unsigned long vm_object_deallocate_shared_successes = 0;
729 unsigned long vm_object_deallocate_shared_failures = 0;
730 unsigned long vm_object_deallocate_shared_swap_failures = 0;
731 __private_extern__ void
732 vm_object_deallocate(
733 register vm_object_t object)
734 {
735 #if VM_OBJECT_CACHE
736 boolean_t retry_cache_trim = FALSE;
737 uint32_t try_failed_count = 0;
738 #endif
739 vm_object_t shadow = VM_OBJECT_NULL;
740
741 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
742 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
743
744 if (object == VM_OBJECT_NULL)
745 return;
746
747 if (object == kernel_object) {
748 vm_object_lock_shared(object);
749
750 OSAddAtomic(-1, &object->ref_count);
751
752 if (object->ref_count == 0) {
753 panic("vm_object_deallocate: losing kernel_object\n");
754 }
755 vm_object_unlock(object);
756 return;
757 }
758
759 if (object->ref_count > 2 ||
760 (!object->named && object->ref_count > 1)) {
761 UInt32 original_ref_count;
762 volatile UInt32 *ref_count_p;
763 Boolean atomic_swap;
764
765 /*
766 * The object currently looks like it is not being
767 * kept alive solely by the reference we're about to release.
768 * Let's try and release our reference without taking
769 * all the locks we would need if we had to terminate the
770 * object (cache lock + exclusive object lock).
771 * Lock the object "shared" to make sure we don't race with
772 * anyone holding it "exclusive".
773 */
774 vm_object_lock_shared(object);
775 ref_count_p = (volatile UInt32 *) &object->ref_count;
776 original_ref_count = object->ref_count;
777 /*
778 * Test again as "ref_count" could have changed.
779 * "named" shouldn't change.
780 */
781 if (original_ref_count > 2 ||
782 (!object->named && original_ref_count > 1)) {
783 atomic_swap = OSCompareAndSwap(
784 original_ref_count,
785 original_ref_count - 1,
786 (UInt32 *) &object->ref_count);
787 if (atomic_swap == FALSE) {
788 vm_object_deallocate_shared_swap_failures++;
789 }
790
791 } else {
792 atomic_swap = FALSE;
793 }
794 vm_object_unlock(object);
795
796 if (atomic_swap) {
797 /*
798 * ref_count was updated atomically !
799 */
800 vm_object_deallocate_shared_successes++;
801 return;
802 }
803
804 /*
805 * Someone else updated the ref_count at the same
806 * time and we lost the race. Fall back to the usual
807 * slow but safe path...
808 */
809 vm_object_deallocate_shared_failures++;
810 }
811
812 while (object != VM_OBJECT_NULL) {
813
814 vm_object_lock(object);
815
816 assert(object->ref_count > 0);
817
818 /*
819 * If the object has a named reference, and only
820 * that reference would remain, inform the pager
821 * about the last "mapping" reference going away.
822 */
823 if ((object->ref_count == 2) && (object->named)) {
824 memory_object_t pager = object->pager;
825
826 /* Notify the Pager that there are no */
827 /* more mappers for this object */
828
829 if (pager != MEMORY_OBJECT_NULL) {
830 vm_object_mapping_wait(object, THREAD_UNINT);
831 vm_object_mapping_begin(object);
832 vm_object_unlock(object);
833
834 memory_object_last_unmap(pager);
835
836 vm_object_lock(object);
837 vm_object_mapping_end(object);
838 }
839 /*
840 * recheck the ref_count since we dropped the object lock
841 * to call 'memory_object_last_unmap'... it's possible
842 * additional references got taken and we only want
843 * to deactivate the pages if this 'named' object will only be
844 * referenced by the backing pager once we drop our reference
845 * below
846 */
847 if (!object->terminating && object->ref_count == 2)
848 vm_object_deactivate_all_pages(object);
849
850 assert(object->ref_count > 0);
851 }
852
853 /*
854 * Lose the reference. If other references
855 * remain, then we are done, unless we need
856 * to retry a cache trim.
857 * If it is the last reference, then keep it
858 * until any pending initialization is completed.
859 */
860
861 /* if the object is terminating, it cannot go into */
862 /* the cache and we obviously should not call */
863 /* terminate again. */
864
865 if ((object->ref_count > 1) || object->terminating) {
866 vm_object_lock_assert_exclusive(object);
867 object->ref_count--;
868 vm_object_res_deallocate(object);
869
870 if (object->ref_count == 1 &&
871 object->shadow != VM_OBJECT_NULL) {
872 /*
873 * There's only one reference left on this
874 * VM object. We can't tell if it's a valid
875 * one (from a mapping for example) or if this
876 * object is just part of a possibly stale and
877 * useless shadow chain.
878 * We would like to try and collapse it into
879 * its parent, but we don't have any pointers
880 * back to this parent object.
881 * But we can try and collapse this object with
882 * its own shadows, in case these are useless
883 * too...
884 * We can't bypass this object though, since we
885 * don't know if this last reference on it is
886 * meaningful or not.
887 */
888 vm_object_collapse(object, 0, FALSE);
889 }
890 vm_object_unlock(object);
891 #if VM_OBJECT_CACHE
892 if (retry_cache_trim &&
893 ((object = vm_object_cache_trim(TRUE)) !=
894 VM_OBJECT_NULL)) {
895 continue;
896 }
897 #endif
898 return;
899 }
900
901 /*
902 * We have to wait for initialization
903 * before destroying or caching the object.
904 */
905
906 if (object->pager_created && ! object->pager_initialized) {
907 assert(! object->can_persist);
908 vm_object_assert_wait(object,
909 VM_OBJECT_EVENT_INITIALIZED,
910 THREAD_UNINT);
911 vm_object_unlock(object);
912
913 thread_block(THREAD_CONTINUE_NULL);
914 continue;
915 }
916
917 #if VM_OBJECT_CACHE
918 /*
919 * If this object can persist, then enter it in
920 * the cache. Otherwise, terminate it.
921 *
922 * NOTE: Only permanent objects are cached, and
923 * permanent objects cannot have shadows. This
924 * affects the residence counting logic in a minor
925 * way (can do it in-line, mostly).
926 */
927
928 if ((object->can_persist) && (object->alive)) {
929 /*
930 * Now it is safe to decrement reference count,
931 * and to return if reference count is > 0.
932 */
933
934 vm_object_lock_assert_exclusive(object);
935 if (--object->ref_count > 0) {
936 vm_object_res_deallocate(object);
937 vm_object_unlock(object);
938
939 if (retry_cache_trim &&
940 ((object = vm_object_cache_trim(TRUE)) !=
941 VM_OBJECT_NULL)) {
942 continue;
943 }
944 return;
945 }
946
947 #if MIGHT_NOT_CACHE_SHADOWS
948 /*
949 * Remove shadow now if we don't
950 * want to cache shadows.
951 */
952 if (! cache_shadows) {
953 shadow = object->shadow;
954 object->shadow = VM_OBJECT_NULL;
955 }
956 #endif /* MIGHT_NOT_CACHE_SHADOWS */
957
958 /*
959 * Enter the object onto the queue of
960 * cached objects, and deactivate
961 * all of its pages.
962 */
963 assert(object->shadow == VM_OBJECT_NULL);
964 VM_OBJ_RES_DECR(object);
965 XPR(XPR_VM_OBJECT,
966 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
967 object,
968 vm_object_cached_list.next,
969 vm_object_cached_list.prev,0,0);
970
971
972 vm_object_unlock(object);
973
974 try_failed_count = 0;
975 for (;;) {
976 vm_object_cache_lock();
977
978 /*
979 * if we try to take a regular lock here
980 * we risk deadlocking against someone
981 * holding a lock on this object while
982 * trying to vm_object_deallocate a different
983 * object
984 */
985 if (vm_object_lock_try(object))
986 break;
987 vm_object_cache_unlock();
988 try_failed_count++;
989
990 mutex_pause(try_failed_count); /* wait a bit */
991 }
992 vm_object_cached_count++;
993 if (vm_object_cached_count > vm_object_cached_high)
994 vm_object_cached_high = vm_object_cached_count;
995 queue_enter(&vm_object_cached_list, object,
996 vm_object_t, cached_list);
997 vm_object_cache_unlock();
998
999 vm_object_deactivate_all_pages(object);
1000 vm_object_unlock(object);
1001
1002 #if MIGHT_NOT_CACHE_SHADOWS
1003 /*
1004 * If we have a shadow that we need
1005 * to deallocate, do so now, remembering
1006 * to trim the cache later.
1007 */
1008 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
1009 object = shadow;
1010 retry_cache_trim = TRUE;
1011 continue;
1012 }
1013 #endif /* MIGHT_NOT_CACHE_SHADOWS */
1014
1015 /*
1016 * Trim the cache. If the cache trim
1017 * returns with a shadow for us to deallocate,
1018 * then remember to retry the cache trim
1019 * when we are done deallocating the shadow.
1020 * Otherwise, we are done.
1021 */
1022
1023 object = vm_object_cache_trim(TRUE);
1024 if (object == VM_OBJECT_NULL) {
1025 return;
1026 }
1027 retry_cache_trim = TRUE;
1028 } else
1029 #endif /* VM_OBJECT_CACHE */
1030 {
1031 /*
1032 * This object is not cacheable; terminate it.
1033 */
1034 XPR(XPR_VM_OBJECT,
1035 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
1036 object, object->resident_page_count,
1037 object->paging_in_progress,
1038 (void *)current_thread(),object->ref_count);
1039
1040 VM_OBJ_RES_DECR(object); /* XXX ? */
1041 /*
1042 * Terminate this object. If it had a shadow,
1043 * then deallocate it; otherwise, if we need
1044 * to retry a cache trim, do so now; otherwise,
1045 * we are done. "pageout" objects have a shadow,
1046 * but maintain a "paging reference" rather than
1047 * a normal reference.
1048 */
1049 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
1050
1051 if (vm_object_terminate(object) != KERN_SUCCESS) {
1052 return;
1053 }
1054 if (shadow != VM_OBJECT_NULL) {
1055 object = shadow;
1056 continue;
1057 }
1058 #if VM_OBJECT_CACHE
1059 if (retry_cache_trim &&
1060 ((object = vm_object_cache_trim(TRUE)) !=
1061 VM_OBJECT_NULL)) {
1062 continue;
1063 }
1064 #endif
1065 return;
1066 }
1067 }
1068 #if VM_OBJECT_CACHE
1069 assert(! retry_cache_trim);
1070 #endif
1071 }
1072
1073
1074 #if VM_OBJECT_CACHE
1075 /*
1076 * Check to see whether we really need to trim
1077 * down the cache. If so, remove an object from
1078 * the cache, terminate it, and repeat.
1079 *
1080 * Called with, and returns with, cache lock unlocked.
1081 */
1082 vm_object_t
1083 vm_object_cache_trim(
1084 boolean_t called_from_vm_object_deallocate)
1085 {
1086 register vm_object_t object = VM_OBJECT_NULL;
1087 vm_object_t shadow;
1088
1089 for (;;) {
1090
1091 /*
1092 * If we no longer need to trim the cache,
1093 * then we are done.
1094 */
1095 if (vm_object_cached_count <= vm_object_cached_max)
1096 return VM_OBJECT_NULL;
1097
1098 vm_object_cache_lock();
1099 if (vm_object_cached_count <= vm_object_cached_max) {
1100 vm_object_cache_unlock();
1101 return VM_OBJECT_NULL;
1102 }
1103
1104 /*
1105 * We must trim down the cache, so remove
1106 * the first object in the cache.
1107 */
1108 XPR(XPR_VM_OBJECT,
1109 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
1110 vm_object_cached_list.next,
1111 vm_object_cached_list.prev, 0, 0, 0);
1112
1113 object = (vm_object_t) queue_first(&vm_object_cached_list);
1114 if(object == (vm_object_t) &vm_object_cached_list) {
1115 /* something's wrong with the calling parameter or */
1116 /* the value of vm_object_cached_count, just fix */
1117 /* and return */
1118 if(vm_object_cached_max < 0)
1119 vm_object_cached_max = 0;
1120 vm_object_cached_count = 0;
1121 vm_object_cache_unlock();
1122 return VM_OBJECT_NULL;
1123 }
1124 vm_object_lock(object);
1125 queue_remove(&vm_object_cached_list, object, vm_object_t,
1126 cached_list);
1127 vm_object_cached_count--;
1128
1129 vm_object_cache_unlock();
1130 /*
1131 * Since this object is in the cache, we know
1132 * that it is initialized and has no references.
1133 * Take a reference to avoid recursive deallocations.
1134 */
1135
1136 assert(object->pager_initialized);
1137 assert(object->ref_count == 0);
1138 vm_object_lock_assert_exclusive(object);
1139 object->ref_count++;
1140
1141 /*
1142 * Terminate the object.
1143 * If the object had a shadow, we let vm_object_deallocate
1144 * deallocate it. "pageout" objects have a shadow, but
1145 * maintain a "paging reference" rather than a normal
1146 * reference.
1147 * (We are careful here to limit recursion.)
1148 */
1149 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
1150
1151 if(vm_object_terminate(object) != KERN_SUCCESS)
1152 continue;
1153
1154 if (shadow != VM_OBJECT_NULL) {
1155 if (called_from_vm_object_deallocate) {
1156 return shadow;
1157 } else {
1158 vm_object_deallocate(shadow);
1159 }
1160 }
1161 }
1162 }
1163 #endif
1164
1165
1166 /*
1167 * Routine: vm_object_terminate
1168 * Purpose:
1169 * Free all resources associated with a vm_object.
1170 * In/out conditions:
1171 * Upon entry, the object must be locked,
1172 * and the object must have exactly one reference.
1173 *
1174 * The shadow object reference is left alone.
1175 *
1176 * The object must be unlocked if it's found that pages
1177 * must be flushed to a backing object. If someone
1178 * manages to map the object while it is being flushed
1179 * the object is returned unlocked and unchanged. Otherwise,
1180 * upon exit, the cache will be unlocked, and the
1181 * object will cease to exist.
1182 */
1183 static kern_return_t
1184 vm_object_terminate(
1185 vm_object_t object)
1186 {
1187 vm_object_t shadow_object;
1188
1189 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
1190 object, object->ref_count, 0, 0, 0);
1191
1192 if (!object->pageout && (!object->temporary || object->can_persist) &&
1193 (object->pager != NULL || object->shadow_severed)) {
1194 /*
1195 * Clear pager_trusted bit so that the pages get yanked
1196 * out of the object instead of cleaned in place. This
1197 * prevents a deadlock in XMM and makes more sense anyway.
1198 */
1199 object->pager_trusted = FALSE;
1200
1201 vm_object_reap_pages(object, REAP_TERMINATE);
1202 }
1203 /*
1204 * Make sure the object isn't already being terminated
1205 */
1206 if (object->terminating) {
1207 vm_object_lock_assert_exclusive(object);
1208 object->ref_count--;
1209 assert(object->ref_count > 0);
1210 vm_object_unlock(object);
1211 return KERN_FAILURE;
1212 }
1213
1214 /*
1215 * Did somebody get a reference to the object while we were
1216 * cleaning it?
1217 */
1218 if (object->ref_count != 1) {
1219 vm_object_lock_assert_exclusive(object);
1220 object->ref_count--;
1221 assert(object->ref_count > 0);
1222 vm_object_res_deallocate(object);
1223 vm_object_unlock(object);
1224 return KERN_FAILURE;
1225 }
1226
1227 /*
1228 * Make sure no one can look us up now.
1229 */
1230
1231 object->terminating = TRUE;
1232 object->alive = FALSE;
1233
1234 if (object->hashed) {
1235 lck_mtx_t *lck;
1236
1237 lck = vm_object_hash_lock_spin(object->pager);
1238 vm_object_remove(object);
1239 vm_object_hash_unlock(lck);
1240 }
1241 /*
1242 * Detach the object from its shadow if we are the shadow's
1243 * copy. The reference we hold on the shadow must be dropped
1244 * by our caller.
1245 */
1246 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1247 !(object->pageout)) {
1248 vm_object_lock(shadow_object);
1249 if (shadow_object->copy == object)
1250 shadow_object->copy = VM_OBJECT_NULL;
1251 vm_object_unlock(shadow_object);
1252 }
1253
1254 if (object->paging_in_progress != 0 ||
1255 object->activity_in_progress != 0) {
1256 /*
1257 * There are still some paging_in_progress references
1258 * on this object, meaning that there are some paging
1259 * or other I/O operations in progress for this VM object.
1260 * Such operations take some paging_in_progress references
1261 * up front to ensure that the object doesn't go away, but
1262 * they may also need to acquire a reference on the VM object,
1263 * to map it in kernel space, for example. That means that
1264 * they may end up releasing the last reference on the VM
1265 * object, triggering its termination, while still holding
1266 * paging_in_progress references. Waiting for these
1267 * pending paging_in_progress references to go away here would
1268 * deadlock.
1269 *
1270 * To avoid deadlocking, we'll let the vm_object_reaper_thread
1271 * complete the VM object termination if it still holds
1272 * paging_in_progress references at this point.
1273 *
1274 * No new paging_in_progress should appear now that the
1275 * VM object is "terminating" and not "alive".
1276 */
1277 vm_object_reap_async(object);
1278 vm_object_unlock(object);
1279 /*
1280 * Return KERN_FAILURE to let the caller know that we
1281 * haven't completed the termination and it can't drop this
1282 * object's reference on its shadow object yet.
1283 * The reaper thread will take care of that once it has
1284 * completed this object's termination.
1285 */
1286 return KERN_FAILURE;
1287 }
1288 /*
1289 * complete the VM object termination
1290 */
1291 vm_object_reap(object);
1292 object = VM_OBJECT_NULL;
1293
1294 /*
1295 * the object lock was released by vm_object_reap()
1296 *
1297 * KERN_SUCCESS means that this object has been terminated
1298 * and no longer needs its shadow object but still holds a
1299 * reference on it.
1300 * The caller is responsible for dropping that reference.
1301 * We can't call vm_object_deallocate() here because that
1302 * would create a recursion.
1303 */
1304 return KERN_SUCCESS;
1305 }
1306
1307
1308 /*
1309 * vm_object_reap():
1310 *
1311 * Complete the termination of a VM object after it's been marked
1312 * as "terminating" and "!alive" by vm_object_terminate().
1313 *
1314 * The VM object must be locked by caller.
1315 * The lock will be released on return and the VM object is no longer valid.
1316 */
1317 void
1318 vm_object_reap(
1319 vm_object_t object)
1320 {
1321 memory_object_t pager;
1322
1323 vm_object_lock_assert_exclusive(object);
1324 assert(object->paging_in_progress == 0);
1325 assert(object->activity_in_progress == 0);
1326
1327 vm_object_reap_count++;
1328
1329 pager = object->pager;
1330 object->pager = MEMORY_OBJECT_NULL;
1331
1332 if (pager != MEMORY_OBJECT_NULL)
1333 memory_object_control_disable(object->pager_control);
1334
1335 object->ref_count--;
1336 #if TASK_SWAPPER
1337 assert(object->res_count == 0);
1338 #endif /* TASK_SWAPPER */
1339
1340 assert (object->ref_count == 0);
1341
1342 /*
1343 * remove from purgeable queue if it's on
1344 */
1345 if (object->objq.next || object->objq.prev) {
1346 purgeable_q_t queue = vm_purgeable_object_remove(object);
1347 assert(queue);
1348
1349 /* Must take page lock for this - using it to protect token queue */
1350 vm_page_lock_queues();
1351 vm_purgeable_token_delete_first(queue);
1352
1353 assert(queue->debug_count_objects>=0);
1354 vm_page_unlock_queues();
1355 }
1356
1357 /*
1358 * Clean or free the pages, as appropriate.
1359 * It is possible for us to find busy/absent pages,
1360 * if some faults on this object were aborted.
1361 */
1362 if (object->pageout) {
1363 assert(object->shadow != VM_OBJECT_NULL);
1364
1365 vm_pageout_object_terminate(object);
1366
1367 } else if (((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) {
1368
1369 vm_object_reap_pages(object, REAP_REAP);
1370 }
1371 assert(queue_empty(&object->memq));
1372 assert(object->paging_in_progress == 0);
1373 assert(object->activity_in_progress == 0);
1374 assert(object->ref_count == 0);
1375
1376 /*
1377 * If the pager has not already been released by
1378 * vm_object_destroy, we need to terminate it and
1379 * release our reference to it here.
1380 */
1381 if (pager != MEMORY_OBJECT_NULL) {
1382 vm_object_unlock(object);
1383 vm_object_release_pager(pager, object->hashed);
1384 vm_object_lock(object);
1385 }
1386
1387 /* kick off anyone waiting on terminating */
1388 object->terminating = FALSE;
1389 vm_object_paging_begin(object);
1390 vm_object_paging_end(object);
1391 vm_object_unlock(object);
1392
1393 #if MACH_PAGEMAP
1394 vm_external_destroy(object->existence_map, object->size);
1395 #endif /* MACH_PAGEMAP */
1396
1397 object->shadow = VM_OBJECT_NULL;
1398
1399 vm_object_lock_destroy(object);
1400 /*
1401 * Free the space for the object.
1402 */
1403 zfree(vm_object_zone, object);
1404 object = VM_OBJECT_NULL;
1405 }
1406
1407
1408
1409 #define V_O_R_MAX_BATCH 128
1410
1411
1412 #define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \
1413 MACRO_BEGIN \
1414 if (_local_free_q) { \
1415 if (do_disconnect) { \
1416 vm_page_t m; \
1417 for (m = _local_free_q; \
1418 m != VM_PAGE_NULL; \
1419 m = (vm_page_t) m->pageq.next) { \
1420 if (m->pmapped) { \
1421 pmap_disconnect(m->phys_page); \
1422 } \
1423 } \
1424 } \
1425 vm_page_free_list(_local_free_q, TRUE); \
1426 _local_free_q = VM_PAGE_NULL; \
1427 } \
1428 MACRO_END
1429
1430
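/*
 * vm_object_reap_pages:
 *
 * Walk the object's resident pages and dispose of them according to
 * "reap_type" (see the switch below):
 *
 *	REAP_REAP		free whatever pages are left on the object
 *	REAP_TERMINATE		like REAP_REAP, but dirty/precious pages
 *				are first pushed to the pager while the
 *				object is still alive
 *	REAP_PURGEABLE		discard the data without cleaning it;
 *				wired, busy and in-flight pages are skipped
 *	REAP_DATA_FLUSH		toss the data; pages are disconnected from
 *				all pmaps before being freed
 *
 * Reclaimed pages are gathered on a local list and freed in batches
 * via VM_OBJ_REAP_FREELIST() to limit the time spent holding the
 * page queue lock.
 */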
1431 void
1432 vm_object_reap_pages(
1433 vm_object_t object,
1434 int reap_type)
1435 {
1436 vm_page_t p;
1437 vm_page_t next;
1438 vm_page_t local_free_q = VM_PAGE_NULL;
1439 int loop_count;
1440 boolean_t disconnect_on_release;
1441
1442 if (reap_type == REAP_DATA_FLUSH) {
1443 /*
1444 * We need to disconnect pages from all pmaps before
1445 * releasing them to the free list
1446 */
1447 disconnect_on_release = TRUE;
1448 } else {
1449 /*
1450 * Either the caller has already disconnected the pages
1451 * from all pmaps, or we disconnect them here as we add
1452 * them to our local list of pages to be released.
1453 * No need to re-disconnect them when we release the pages
1454 * to the free list.
1455 */
1456 disconnect_on_release = FALSE;
1457 }
1458
1459 restart_after_sleep:
1460 if (queue_empty(&object->memq))
1461 return;
1462 loop_count = V_O_R_MAX_BATCH + 1;
1463
1464 vm_page_lockspin_queues();
1465
1466 next = (vm_page_t)queue_first(&object->memq);
1467
1468 while (!queue_end(&object->memq, (queue_entry_t)next)) {
1469
1470 p = next;
1471 next = (vm_page_t)queue_next(&next->listq);
1472
1473 if (--loop_count == 0) {
1474
1475 vm_page_unlock_queues();
1476
1477 if (local_free_q) {
1478 /*
1479 * Free the pages we reclaimed so far
1480 * and take a little break to avoid
1481 * hogging the page queue lock too long
1482 */
1483 VM_OBJ_REAP_FREELIST(local_free_q,
1484 disconnect_on_release);
1485 } else
1486 mutex_pause(0);
1487
1488 loop_count = V_O_R_MAX_BATCH + 1;
1489
1490 vm_page_lockspin_queues();
1491 }
1492 if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) {
1493
1494 if (reap_type == REAP_DATA_FLUSH && (p->pageout == TRUE && p->list_req_pending == TRUE)) {
1495 p->list_req_pending = FALSE;
1496 p->cleaning = FALSE;
1497 p->pageout = FALSE;
1498 /*
1499 * need to drop the laundry count...
1500 * we may also need to remove it
1501 * from the I/O paging queue...
1502 * vm_pageout_throttle_up handles both cases
1503 *
1504 * the laundry and pageout_queue flags are cleared...
1505 */
1506 #if CONFIG_EMBEDDED
1507 if (p->laundry)
1508 vm_pageout_throttle_up(p);
1509 #else
1510 vm_pageout_throttle_up(p);
1511 #endif
1512
1513 /*
1514 * toss the wire count we picked up
1515 * when we initially set this page up
1516 * to be cleaned...
1517 */
1518 vm_page_unwire(p);
1519 PAGE_WAKEUP(p);
1520
1521 } else if (p->busy || p->cleaning) {
1522
1523 vm_page_unlock_queues();
1524 /*
1525 * free the pages reclaimed so far
1526 */
1527 VM_OBJ_REAP_FREELIST(local_free_q,
1528 disconnect_on_release);
1529
1530 PAGE_SLEEP(object, p, THREAD_UNINT);
1531
1532 goto restart_after_sleep;
1533 }
1534 }
1535 switch (reap_type) {
1536
1537 case REAP_DATA_FLUSH:
1538 if (VM_PAGE_WIRED(p)) {
1539 /*
1540 * this is an odd case... perhaps we should
1541 * zero-fill this page since we're conceptually
1542 * tossing its data at this point, but leaving
1543 * it on the object to honor the 'wire' contract
1544 */
1545 continue;
1546 }
1547 break;
1548
1549 case REAP_PURGEABLE:
1550 if (VM_PAGE_WIRED(p)) {
1551 /* can't purge a wired page */
1552 vm_page_purged_wired++;
1553 continue;
1554 }
1555
1556 if (p->busy) {
1557 /*
1558 * We can't reclaim a busy page but we can
1559 * make it pageable (it's not wired) to make
1560 * sure that it gets considered by
1561 * vm_pageout_scan() later.
1562 */
1563 vm_page_deactivate(p);
1564 vm_page_purged_busy++;
1565 continue;
1566 }
1567
1568 if (p->cleaning || p->laundry || p->list_req_pending) {
1569 /*
1570 * page is being acted upon,
1571 * so don't mess with it
1572 */
1573 vm_page_purged_others++;
1574 continue;
1575 }
1576 assert(p->object != kernel_object);
1577
1578 /*
1579 * we can discard this page...
1580 */
1581 if (p->pmapped == TRUE) {
1582 int refmod_state;
1583 /*
1584 * unmap the page
1585 */
1586 refmod_state = pmap_disconnect(p->phys_page);
1587 if (refmod_state & VM_MEM_MODIFIED) {
1588 p->dirty = TRUE;
1589 }
1590 }
1591 if (p->dirty || p->precious) {
1592 /*
1593 * we saved the cost of cleaning this page !
1594 */
1595 vm_page_purged_count++;
1596 }
1597
1598 break;
1599
1600 case REAP_TERMINATE:
1601 if (p->absent || p->private) {
1602 /*
1603 * For private pages, VM_PAGE_FREE just
1604 * leaves the page structure around for
1605 * its owner to clean up. For absent
1606 * pages, the structure is returned to
1607 * the appropriate pool.
1608 */
1609 break;
1610 }
1611 if (p->fictitious) {
1612 assert (p->phys_page == vm_page_guard_addr);
1613 break;
1614 }
1615 if (!p->dirty && p->wpmapped)
1616 p->dirty = pmap_is_modified(p->phys_page);
1617
1618 if ((p->dirty || p->precious) && !p->error && object->alive) {
1619
1620 p->busy = TRUE;
1621
1622 VM_PAGE_QUEUES_REMOVE(p);
1623
1624 vm_page_unlock_queues();
1625 /*
1626 * free the pages reclaimed so far
1627 */
1628 VM_OBJ_REAP_FREELIST(local_free_q,
1629 disconnect_on_release);
1630
1631 /*
1632 * flush page... page will be freed
1633 * upon completion of I/O
1634 */
1635 vm_pageout_cluster(p);
1636 vm_object_paging_wait(object, THREAD_UNINT);
1637
1638 goto restart_after_sleep;
1639 }
1640 break;
1641
1642 case REAP_REAP:
1643 break;
1644 }
1645 vm_page_free_prepare_queues(p);
1646 assert(p->pageq.next == NULL && p->pageq.prev == NULL);
1647 /*
1648 * Add this page to our list of reclaimed pages,
1649 * to be freed later.
1650 */
1651 p->pageq.next = (queue_entry_t) local_free_q;
1652 local_free_q = p;
1653 }
1654 vm_page_unlock_queues();
1655
1656 /*
1657 * Free the remaining reclaimed pages
1658 */
1659 VM_OBJ_REAP_FREELIST(local_free_q,
1660 disconnect_on_release);
1661 }
1662
1663
1664 void
1665 vm_object_reap_async(
1666 vm_object_t object)
1667 {
1668 vm_object_lock_assert_exclusive(object);
1669
1670 vm_object_reaper_lock_spin();
1671
1672 vm_object_reap_count_async++;
1673
1674 /* enqueue the VM object... */
1675 queue_enter(&vm_object_reaper_queue, object,
1676 vm_object_t, cached_list);
1677
1678 vm_object_reaper_unlock();
1679
1680 /* ... and wake up the reaper thread */
1681 thread_wakeup((event_t) &vm_object_reaper_queue);
1682 }
1683
1684
1685 void
1686 vm_object_reaper_thread(void)
1687 {
1688 vm_object_t object, shadow_object;
1689
1690 vm_object_reaper_lock_spin();
1691
1692 while (!queue_empty(&vm_object_reaper_queue)) {
1693 queue_remove_first(&vm_object_reaper_queue,
1694 object,
1695 vm_object_t,
1696 cached_list);
1697
1698 vm_object_reaper_unlock();
1699 vm_object_lock(object);
1700
1701 assert(object->terminating);
1702 assert(!object->alive);
1703
1704 /*
1705 * The pageout daemon might be playing with our pages.
1706 * Now that the object is dead, it won't touch any more
1707 * pages, but some pages might already be on their way out.
1708 * Hence, we wait until the active paging activities have
1709 * ceased before we break the association with the pager
1710 * itself.
1711 */
1712 while (object->paging_in_progress != 0 ||
1713 object->activity_in_progress != 0) {
1714 vm_object_wait(object,
1715 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1716 THREAD_UNINT);
1717 vm_object_lock(object);
1718 }
1719
1720 shadow_object =
1721 object->pageout ? VM_OBJECT_NULL : object->shadow;
1722
1723 vm_object_reap(object);
1724 /* cache is unlocked and object is no longer valid */
1725 object = VM_OBJECT_NULL;
1726
1727 if (shadow_object != VM_OBJECT_NULL) {
1728 /*
1729 * Drop the reference "object" was holding on
1730 * its shadow object.
1731 */
1732 vm_object_deallocate(shadow_object);
1733 shadow_object = VM_OBJECT_NULL;
1734 }
1735 vm_object_reaper_lock_spin();
1736 }
1737
1738 /* wait for more work... */
1739 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
1740
1741 vm_object_reaper_unlock();
1742
1743 thread_block((thread_continue_t) vm_object_reaper_thread);
1744 /*NOTREACHED*/
1745 }
1746
1747 /*
1748 * Routine: vm_object_pager_wakeup
1749 * Purpose: Wake up anyone waiting for termination of a pager.
1750 */
1751
1752 static void
1753 vm_object_pager_wakeup(
1754 memory_object_t pager)
1755 {
1756 vm_object_hash_entry_t entry;
1757 boolean_t waiting = FALSE;
1758 lck_mtx_t *lck;
1759
1760 /*
1761 * If anyone was waiting for the memory_object_terminate
1762 * to be queued, wake them up now.
1763 */
1764 lck = vm_object_hash_lock_spin(pager);
1765 entry = vm_object_hash_lookup(pager, TRUE);
1766 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1767 waiting = entry->waiting;
1768 vm_object_hash_unlock(lck);
1769
1770 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1771 if (waiting)
1772 thread_wakeup((event_t) pager);
1773 vm_object_hash_entry_free(entry);
1774 }
1775 }
1776
1777 /*
1778 * Routine: vm_object_release_pager
1779 * Purpose: Terminate the pager and, upon completion,
1780 * release our last reference to it.
1781 * just like memory_object_terminate, except
1782 * that we wake up anyone blocked in vm_object_enter
1783 * waiting for the termination message to be queued
1784 * before calling memory_object_init.
1785 */
1786 static void
1787 vm_object_release_pager(
1788 memory_object_t pager,
1789 boolean_t hashed)
1790 {
1791
1792 /*
1793 * Terminate the pager.
1794 */
1795
1796 (void) memory_object_terminate(pager);
1797
1798 if (hashed == TRUE) {
1799 /*
1800 * Wakeup anyone waiting for this terminate
1801 * and remove the entry from the hash
1802 */
1803 vm_object_pager_wakeup(pager);
1804 }
1805 /*
1806 * Release reference to pager.
1807 */
1808 memory_object_deallocate(pager);
1809 }
1810
1811 /*
1812 * Routine: vm_object_destroy
1813 * Purpose:
1814 * Shut down a VM object, despite the
1815 * presence of address map (or other) references
1816 * to the vm_object.
1817 */
1818 kern_return_t
1819 vm_object_destroy(
1820 vm_object_t object,
1821 __unused kern_return_t reason)
1822 {
1823 memory_object_t old_pager;
1824
1825 if (object == VM_OBJECT_NULL)
1826 return(KERN_SUCCESS);
1827
1828 /*
1829 * Remove the pager association immediately.
1830 *
1831 * This will prevent the memory manager from further
1832 * meddling. [If it wanted to flush data or make
1833 * other changes, it should have done so before performing
1834 * the destroy call.]
1835 */
1836
1837 vm_object_lock(object);
1838 object->can_persist = FALSE;
1839 object->named = FALSE;
1840 object->alive = FALSE;
1841
1842 if (object->hashed) {
1843 lck_mtx_t *lck;
1844 /*
1845 * Rip out the pager from the vm_object now...
1846 */
1847 lck = vm_object_hash_lock_spin(object->pager);
1848 vm_object_remove(object);
1849 vm_object_hash_unlock(lck);
1850 }
1851 old_pager = object->pager;
1852 object->pager = MEMORY_OBJECT_NULL;
1853 if (old_pager != MEMORY_OBJECT_NULL)
1854 memory_object_control_disable(object->pager_control);
1855
1856 /*
1857 * Wait for the existing paging activity (that got
1858 * through before we nulled out the pager) to subside.
1859 */
1860
1861 vm_object_paging_wait(object, THREAD_UNINT);
1862 vm_object_unlock(object);
1863
1864 /*
1865 * Terminate the object now.
1866 */
1867 if (old_pager != MEMORY_OBJECT_NULL) {
1868 vm_object_release_pager(old_pager, object->hashed);
1869
1870 /*
1871 * JMM - Release the caller's reference. This assumes the
1872 * caller had a reference to release, which is a big (but
1873 * currently valid) assumption if this is driven from the
1874 * vnode pager (it is holding a named reference when making
1875 * this call).
1876 */
1877 vm_object_deallocate(object);
1878
1879 }
1880 return(KERN_SUCCESS);
1881 }
1882
1883
1884 #define VM_OBJ_DEACT_ALL_STATS DEBUG
1885 #if VM_OBJ_DEACT_ALL_STATS
1886 uint32_t vm_object_deactivate_all_pages_batches = 0;
1887 uint32_t vm_object_deactivate_all_pages_pages = 0;
1888 #endif /* VM_OBJ_DEACT_ALL_STATS */
1889 /*
1890 * vm_object_deactivate_all_pages
1891 *
1892 * Deactivate all pages in the specified object. (Keep its pages
1893 * in memory even though it is no longer referenced.)
1894 *
1895 * The object must be locked.
1896 */
1897 static void
1898 vm_object_deactivate_all_pages(
1899 register vm_object_t object)
1900 {
1901 register vm_page_t p;
1902 int loop_count;
1903 #if VM_OBJ_DEACT_ALL_STATS
1904 int pages_count;
1905 #endif /* VM_OBJ_DEACT_ALL_STATS */
1906 #define V_O_D_A_P_MAX_BATCH 256
1907
1908 loop_count = V_O_D_A_P_MAX_BATCH;
1909 #if VM_OBJ_DEACT_ALL_STATS
1910 pages_count = 0;
1911 #endif /* VM_OBJ_DEACT_ALL_STATS */
1912 vm_page_lock_queues();
1913 queue_iterate(&object->memq, p, vm_page_t, listq) {
1914 if (--loop_count == 0) {
1915 #if VM_OBJ_DEACT_ALL_STATS
1916 hw_atomic_add(&vm_object_deactivate_all_pages_batches,
1917 1);
1918 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1919 pages_count);
1920 pages_count = 0;
1921 #endif /* VM_OBJ_DEACT_ALL_STATS */
1922 lck_mtx_yield(&vm_page_queue_lock);
1923 loop_count = V_O_D_A_P_MAX_BATCH;
1924 }
1925 if (!p->busy && !p->throttled) {
1926 #if VM_OBJ_DEACT_ALL_STATS
1927 pages_count++;
1928 #endif /* VM_OBJ_DEACT_ALL_STATS */
1929 vm_page_deactivate(p);
1930 }
1931 }
1932 #if VM_OBJ_DEACT_ALL_STATS
1933 if (pages_count) {
1934 hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
1935 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
1936 pages_count);
1937 pages_count = 0;
1938 }
1939 #endif /* VM_OBJ_DEACT_ALL_STATS */
1940 vm_page_unlock_queues();
1941 }
1942
1943
1944
1945 /*
1946 * when deallocating pages it is necessary to hold
1947 * the vm_page_queue_lock (a hot global lock) for certain operations
1948 * on the page... however, the majority of the work can be done
1949 * while merely holding the object lock... to mitigate the time spent behind the
1950 * global lock, we use a 2-pass algorithm: collect pages up to DELAYED_WORK_LIMIT
1951 * while doing all of the work that doesn't require the vm_page_queue_lock...
1952 * then call dw_do_work to acquire the vm_page_queue_lock and do the
1953 * necessary work for each page... we will grab the busy bit on the page
1954 * so that dw_do_work can drop the object lock if it can't immediately take the
1955 * vm_page_queue_lock, in order to compete for the locks in the same order that
1956 * vm_pageout_scan takes them.
1957 */
1958
1959 #define DELAYED_WORK_LIMIT 32
1960
1961 #define DW_clear_reference 0x01
1962 #define DW_move_page 0x02
1963 #define DW_clear_busy 0x04
1964 #define DW_PAGE_WAKEUP 0x08
1965
1966
1967 struct dw {
1968 vm_page_t dw_m;
1969 int dw_mask;
1970 };
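/*
 * Illustrative sketch (added for clarity; not part of the original code):
 * the routines below batch per-page work in a local dw_array and flush it
 * through dw_do_work() whenever DELAYED_WORK_LIMIT entries have
 * accumulated, roughly like this:
 *
 *	struct dw	dw_array[DELAYED_WORK_LIMIT];
 *	struct dw	*dwp = &dw_array[0];
 *	int		dw_count = 0;
 *
 *	// for each page m, with only the object lock held:
 *	dwp->dw_m = m;
 *	dwp->dw_mask = DW_clear_reference | DW_clear_busy | DW_PAGE_WAKEUP;
 *	dwp++;
 *	dw_count++;
 *	if (dw_count >= DELAYED_WORK_LIMIT) {
 *		dw_do_work(object, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 */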
1971
1972 static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count);
1973
1974
1975 static void
1976 dw_do_work(
1977 vm_object_t object,
1978 struct dw *dwp,
1979 int dw_count)
1980 {
1981 vm_page_t m;
1982 int j;
1983
1984 /*
1985 * pageout_scan takes the vm_page_lock_queues first
1986 * then tries for the object lock... to avoid what
1987 * is effectively a lock inversion, we'll go to the
1988 * trouble of taking them in that same order... otherwise
1989 * if this object contains the majority of the pages resident
1990 * in the UBC (or a small set of large objects actively being
1991 * worked on contains the majority of the pages), we could
1992 * cause the pageout_scan thread to 'starve' in its attempt
1993 * to find pages to move to the free queue, since it has to
1994 * successfully acquire the object lock of any candidate page
1995 * before it can steal/clean it.
1996 */
1997 if (!vm_page_trylockspin_queues()) {
1998 vm_object_unlock(object);
1999
2000 vm_page_lockspin_queues();
2001
2002 for (j = 0; ; j++) {
2003 if (!vm_object_lock_avoid(object) &&
2004 _vm_object_lock_try(object))
2005 break;
2006 vm_page_unlock_queues();
2007 mutex_pause(j);
2008 vm_page_lockspin_queues();
2009 }
2010 }
2011 for (j = 0; j < dw_count; j++, dwp++) {
2012
2013 m = dwp->dw_m;
2014
2015 if (dwp->dw_mask & DW_clear_reference)
2016 m->reference = FALSE;
2017
2018 if (dwp->dw_mask & DW_move_page) {
2019 VM_PAGE_QUEUES_REMOVE(m);
2020
2021 assert(!m->laundry);
2022 assert(m->object != kernel_object);
2023 assert(m->pageq.next == NULL &&
2024 m->pageq.prev == NULL);
2025
2026 if (m->zero_fill) {
2027 queue_enter_first(&vm_page_queue_zf, m, vm_page_t, pageq);
2028 vm_zf_queue_count++;
2029 } else {
2030 queue_enter_first(&vm_page_queue_inactive, m, vm_page_t, pageq);
2031 }
2032 m->inactive = TRUE;
2033
2034 if (!m->fictitious) {
2035 vm_page_inactive_count++;
2036 token_new_pagecount++;
2037 } else {
2038 assert(m->phys_page == vm_page_fictitious_addr);
2039 }
2040 }
2041 if (dwp->dw_mask & DW_clear_busy)
2042 dwp->dw_m->busy = FALSE;
2043
2044 if (dwp->dw_mask & DW_PAGE_WAKEUP)
2045 PAGE_WAKEUP(dwp->dw_m);
2046 }
2047 vm_page_unlock_queues();
2048
2049 #if CONFIG_EMBEDDED
2050 {
2051 int percent_avail;
2052
2053 /*
2054 * Decide if we need to send a memory status notification.
2055 */
2056 percent_avail =
2057 (vm_page_active_count + vm_page_inactive_count +
2058 vm_page_speculative_count + vm_page_free_count +
2059 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2060 atop_64(max_mem);
2061 if (percent_avail >= (kern_memorystatus_level + 5) ||
2062 percent_avail <= (kern_memorystatus_level - 5)) {
2063 kern_memorystatus_level = percent_avail;
2064 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2065 }
2066 }
2067 #endif
2068 }
2069
2070
2071
2072 /*
2073 * The "chunk" macros are used by routines below when looking for pages to deactivate. These
2074 * exist because of the need to handle shadow chains. When deactivating pages, we only
2075 * want to deactivate the ones at the topmost level in the object chain. In order to do
2076 * this efficiently, the specified address range is divided up into "chunks" and we use
2077 * a bit map to keep track of which pages have already been processed as we descend down
2078 * the shadow chain. These chunk macros hide the details of the bit map implementation
2079 * as much as we can.
2080 *
2081 * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is
2082 * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest
2083 * order bit represents page 0 in the current range and the highest-order bit represents
2084 * page 63.
2085 *
2086 * For further convenience, we also use negative logic for the page state in the bit map.
2087 * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has
2088 * been processed. This way we can simply test the 64-bit long word to see if it's zero
2089 * to easily tell if the whole range has been processed. Therefore, the bit map starts
2090 * out with all the bits set. The macros below hide all these details from the caller.
2091 */
2092
2093 #define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */
2094 /* be the same as the number of bits in */
2095 /* the chunk_state_t type. We use 64 */
2096 /* just for convenience. */
2097
2098 #define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */
2099
2100 typedef uint64_t chunk_state_t;
2101
2102 /*
2103 * The bit map uses negative logic, so we start out with all 64 bits set to indicate
2104 * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE,
2105 * then we mark pages beyond the len as having been "processed" so that we don't waste time
2106 * looking at pages in that range. This can save us from unnecessarily chasing down the
2107 * shadow chain.
2108 */
2109
2110 #define CHUNK_INIT(c, len) \
2111 MACRO_BEGIN \
2112 uint64_t p; \
2113 \
2114 (c) = 0xffffffffffffffffLL; \
2115 \
2116 for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \
2117 MARK_PAGE_HANDLED(c, p); \
2118 MACRO_END
2119
2120 /*
2121 * Return true if all pages in the chunk have not yet been processed.
2122 */
2123
2124 #define CHUNK_NOT_COMPLETE(c) ((c) != 0)
2125
2126 /*
2127 * Return true if the page at offset 'p' in the bit map has already been handled
2128 * while processing a higher level object in the shadow chain.
2129 */
2130
2131 #define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0)
2132
2133 /*
2134 * Mark the page at offset 'p' in the bit map as having been processed.
2135 */
2136
2137 #define MARK_PAGE_HANDLED(c, p) \
2138 MACRO_BEGIN \
2139 (c) = (c) & ~(1LL << (p)); \
2140 MACRO_END
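/*
 * Worked example (illustration only): for a 3-page range,
 * CHUNK_INIT(c, 3 * PAGE_SIZE_64) leaves c == 0x7 -- bits 0..2 set
 * ("not yet seen"), bits 3..63 already cleared ("handled").  After
 * MARK_PAGE_HANDLED(c, 1), c == 0x5, and CHUNK_NOT_COMPLETE(c) remains
 * true until pages 0 and 2 have been handled as well.
 */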
2141
2142
2143 /*
2144 * Return true if the page at the given offset has been paged out. Object is
2145 * locked upon entry and returned locked.
2146 */
2147
2148 static boolean_t
2149 page_is_paged_out(
2150 vm_object_t object,
2151 vm_object_offset_t offset)
2152 {
2153 kern_return_t kr;
2154 memory_object_t pager;
2155
2156 /*
2157 * Check the existence map for the page if we have one, otherwise
2158 * ask the pager about this page.
2159 */
2160
2161 #if MACH_PAGEMAP
2162 if (object->existence_map) {
2163 if (vm_external_state_get(object->existence_map, offset)
2164 == VM_EXTERNAL_STATE_EXISTS) {
2165 /*
2166 * We found the page
2167 */
2168
2169 return TRUE;
2170 }
2171 } else
2172 #endif
2173 if (object->internal &&
2174 object->alive &&
2175 !object->terminating &&
2176 object->pager_ready) {
2177
2178 /*
2179 * We're already holding a "paging in progress" reference
2180 * so the object can't disappear when we release the lock.
2181 */
2182
2183 assert(object->paging_in_progress);
2184 pager = object->pager;
2185 vm_object_unlock(object);
2186
2187 kr = memory_object_data_request(
2188 pager,
2189 offset + object->paging_offset,
2190 0, /* just poke the pager */
2191 VM_PROT_READ,
2192 NULL);
2193
2194 vm_object_lock(object);
2195
2196 if (kr == KERN_SUCCESS) {
2197
2198 /*
2199 * We found the page
2200 */
2201
2202 return TRUE;
2203 }
2204 }
2205
2206 return FALSE;
2207 }
2208
2209
2210 /*
2211 * Deactivate the pages in the specified object and range. If kill_page is set, also discard any
2212 * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify
2213 * a size that is less than or equal to the CHUNK_SIZE.
2214 */
2215
2216 static void
2217 deactivate_pages_in_object(
2218 vm_object_t object,
2219 vm_object_offset_t offset,
2220 vm_object_size_t size,
2221 boolean_t kill_page,
2222 boolean_t reusable_page,
2223 #if !MACH_ASSERT
2224 __unused
2225 #endif
2226 boolean_t all_reusable,
2227 chunk_state_t *chunk_state)
2228 {
2229 vm_page_t m;
2230 int p;
2231 struct dw dw_array[DELAYED_WORK_LIMIT];
2232 struct dw *dwp;
2233 int dw_count;
2234 unsigned int reusable = 0;
2235
2236
2237 /*
2238 * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the
2239 * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may
2240 * have pages marked as having been processed already. We stop the loop early if we find we've handled
2241 * all the pages in the chunk.
2242 */
2243
2244 dwp = &dw_array[0];
2245 dw_count = 0;
2246
2247 for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) {
2248
2249 /*
2250 * If this offset has already been found and handled in a higher level object, then don't
2251 * do anything with it in the current shadow object.
2252 */
2253
2254 if (PAGE_ALREADY_HANDLED(*chunk_state, p))
2255 continue;
2256
2257 /*
2258 * See if the page at this offset is around. First check to see if the page is resident,
2259 * then if not, check the existence map or with the pager.
2260 */
2261
2262 if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
2263
2264 /*
2265 * We found a page we were looking for. Mark it as "handled" now in the chunk_state
2266 * so that we won't bother looking for a page at this offset again if there are more
2267 * shadow objects. Then deactivate the page.
2268 */
2269
2270 MARK_PAGE_HANDLED(*chunk_state, p);
2271
2272 if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy)) {
2273 int clear_refmod;
2274
2275 assert(!m->laundry);
2276
2277 clear_refmod = VM_MEM_REFERENCED;
2278 dwp->dw_mask = DW_clear_reference;
2279
2280 if ((kill_page) && (object->internal)) {
2281 m->precious = FALSE;
2282 m->dirty = FALSE;
2283
2284 clear_refmod |= VM_MEM_MODIFIED;
2285 if (m->throttled) {
2286 /*
2287 * This page is now clean and
2288 * reclaimable. Move it out
2289 * of the throttled queue, so
2290 * that vm_pageout_scan() can
2291 * find it.
2292 */
2293 dwp->dw_mask |= DW_move_page;
2294 }
2295 #if MACH_PAGEMAP
2296 vm_external_state_clr(object->existence_map, offset);
2297 #endif /* MACH_PAGEMAP */
2298
2299 if (reusable_page && !m->reusable) {
2300 assert(!all_reusable);
2301 assert(!object->all_reusable);
2302 m->reusable = TRUE;
2303 object->reusable_page_count++;
2304 assert(object->resident_page_count >= object->reusable_page_count);
2305 reusable++;
2306 #if CONFIG_EMBEDDED
2307 } else {
2308 if (m->reusable) {
2309 m->reusable = FALSE;
2310 object->reusable_page_count--;
2311 }
2312 #endif
2313 }
2314 }
2315 pmap_clear_refmod(m->phys_page, clear_refmod);
2316
2317 if (!m->throttled && !(reusable_page || all_reusable))
2318 dwp->dw_mask |= DW_move_page;
2319 /*
2320 * dw_do_work may need to drop the object lock;
2321 * if it does, we need the pages it's looking at to
2322 * be held stable via the busy bit.
2323 */
2324 m->busy = TRUE;
2325 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
2326
2327 dwp->dw_m = m;
2328 dwp++;
2329 dw_count++;
2330
2331 if (dw_count >= DELAYED_WORK_LIMIT) {
2332 if (reusable) {
2333 OSAddAtomic(reusable,
2334 &vm_page_stats_reusable.reusable_count);
2335 vm_page_stats_reusable.reusable += reusable;
2336 reusable = 0;
2337 }
2338 dw_do_work(object, &dw_array[0], dw_count);
2339
2340 dwp = &dw_array[0];
2341 dw_count = 0;
2342 }
2343 }
2344
2345 } else {
2346
2347 /*
2348 * The page at this offset isn't memory resident; check to see if it's
2349 * been paged out. If so, mark it as handled so we don't bother looking
2350 * for it in the shadow chain.
2351 */
2352
2353 if (page_is_paged_out(object, offset)) {
2354 MARK_PAGE_HANDLED(*chunk_state, p);
2355
2356 /*
2357 * If we're killing a non-resident page, then clear the page in the existence
2358 * map so we don't bother paging it back in if it's touched again in the future.
2359 */
2360
2361 if ((kill_page) && (object->internal)) {
2362 #if MACH_PAGEMAP
2363 vm_external_state_clr(object->existence_map, offset);
2364 #endif /* MACH_PAGEMAP */
2365 }
2366 }
2367 }
2368 }
2369
2370 if (reusable) {
2371 OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count);
2372 vm_page_stats_reusable.reusable += reusable;
2373 reusable = 0;
2374 }
2375
2376 if (dw_count)
2377 dw_do_work(object, &dw_array[0], dw_count);
2378 }
2379
2380
2381 /*
2382 * Deactivate a "chunk" of the given range of the object starting at offset. A "chunk"
2383 * will always be less than or equal to the given size. The total range is divided up
2384 * into chunks for efficiency and performance related to the locks and handling the shadow
2385 * chain. This routine returns how much of the given "size" it actually processed. It's
2386 * up to the caller to loop and keep calling this routine until the entire range they want
2387 * to process has been done.
2388 */
2389
2390 static vm_object_size_t
2391 deactivate_a_chunk(
2392 vm_object_t orig_object,
2393 vm_object_offset_t offset,
2394 vm_object_size_t size,
2395 boolean_t kill_page,
2396 boolean_t reusable_page,
2397 boolean_t all_reusable)
2398 {
2399 vm_object_t object;
2400 vm_object_t tmp_object;
2401 vm_object_size_t length;
2402 chunk_state_t chunk_state;
2403
2404
2405 /*
2406 * Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the
2407 * remaining size the caller asked for.
2408 */
2409
2410 length = MIN(size, CHUNK_SIZE);
2411
2412 /*
2413 * The chunk_state keeps track of which pages we've already processed if there's
2414 * a shadow chain on this object. At this point, we haven't done anything with this
2415 * range of pages yet, so initialize the state to indicate no pages processed yet.
2416 */
2417
2418 CHUNK_INIT(chunk_state, length);
2419 object = orig_object;
2420
2421 /*
2422 * Start at the top level object and iterate around the loop once for each object
2423 * in the shadow chain. We stop processing early if we've already found all the pages
2424 * in the range. Otherwise we stop when we run out of shadow objects.
2425 */
2426
2427 while (object && CHUNK_NOT_COMPLETE(chunk_state)) {
2428 vm_object_paging_begin(object);
2429
2430 deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state);
2431
2432 vm_object_paging_end(object);
2433
2434 /*
2435 * We've finished with this object, see if there's a shadow object. If
2436 * there is, update the offset and lock the new object. We also turn off
2437 * kill_page at this point since we only kill pages in the topmost object.
2438 */
2439
2440 tmp_object = object->shadow;
2441
2442 if (tmp_object) {
2443 kill_page = FALSE;
2444 reusable_page = FALSE;
2445 all_reusable = FALSE;
2446 offset += object->shadow_offset;
2447 vm_object_lock(tmp_object);
2448 }
2449
2450 if (object != orig_object)
2451 vm_object_unlock(object);
2452
2453 object = tmp_object;
2454 }
2455
2456 if (object && object != orig_object)
2457 vm_object_unlock(object);
2458
2459 return length;
2460 }
2461
2462
2463
2464 /*
2465 * Move any resident pages in the specified range to the inactive queue. If kill_page is set,
2466 * we also clear the modified status of the page and "forget" any changes that have been made
2467 * to the page.
2468 */
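/*
 * Hypothetical usage sketch (not taken from a real call site; all
 * identifiers other than vm_object_deactivate_pages and the lock macros
 * are placeholders): a caller discarding a range it no longer needs
 * might, with the object lock held, do something like
 *
 *	vm_object_lock(object);
 *	vm_object_deactivate_pages(object, range_offset, range_size,
 *				   TRUE,	// kill_page: forget dirty state
 *				   FALSE);	// reusable_page
 *	vm_object_unlock(object);
 */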
2469
2470 __private_extern__ void
2471 vm_object_deactivate_pages(
2472 vm_object_t object,
2473 vm_object_offset_t offset,
2474 vm_object_size_t size,
2475 boolean_t kill_page,
2476 boolean_t reusable_page)
2477 {
2478 vm_object_size_t length;
2479 boolean_t all_reusable;
2480
2481 /*
2482 * We break the range up into chunks and do one chunk at a time. This is for
2483 * efficiency and performance while handling the shadow chains and the locks.
2484 * The deactivate_a_chunk() function returns how much of the range it processed.
2485 * We keep calling this routine until the given size is exhausted.
2486 */
2487
2488
2489 all_reusable = FALSE;
2490 if (reusable_page &&
2491 object->size != 0 &&
2492 object->size == size &&
2493 object->reusable_page_count == 0) {
2494 all_reusable = TRUE;
2495 reusable_page = FALSE;
2496 }
2497
2498 #if CONFIG_EMBEDDED
2499 if ((reusable_page || all_reusable) && object->all_reusable) {
2500 /* This means MADV_FREE_REUSABLE has been called twice, which
2501 * is probably illegal. */
2502 return;
2503 }
2504 #endif
2505
2506 while (size) {
2507 length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable);
2508
2509 size -= length;
2510 offset += length;
2511 }
2512
2513 if (all_reusable) {
2514 if (!object->all_reusable) {
2515 unsigned int reusable;
2516
2517 object->all_reusable = TRUE;
2518 assert(object->reusable_page_count == 0);
2519 /* update global stats */
2520 reusable = object->resident_page_count;
2521 OSAddAtomic(reusable,
2522 &vm_page_stats_reusable.reusable_count);
2523 vm_page_stats_reusable.reusable += reusable;
2524 vm_page_stats_reusable.all_reusable_calls++;
2525 }
2526 } else if (reusable_page) {
2527 vm_page_stats_reusable.partial_reusable_calls++;
2528 }
2529 }
2530
2531 void
2532 vm_object_reuse_pages(
2533 vm_object_t object,
2534 vm_object_offset_t start_offset,
2535 vm_object_offset_t end_offset,
2536 boolean_t allow_partial_reuse)
2537 {
2538 vm_object_offset_t cur_offset;
2539 vm_page_t m;
2540 unsigned int reused, reusable;
2541
2542 #define VM_OBJECT_REUSE_PAGE(object, m, reused) \
2543 MACRO_BEGIN \
2544 if ((m) != VM_PAGE_NULL && \
2545 (m)->reusable) { \
2546 assert((object)->reusable_page_count <= \
2547 (object)->resident_page_count); \
2548 assert((object)->reusable_page_count > 0); \
2549 (object)->reusable_page_count--; \
2550 (m)->reusable = FALSE; \
2551 (reused)++; \
2552 } \
2553 MACRO_END
2554
2555 reused = 0;
2556 reusable = 0;
2557
2558 vm_object_lock_assert_exclusive(object);
2559
2560 if (object->all_reusable) {
2561 assert(object->reusable_page_count == 0);
2562 object->all_reusable = FALSE;
2563 if (end_offset - start_offset == object->size ||
2564 !allow_partial_reuse) {
2565 vm_page_stats_reusable.all_reuse_calls++;
2566 reused = object->resident_page_count;
2567 } else {
2568 vm_page_stats_reusable.partial_reuse_calls++;
2569 queue_iterate(&object->memq, m, vm_page_t, listq) {
2570 if (m->offset < start_offset ||
2571 m->offset >= end_offset) {
2572 m->reusable = TRUE;
2573 object->reusable_page_count++;
2574 assert(object->resident_page_count >= object->reusable_page_count);
2575 continue;
2576 } else {
2577 assert(!m->reusable);
2578 reused++;
2579 }
2580 }
2581 }
2582 } else if (object->resident_page_count >
2583 ((end_offset - start_offset) >> PAGE_SHIFT)) {
2584 vm_page_stats_reusable.partial_reuse_calls++;
2585 for (cur_offset = start_offset;
2586 cur_offset < end_offset;
2587 cur_offset += PAGE_SIZE_64) {
2588 if (object->reusable_page_count == 0) {
2589 break;
2590 }
2591 m = vm_page_lookup(object, cur_offset);
2592 VM_OBJECT_REUSE_PAGE(object, m, reused);
2593 }
2594 } else {
2595 vm_page_stats_reusable.partial_reuse_calls++;
2596 queue_iterate(&object->memq, m, vm_page_t, listq) {
2597 if (object->reusable_page_count == 0) {
2598 break;
2599 }
2600 if (m->offset < start_offset ||
2601 m->offset >= end_offset) {
2602 continue;
2603 }
2604 VM_OBJECT_REUSE_PAGE(object, m, reused);
2605 }
2606 }
2607
2608 /* update global stats */
2609 OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count);
2610 vm_page_stats_reusable.reused += reused;
2611 vm_page_stats_reusable.reusable += reusable;
2612 }
2613
2614 /*
2615 * Routine: vm_object_pmap_protect
2616 *
2617 * Purpose:
2618 * Reduces the permission for all physical
2619 * pages in the specified object range.
2620 *
2621 * If removing write permission only, it is
2622 * sufficient to protect only the pages in
2623 * the top-level object; only those pages may
2624 * have write permission.
2625 *
2626 * If removing all access, we must follow the
2627 * shadow chain from the top-level object to
2628 * remove access to all pages in shadowed objects.
2629 *
2630 * The object must *not* be locked. The object must
2631 * be temporary/internal.
2632 *
2633 * If pmap is not NULL, this routine assumes that
2634 * the only mappings for the pages are in that
2635 * pmap.
2636 */
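/*
 * Hypothetical usage sketch (identifiers other than vm_object_pmap_protect
 * and VM_PROT_WRITE are placeholders): a caller revoking write access over
 * a mapped range, e.g. while setting up copy-on-write, might call
 *
 *	vm_object_pmap_protect(entry_object, entry_offset, entry_size,
 *			       map_pmap, entry_start,
 *			       cur_prot & ~VM_PROT_WRITE);
 *
 * with the object unlocked, per the conditions above.
 */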
2637
2638 __private_extern__ void
2639 vm_object_pmap_protect(
2640 register vm_object_t object,
2641 register vm_object_offset_t offset,
2642 vm_object_size_t size,
2643 pmap_t pmap,
2644 vm_map_offset_t pmap_start,
2645 vm_prot_t prot)
2646 {
2647 if (object == VM_OBJECT_NULL)
2648 return;
2649 size = vm_object_round_page(size);
2650 offset = vm_object_trunc_page(offset);
2651
2652 vm_object_lock(object);
2653
2654 if (object->phys_contiguous) {
2655 if (pmap != NULL) {
2656 vm_object_unlock(object);
2657 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
2658 } else {
2659 vm_object_offset_t phys_start, phys_end, phys_addr;
2660
2661 phys_start = object->shadow_offset + offset;
2662 phys_end = phys_start + size;
2663 assert(phys_start <= phys_end);
2664 assert(phys_end <= object->shadow_offset + object->size);
2665 vm_object_unlock(object);
2666
2667 for (phys_addr = phys_start;
2668 phys_addr < phys_end;
2669 phys_addr += PAGE_SIZE_64) {
2670 pmap_page_protect((ppnum_t) (phys_addr >> PAGE_SHIFT), prot);
2671 }
2672 }
2673 return;
2674 }
2675
2676 assert(object->internal);
2677
2678 while (TRUE) {
2679 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
2680 vm_object_unlock(object);
2681 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
2682 return;
2683 }
2684
2685 /* if we are doing large ranges with respect to resident */
2686 /* page count then we should iterate over pages, otherwise */
2687 /* inverse page look-up will be faster */
2688 if (ptoa_64(object->resident_page_count / 4) < size) {
2689 vm_page_t p;
2690 vm_object_offset_t end;
2691
2692 end = offset + size;
2693
2694 if (pmap != PMAP_NULL) {
2695 queue_iterate(&object->memq, p, vm_page_t, listq) {
2696 if (!p->fictitious &&
2697 (offset <= p->offset) && (p->offset < end)) {
2698 vm_map_offset_t start;
2699
2700 start = pmap_start + p->offset - offset;
2701 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
2702 }
2703 }
2704 } else {
2705 queue_iterate(&object->memq, p, vm_page_t, listq) {
2706 if (!p->fictitious &&
2707 (offset <= p->offset) && (p->offset < end)) {
2708
2709 pmap_page_protect(p->phys_page, prot);
2710 }
2711 }
2712 }
2713 } else {
2714 vm_page_t p;
2715 vm_object_offset_t end;
2716 vm_object_offset_t target_off;
2717
2718 end = offset + size;
2719
2720 if (pmap != PMAP_NULL) {
2721 for(target_off = offset;
2722 target_off < end;
2723 target_off += PAGE_SIZE) {
2724 p = vm_page_lookup(object, target_off);
2725 if (p != VM_PAGE_NULL) {
2726 vm_object_offset_t start;
2727 start = pmap_start +
2728 (p->offset - offset);
2729 pmap_protect(pmap, start,
2730 start + PAGE_SIZE, prot);
2731 }
2732 }
2733 } else {
2734 for(target_off = offset;
2735 target_off < end; target_off += PAGE_SIZE) {
2736 p = vm_page_lookup(object, target_off);
2737 if (p != VM_PAGE_NULL) {
2738 pmap_page_protect(p->phys_page, prot);
2739 }
2740 }
2741 }
2742 }
2743
2744 if (prot == VM_PROT_NONE) {
2745 /*
2746 * Must follow shadow chain to remove access
2747 * to pages in shadowed objects.
2748 */
2749 register vm_object_t next_object;
2750
2751 next_object = object->shadow;
2752 if (next_object != VM_OBJECT_NULL) {
2753 offset += object->shadow_offset;
2754 vm_object_lock(next_object);
2755 vm_object_unlock(object);
2756 object = next_object;
2757 }
2758 else {
2759 /*
2760 * End of chain - we are done.
2761 */
2762 break;
2763 }
2764 }
2765 else {
2766 /*
2767 * Pages in shadowed objects may never have
2768 * write permission - we may stop here.
2769 */
2770 break;
2771 }
2772 }
2773
2774 vm_object_unlock(object);
2775 }
2776
2777 /*
2778 * Routine: vm_object_copy_slowly
2779 *
2780 * Description:
2781 * Copy the specified range of the source
2782 * virtual memory object without using
2783 * protection-based optimizations (such
2784 * as copy-on-write). The pages in the
2785 * region are actually copied.
2786 *
2787 * In/out conditions:
2788 * The caller must hold a reference and a lock
2789 * for the source virtual memory object. The source
2790 * object will be returned *unlocked*.
2791 *
2792 * Results:
2793 * If the copy is completed successfully, KERN_SUCCESS is
2794 * returned. If the caller asserted the interruptible
2795 * argument, and an interruption occurred while waiting
2796 * for a user-generated event, MACH_SEND_INTERRUPTED is
2797 * returned. Other values may be returned to indicate
2798 * hard errors during the copy operation.
2799 *
2800 * A new virtual memory object is returned in a
2801 * parameter (_result_object). The contents of this
2802 * new object, starting at a zero offset, are a copy
2803 * of the source memory region. In the event of
2804 * an error, this parameter will contain the value
2805 * VM_OBJECT_NULL.
2806 */
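/*
 * Hypothetical caller sketch (placeholders except vm_object_copy_slowly
 * and the lock calls): per the conditions above, the source object is
 * passed in locked with a reference held and comes back unlocked.
 *
 *	vm_object_t	result;
 *	kern_return_t	kr;
 *
 *	vm_object_lock(src_object);
 *	kr = vm_object_copy_slowly(src_object, src_offset, len,
 *				   FALSE,	// not interruptible
 *				   &result);
 *	// on KERN_SUCCESS, "result" holds the copied pages at offset 0
 */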
2807 __private_extern__ kern_return_t
2808 vm_object_copy_slowly(
2809 register vm_object_t src_object,
2810 vm_object_offset_t src_offset,
2811 vm_object_size_t size,
2812 boolean_t interruptible,
2813 vm_object_t *_result_object) /* OUT */
2814 {
2815 vm_object_t new_object;
2816 vm_object_offset_t new_offset;
2817
2818 struct vm_object_fault_info fault_info;
2819
2820 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
2821 src_object, src_offset, size, 0, 0);
2822
2823 if (size == 0) {
2824 vm_object_unlock(src_object);
2825 *_result_object = VM_OBJECT_NULL;
2826 return(KERN_INVALID_ARGUMENT);
2827 }
2828
2829 /*
2830 * Prevent destruction of the source object while we copy.
2831 */
2832
2833 vm_object_reference_locked(src_object);
2834 vm_object_unlock(src_object);
2835
2836 /*
2837 * Create a new object to hold the copied pages.
2838 * A few notes:
2839 * We fill the new object starting at offset 0,
2840 * regardless of the input offset.
2841 * We don't bother to lock the new object within
2842 * this routine, since we have the only reference.
2843 */
2844
2845 new_object = vm_object_allocate(size);
2846 new_offset = 0;
2847
2848 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
2849
2850 fault_info.interruptible = interruptible;
2851 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
2852 fault_info.user_tag = 0;
2853 fault_info.lo_offset = src_offset;
2854 fault_info.hi_offset = src_offset + size;
2855 fault_info.no_cache = FALSE;
2856 fault_info.stealth = TRUE;
2857
2858 for ( ;
2859 size != 0 ;
2860 src_offset += PAGE_SIZE_64,
2861 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
2862 ) {
2863 vm_page_t new_page;
2864 vm_fault_return_t result;
2865
2866 vm_object_lock(new_object);
2867
2868 while ((new_page = vm_page_alloc(new_object, new_offset))
2869 == VM_PAGE_NULL) {
2870
2871 vm_object_unlock(new_object);
2872
2873 if (!vm_page_wait(interruptible)) {
2874 vm_object_deallocate(new_object);
2875 vm_object_deallocate(src_object);
2876 *_result_object = VM_OBJECT_NULL;
2877 return(MACH_SEND_INTERRUPTED);
2878 }
2879 vm_object_lock(new_object);
2880 }
2881 vm_object_unlock(new_object);
2882
2883 do {
2884 vm_prot_t prot = VM_PROT_READ;
2885 vm_page_t _result_page;
2886 vm_page_t top_page;
2887 register
2888 vm_page_t result_page;
2889 kern_return_t error_code;
2890
2891 vm_object_lock(src_object);
2892 vm_object_paging_begin(src_object);
2893
2894 if (size > (vm_size_t) -1) {
2895 /* 32-bit overflow */
2896 fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE);
2897 } else {
2898 fault_info.cluster_size = (vm_size_t) size;
2899 assert(fault_info.cluster_size == size);
2900 }
2901
2902 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
2903 result = vm_fault_page(src_object, src_offset,
2904 VM_PROT_READ, FALSE,
2905 &prot, &_result_page, &top_page,
2906 (int *)0,
2907 &error_code, FALSE, FALSE, &fault_info);
2908
2909 switch(result) {
2910 case VM_FAULT_SUCCESS:
2911 result_page = _result_page;
2912
2913 /*
2914 * We don't need to hold the object
2915 * lock -- the busy page will be enough.
2916 * [We don't care about picking up any
2917 * new modifications.]
2918 *
2919 * Copy the page to the new object.
2920 *
2921 * POLICY DECISION:
2922 * If result_page is clean,
2923 * we could steal it instead
2924 * of copying.
2925 */
2926
2927 vm_object_unlock(result_page->object);
2928 vm_page_copy(result_page, new_page);
2929
2930 /*
2931 * Let go of both pages (make them
2932 * not busy, perform wakeup, activate).
2933 */
2934 vm_object_lock(new_object);
2935 new_page->dirty = TRUE;
2936 PAGE_WAKEUP_DONE(new_page);
2937 vm_object_unlock(new_object);
2938
2939 vm_object_lock(result_page->object);
2940 PAGE_WAKEUP_DONE(result_page);
2941
2942 vm_page_lockspin_queues();
2943 if (!result_page->active &&
2944 !result_page->inactive &&
2945 !result_page->throttled)
2946 vm_page_activate(result_page);
2947 vm_page_activate(new_page);
2948 vm_page_unlock_queues();
2949
2950 /*
2951 * Release paging references and
2952 * top-level placeholder page, if any.
2953 */
2954
2955 vm_fault_cleanup(result_page->object,
2956 top_page);
2957
2958 break;
2959
2960 case VM_FAULT_RETRY:
2961 break;
2962
2963 case VM_FAULT_FICTITIOUS_SHORTAGE:
2964 vm_page_more_fictitious();
2965 break;
2966
2967 case VM_FAULT_MEMORY_SHORTAGE:
2968 if (vm_page_wait(interruptible))
2969 break;
2970 /* fall thru */
2971
2972 case VM_FAULT_INTERRUPTED:
2973 vm_object_lock(new_object);
2974 VM_PAGE_FREE(new_page);
2975 vm_object_unlock(new_object);
2976
2977 vm_object_deallocate(new_object);
2978 vm_object_deallocate(src_object);
2979 *_result_object = VM_OBJECT_NULL;
2980 return(MACH_SEND_INTERRUPTED);
2981
2982 case VM_FAULT_SUCCESS_NO_VM_PAGE:
2983 /* success but no VM page: fail */
2984 vm_object_paging_end(src_object);
2985 vm_object_unlock(src_object);
2986 /*FALLTHROUGH*/
2987 case VM_FAULT_MEMORY_ERROR:
2988 /*
2989 * A policy choice:
2990 * (a) ignore pages that we can't
2991 * copy
2992 * (b) return the null object if
2993 * any page fails [chosen]
2994 */
2995
2996 vm_object_lock(new_object);
2997 VM_PAGE_FREE(new_page);
2998 vm_object_unlock(new_object);
2999
3000 vm_object_deallocate(new_object);
3001 vm_object_deallocate(src_object);
3002 *_result_object = VM_OBJECT_NULL;
3003 return(error_code ? error_code:
3004 KERN_MEMORY_ERROR);
3005
3006 default:
3007 panic("vm_object_copy_slowly: unexpected error"
3008 " 0x%x from vm_fault_page()\n", result);
3009 }
3010 } while (result != VM_FAULT_SUCCESS);
3011 }
3012
3013 /*
3014 * Lose the extra reference, and return our object.
3015 */
3016 vm_object_deallocate(src_object);
3017 *_result_object = new_object;
3018 return(KERN_SUCCESS);
3019 }
3020
3021 /*
3022 * Routine: vm_object_copy_quickly
3023 *
3024 * Purpose:
3025 * Copy the specified range of the source virtual
3026 * memory object, if it can be done without waiting
3027 * for user-generated events.
3028 *
3029 * Results:
3030 * If the copy is successful, the copy is returned in
3031 * the arguments; otherwise, the arguments are not
3032 * affected.
3033 *
3034 * In/out conditions:
3035 * The object should be unlocked on entry and exit.
3036 */
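/*
 * Hypothetical caller sketch (placeholders except vm_object_copy_quickly):
 *
 *	boolean_t	src_needs_copy, dst_needs_copy;
 *
 *	if (vm_object_copy_quickly(&object, off, len,
 *				   &src_needs_copy, &dst_needs_copy)) {
 *		// symmetric case: the caller must now mark the source and
 *		// destination mappings copy-on-write as indicated by the
 *		// two flags
 *	} else {
 *		// fall back to a slower strategy, e.g. vm_object_copy_slowly
 *	}
 */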
3037
3038 /*ARGSUSED*/
3039 __private_extern__ boolean_t
3040 vm_object_copy_quickly(
3041 vm_object_t *_object, /* INOUT */
3042 __unused vm_object_offset_t offset, /* IN */
3043 __unused vm_object_size_t size, /* IN */
3044 boolean_t *_src_needs_copy, /* OUT */
3045 boolean_t *_dst_needs_copy) /* OUT */
3046 {
3047 vm_object_t object = *_object;
3048 memory_object_copy_strategy_t copy_strategy;
3049
3050 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
3051 *_object, offset, size, 0, 0);
3052 if (object == VM_OBJECT_NULL) {
3053 *_src_needs_copy = FALSE;
3054 *_dst_needs_copy = FALSE;
3055 return(TRUE);
3056 }
3057
3058 vm_object_lock(object);
3059
3060 copy_strategy = object->copy_strategy;
3061
3062 switch (copy_strategy) {
3063 case MEMORY_OBJECT_COPY_SYMMETRIC:
3064
3065 /*
3066 * Symmetric copy strategy.
3067 * Make another reference to the object.
3068 * Leave object/offset unchanged.
3069 */
3070
3071 vm_object_reference_locked(object);
3072 object->shadowed = TRUE;
3073 vm_object_unlock(object);
3074
3075 /*
3076 * Both source and destination must make
3077 * shadows, and the source must be made
3078 * read-only if not already.
3079 */
3080
3081 *_src_needs_copy = TRUE;
3082 *_dst_needs_copy = TRUE;
3083
3084 break;
3085
3086 case MEMORY_OBJECT_COPY_DELAY:
3087 vm_object_unlock(object);
3088 return(FALSE);
3089
3090 default:
3091 vm_object_unlock(object);
3092 return(FALSE);
3093 }
3094 return(TRUE);
3095 }
3096
3097 static int copy_call_count = 0;
3098 static int copy_call_sleep_count = 0;
3099 static int copy_call_restart_count = 0;
3100
3101 /*
3102 * Routine: vm_object_copy_call [internal]
3103 *
3104 * Description:
3105 * Copy the source object (src_object), using the
3106 * user-managed copy algorithm.
3107 *
3108 * In/out conditions:
3109 * The source object must be locked on entry. It
3110 * will be *unlocked* on exit.
3111 *
3112 * Results:
3113 * If the copy is successful, KERN_SUCCESS is returned.
3114 * A new object that represents the copied virtual
3115 * memory is returned in a parameter (*_result_object).
3116 * If the return value indicates an error, this parameter
3117 * is not valid.
3118 */
3119 static kern_return_t
3120 vm_object_copy_call(
3121 vm_object_t src_object,
3122 vm_object_offset_t src_offset,
3123 vm_object_size_t size,
3124 vm_object_t *_result_object) /* OUT */
3125 {
3126 kern_return_t kr;
3127 vm_object_t copy;
3128 boolean_t check_ready = FALSE;
3129 uint32_t try_failed_count = 0;
3130
3131 /*
3132 * If a copy is already in progress, wait and retry.
3133 *
3134 * XXX
3135 * Consider making this call interruptible, as Mike
3136 * intended it to be.
3137 *
3138 * XXXO
3139 * Need a counter or version or something to allow
3140 * us to use the copy that the currently requesting
3141 * thread is obtaining -- is it worth adding to the
3142 * vm object structure? Depends on how common this case is.
3143 */
3144 copy_call_count++;
3145 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
3146 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
3147 THREAD_UNINT);
3148 copy_call_restart_count++;
3149 }
3150
3151 /*
3152 * Indicate (for the benefit of memory_object_create_copy)
3153 * that we want a copy for src_object. (Note that we cannot
3154 * do a real assert_wait before calling memory_object_copy,
3155 * so we simply set the flag.)
3156 */
3157
3158 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
3159 vm_object_unlock(src_object);
3160
3161 /*
3162 * Ask the memory manager to give us a memory object
3163 * which represents a copy of the src object.
3164 * The memory manager may give us a memory object
3165 * which we already have, or it may give us a
3166 * new memory object. This memory object will arrive
3167 * via memory_object_create_copy.
3168 */
3169
3170 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
3171 if (kr != KERN_SUCCESS) {
3172 return kr;
3173 }
3174
3175 /*
3176 * Wait for the copy to arrive.
3177 */
3178 vm_object_lock(src_object);
3179 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
3180 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
3181 THREAD_UNINT);
3182 copy_call_sleep_count++;
3183 }
3184 Retry:
3185 assert(src_object->copy != VM_OBJECT_NULL);
3186 copy = src_object->copy;
3187 if (!vm_object_lock_try(copy)) {
3188 vm_object_unlock(src_object);
3189
3190 try_failed_count++;
3191 mutex_pause(try_failed_count); /* wait a bit */
3192
3193 vm_object_lock(src_object);
3194 goto Retry;
3195 }
3196 if (copy->size < src_offset+size)
3197 copy->size = src_offset+size;
3198
3199 if (!copy->pager_ready)
3200 check_ready = TRUE;
3201
3202 /*
3203 * Return the copy.
3204 */
3205 *_result_object = copy;
3206 vm_object_unlock(copy);
3207 vm_object_unlock(src_object);
3208
3209 /* Wait for the copy to be ready. */
3210 if (check_ready == TRUE) {
3211 vm_object_lock(copy);
3212 while (!copy->pager_ready) {
3213 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
3214 }
3215 vm_object_unlock(copy);
3216 }
3217
3218 return KERN_SUCCESS;
3219 }
3220
3221 static int copy_delayed_lock_collisions = 0;
3222 static int copy_delayed_max_collisions = 0;
3223 static int copy_delayed_lock_contention = 0;
3224 static int copy_delayed_protect_iterate = 0;
3225
3226 /*
3227 * Routine: vm_object_copy_delayed [internal]
3228 *
3229 * Description:
3230 * Copy the specified virtual memory object, using
3231 * the asymmetric copy-on-write algorithm.
3232 *
3233 * In/out conditions:
3234 * The src_object must be locked on entry. It will be unlocked
3235 * on exit - so the caller must also hold a reference to it.
3236 *
3237 * This routine will not block waiting for user-generated
3238 * events. It is not interruptible.
3239 */
3240 __private_extern__ vm_object_t
3241 vm_object_copy_delayed(
3242 vm_object_t src_object,
3243 vm_object_offset_t src_offset,
3244 vm_object_size_t size,
3245 boolean_t src_object_shared)
3246 {
3247 vm_object_t new_copy = VM_OBJECT_NULL;
3248 vm_object_t old_copy;
3249 vm_page_t p;
3250 vm_object_size_t copy_size = src_offset + size;
3251
3252
3253 int collisions = 0;
3254 /*
3255 * The user-level memory manager wants to see all of the changes
3256 * to this object, but it has promised not to make any changes on
3257 * its own.
3258 *
3259 * Perform an asymmetric copy-on-write, as follows:
3260 * Create a new object, called a "copy object" to hold
3261 * pages modified by the new mapping (i.e., the copy,
3262 * not the original mapping).
3263 * Record the original object as the backing object for
3264 * the copy object. If the original mapping does not
3265 * change a page, it may be used read-only by the copy.
3266 * Record the copy object in the original object.
3267 * When the original mapping causes a page to be modified,
3268 * it must be copied to a new page that is "pushed" to
3269 * the copy object.
3270 * Mark the new mapping (the copy object) copy-on-write.
3271 * This makes the copy object itself read-only, allowing
3272 * it to be reused if the original mapping makes no
3273 * changes, and simplifying the synchronization required
3274 * in the "push" operation described above.
3275 *
3276 * The copy-on-write is said to be asymmetric because the original
3277 * object is *not* marked copy-on-write. A copied page is pushed
3278 * to the copy object, regardless of which party attempted to modify
3279 * the page.
3280 *
3281 * Repeated asymmetric copy operations may be done. If the
3282 * original object has not been changed since the last copy, its
3283 * copy object can be reused. Otherwise, a new copy object can be
3284 * inserted between the original object and its previous copy
3285 * object. Since any copy object is read-only, this cannot affect
3286 * affect the contents of the previous copy object.
3287 *
3288 * Note that a copy object is higher in the object tree than the
3289 * original object; therefore, use of the copy object recorded in
3290 * the original object must be done carefully, to avoid deadlock.
3291 */
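/*
 * Pictorially (illustration added for clarity, not in the original
 * comment): after a successful delayed copy,
 *
 *	new_copy->shadow == src_object
 *	src_object->copy == new_copy
 *
 * so unmodified pages are read through the shadow link, while pages
 * modified in the original are first pushed up into new_copy.
 */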
3292
3293 Retry:
3294
3295 /*
3296 * Wait for paging in progress.
3297 */
3298 if (!src_object->true_share &&
3299 (src_object->paging_in_progress != 0 ||
3300 src_object->activity_in_progress != 0)) {
3301 if (src_object_shared == TRUE) {
3302 vm_object_unlock(src_object);
3303 vm_object_lock(src_object);
3304 src_object_shared = FALSE;
3305 goto Retry;
3306 }
3307 vm_object_paging_wait(src_object, THREAD_UNINT);
3308 }
3309 /*
3310 * See whether we can reuse the result of a previous
3311 * copy operation.
3312 */
3313
3314 old_copy = src_object->copy;
3315 if (old_copy != VM_OBJECT_NULL) {
3316 int lock_granted;
3317
3318 /*
3319 * Try to get the locks (out of order)
3320 */
3321 if (src_object_shared == TRUE)
3322 lock_granted = vm_object_lock_try_shared(old_copy);
3323 else
3324 lock_granted = vm_object_lock_try(old_copy);
3325
3326 if (!lock_granted) {
3327 vm_object_unlock(src_object);
3328
3329 if (collisions++ == 0)
3330 copy_delayed_lock_contention++;
3331 mutex_pause(collisions);
3332
3333 /* Heisenberg Rules */
3334 copy_delayed_lock_collisions++;
3335
3336 if (collisions > copy_delayed_max_collisions)
3337 copy_delayed_max_collisions = collisions;
3338
3339 if (src_object_shared == TRUE)
3340 vm_object_lock_shared(src_object);
3341 else
3342 vm_object_lock(src_object);
3343
3344 goto Retry;
3345 }
3346
3347 /*
3348 * Determine whether the old copy object has
3349 * been modified.
3350 */
3351
3352 if (old_copy->resident_page_count == 0 &&
3353 !old_copy->pager_created) {
3354 /*
3355 * It has not been modified.
3356 *
3357 * Return another reference to
3358 * the existing copy-object if
3359 * we can safely grow it (if
3360 * needed).
3361 */
3362
3363 if (old_copy->size < copy_size) {
3364 if (src_object_shared == TRUE) {
3365 vm_object_unlock(old_copy);
3366 vm_object_unlock(src_object);
3367
3368 vm_object_lock(src_object);
3369 src_object_shared = FALSE;
3370 goto Retry;
3371 }
3372 /*
3373 * We can't perform a delayed copy if any of the
3374 * pages in the extended range are wired (because
3375 * we can't safely take write permission away from
3376 * wired pages). If the pages aren't wired, then
3377 * go ahead and protect them.
3378 */
3379 copy_delayed_protect_iterate++;
3380
3381 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
3382 if (!p->fictitious &&
3383 p->offset >= old_copy->size &&
3384 p->offset < copy_size) {
3385 if (VM_PAGE_WIRED(p)) {
3386 vm_object_unlock(old_copy);
3387 vm_object_unlock(src_object);
3388
3389 if (new_copy != VM_OBJECT_NULL) {
3390 vm_object_unlock(new_copy);
3391 vm_object_deallocate(new_copy);
3392 }
3393
3394 return VM_OBJECT_NULL;
3395 } else {
3396 pmap_page_protect(p->phys_page,
3397 (VM_PROT_ALL & ~VM_PROT_WRITE));
3398 }
3399 }
3400 }
3401 old_copy->size = copy_size;
3402 }
3403 if (src_object_shared == TRUE)
3404 vm_object_reference_shared(old_copy);
3405 else
3406 vm_object_reference_locked(old_copy);
3407 vm_object_unlock(old_copy);
3408 vm_object_unlock(src_object);
3409
3410 if (new_copy != VM_OBJECT_NULL) {
3411 vm_object_unlock(new_copy);
3412 vm_object_deallocate(new_copy);
3413 }
3414 return(old_copy);
3415 }
3416
3417
3418
3419 /*
3420 * Adjust the size argument so that the newly-created
3421 * copy object will be large enough to back either the
3422 * old copy object or the new mapping.
3423 */
3424 if (old_copy->size > copy_size)
3425 copy_size = old_copy->size;
3426
3427 if (new_copy == VM_OBJECT_NULL) {
3428 vm_object_unlock(old_copy);
3429 vm_object_unlock(src_object);
3430 new_copy = vm_object_allocate(copy_size);
3431 vm_object_lock(src_object);
3432 vm_object_lock(new_copy);
3433
3434 src_object_shared = FALSE;
3435 goto Retry;
3436 }
3437 new_copy->size = copy_size;
3438
3439 /*
3440 * The copy-object is always made large enough to
3441 * completely shadow the original object, since
3442 * it may have several users who want to shadow
3443 * the original object at different points.
3444 */
3445
3446 assert((old_copy->shadow == src_object) &&
3447 (old_copy->shadow_offset == (vm_object_offset_t) 0));
3448
3449 } else if (new_copy == VM_OBJECT_NULL) {
3450 vm_object_unlock(src_object);
3451 new_copy = vm_object_allocate(copy_size);
3452 vm_object_lock(src_object);
3453 vm_object_lock(new_copy);
3454
3455 src_object_shared = FALSE;
3456 goto Retry;
3457 }
3458
3459 /*
3460 * We now have the src object locked, and the new copy object
3461 * allocated and locked (and potentially the old copy locked).
3462 * Before we go any further, make sure we can still perform
3463 * a delayed copy, as the situation may have changed.
3464 *
3465 * Specifically, we can't perform a delayed copy if any of the
3466 * pages in the range are wired (because we can't safely take
3467 * write permission away from wired pages). If the pages aren't
3468 * wired, then go ahead and protect them.
3469 */
3470 copy_delayed_protect_iterate++;
3471
3472 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
3473 if (!p->fictitious && p->offset < copy_size) {
3474 if (VM_PAGE_WIRED(p)) {
3475 if (old_copy)
3476 vm_object_unlock(old_copy);
3477 vm_object_unlock(src_object);
3478 vm_object_unlock(new_copy);
3479 vm_object_deallocate(new_copy);
3480 return VM_OBJECT_NULL;
3481 } else {
3482 pmap_page_protect(p->phys_page,
3483 (VM_PROT_ALL & ~VM_PROT_WRITE));
3484 }
3485 }
3486 }
3487 if (old_copy != VM_OBJECT_NULL) {
3488 /*
3489 * Make the old copy-object shadow the new one.
3490 * It will receive no more pages from the original
3491 * object.
3492 */
3493
3494 /* remove ref. from old_copy */
3495 vm_object_lock_assert_exclusive(src_object);
3496 src_object->ref_count--;
3497 assert(src_object->ref_count > 0);
3498 vm_object_lock_assert_exclusive(old_copy);
3499 old_copy->shadow = new_copy;
3500 vm_object_lock_assert_exclusive(new_copy);
3501 assert(new_copy->ref_count > 0);
3502 new_copy->ref_count++; /* for old_copy->shadow ref. */
3503
3504 #if TASK_SWAPPER
3505 if (old_copy->res_count) {
3506 VM_OBJ_RES_INCR(new_copy);
3507 VM_OBJ_RES_DECR(src_object);
3508 }
3509 #endif
3510
3511 vm_object_unlock(old_copy); /* done with old_copy */
3512 }
3513
3514 /*
3515 * Point the new copy at the existing object.
3516 */
3517 vm_object_lock_assert_exclusive(new_copy);
3518 new_copy->shadow = src_object;
3519 new_copy->shadow_offset = 0;
3520 new_copy->shadowed = TRUE; /* caller must set needs_copy */
3521
3522 vm_object_lock_assert_exclusive(src_object);
3523 vm_object_reference_locked(src_object);
3524 src_object->copy = new_copy;
3525 vm_object_unlock(src_object);
3526 vm_object_unlock(new_copy);
3527
3528 XPR(XPR_VM_OBJECT,
3529 "vm_object_copy_delayed: used copy object %X for source %X\n",
3530 new_copy, src_object, 0, 0, 0);
3531
3532 return new_copy;
3533 }
3534
3535 /*
3536 * Routine: vm_object_copy_strategically
3537 *
3538 * Purpose:
3539 * Perform a copy according to the source object's
3540 * declared strategy. This operation may block,
3541 * and may be interrupted.
3542 */
3543 __private_extern__ kern_return_t
3544 vm_object_copy_strategically(
3545 register vm_object_t src_object,
3546 vm_object_offset_t src_offset,
3547 vm_object_size_t size,
3548 vm_object_t *dst_object, /* OUT */
3549 vm_object_offset_t *dst_offset, /* OUT */
3550 boolean_t *dst_needs_copy) /* OUT */
3551 {
3552 boolean_t result;
3553 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
3554 boolean_t object_lock_shared = FALSE;
3555 memory_object_copy_strategy_t copy_strategy;
3556
3557 assert(src_object != VM_OBJECT_NULL);
3558
3559 copy_strategy = src_object->copy_strategy;
3560
3561 if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
3562 vm_object_lock_shared(src_object);
3563 object_lock_shared = TRUE;
3564 } else
3565 vm_object_lock(src_object);
3566
3567 /*
3568 * The copy strategy is only valid if the memory manager
3569 * is "ready". Internal objects are always ready.
3570 */
3571
3572 while (!src_object->internal && !src_object->pager_ready) {
3573 wait_result_t wait_result;
3574
3575 if (object_lock_shared == TRUE) {
3576 vm_object_unlock(src_object);
3577 vm_object_lock(src_object);
3578 object_lock_shared = FALSE;
3579 continue;
3580 }
3581 wait_result = vm_object_sleep( src_object,
3582 VM_OBJECT_EVENT_PAGER_READY,
3583 interruptible);
3584 if (wait_result != THREAD_AWAKENED) {
3585 vm_object_unlock(src_object);
3586 *dst_object = VM_OBJECT_NULL;
3587 *dst_offset = 0;
3588 *dst_needs_copy = FALSE;
3589 return(MACH_SEND_INTERRUPTED);
3590 }
3591 }
3592
3593 /*
3594 * Use the appropriate copy strategy.
3595 */
3596
3597 switch (copy_strategy) {
3598 case MEMORY_OBJECT_COPY_DELAY:
3599 *dst_object = vm_object_copy_delayed(src_object,
3600 src_offset, size, object_lock_shared);
3601 if (*dst_object != VM_OBJECT_NULL) {
3602 *dst_offset = src_offset;
3603 *dst_needs_copy = TRUE;
3604 result = KERN_SUCCESS;
3605 break;
3606 }
3607 vm_object_lock(src_object);
3608 /* fall thru when delayed copy not allowed */
3609
3610 case MEMORY_OBJECT_COPY_NONE:
3611 result = vm_object_copy_slowly(src_object, src_offset, size,
3612 interruptible, dst_object);
3613 if (result == KERN_SUCCESS) {
3614 *dst_offset = 0;
3615 *dst_needs_copy = FALSE;
3616 }
3617 break;
3618
3619 case MEMORY_OBJECT_COPY_CALL:
3620 result = vm_object_copy_call(src_object, src_offset, size,
3621 dst_object);
3622 if (result == KERN_SUCCESS) {
3623 *dst_offset = src_offset;
3624 *dst_needs_copy = TRUE;
3625 }
3626 break;
3627
3628 case MEMORY_OBJECT_COPY_SYMMETRIC:
3629 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0);
3630 vm_object_unlock(src_object);
3631 result = KERN_MEMORY_RESTART_COPY;
3632 break;
3633
3634 default:
3635 panic("copy_strategically: bad strategy");
3636 result = KERN_INVALID_ARGUMENT;
3637 }
3638 return(result);
3639 }
3640
3641 /*
3642 * vm_object_shadow:
3643 *
3644 * Create a new object which is backed by the
3645 * specified existing object range. The source
3646 * object reference is deallocated.
3647 *
3648 * The new object and offset into that object
3649 * are returned in the source parameters.
3650 */
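/*
 * Hypothetical caller sketch (placeholders except vm_object_shadow):
 *
 *	vm_object_t		obj = entry_object;	// caller's reference
 *	vm_object_offset_t	off = entry_offset;
 *
 *	if (vm_object_shadow(&obj, &off, entry_size)) {
 *		// obj now names the new shadow object and off == 0; the
 *		// reference previously held on entry_object is consumed by
 *		// the new object's shadow link
 *	}
 */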
3651 boolean_t vm_object_shadow_check = FALSE;
3652
3653 __private_extern__ boolean_t
3654 vm_object_shadow(
3655 vm_object_t *object, /* IN/OUT */
3656 vm_object_offset_t *offset, /* IN/OUT */
3657 vm_object_size_t length)
3658 {
3659 register vm_object_t source;
3660 register vm_object_t result;
3661
3662 source = *object;
3663 #if 0
3664 /*
3665 * XXX FBDP
3666 * This assertion is valid but it gets triggered by Rosetta for example
3667 * due to a combination of vm_remap() that changes a VM object's
3668 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
3669 * that then sets "needs_copy" on its map entry. This creates a
3670 * mapping situation that VM should never see and doesn't know how to
3671 * handle.
3672 * It's not clear if this can create any real problem but we should
3673 * look into fixing this, probably by having vm_protect(VM_PROT_COPY)
3674 * do more than just set "needs_copy" to handle the copy-on-write...
3675 * In the meantime, let's disable the assertion.
3676 */
3677 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
3678 #endif
3679
3680 /*
3681 * Determine if we really need a shadow.
3682 */
3683
3684 if (vm_object_shadow_check && source->ref_count == 1 &&
3685 (source->shadow == VM_OBJECT_NULL ||
3686 source->shadow->copy == VM_OBJECT_NULL))
3687 {
3688 source->shadowed = FALSE;
3689 return FALSE;
3690 }
3691
3692 /*
3693 * Allocate a new object with the given length
3694 */
3695
3696 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
3697 panic("vm_object_shadow: no object for shadowing");
3698
3699 /*
3700 * The new object shadows the source object, adding
3701 * a reference to it. Our caller changes his reference
3702 * to point to the new object, removing a reference to
3703 * the source object. Net result: no change of reference
3704 * count.
3705 */
3706 result->shadow = source;
3707
3708 /*
3709 * Store the offset into the source object,
3710 * and fix up the offset into the new object.
3711 */
3712
3713 result->shadow_offset = *offset;
3714
3715 /*
3716 * Return the new things
3717 */
3718
3719 *offset = 0;
3720 *object = result;
3721 return TRUE;
3722 }
3723
3724 /*
3725 * The relationship between vm_object structures and
3726 * the memory_object requires careful synchronization.
3727 *
3728 * All associations are created by memory_object_create_named
3729 * for external pagers and vm_object_pager_create for internal
3730 * objects as follows:
3731 *
3732 * pager: the memory_object itself, supplied by
3733 * the user requesting a mapping (or the kernel,
3734 * when initializing internal objects); the
3735 * kernel simulates holding send rights by keeping
3736 * a port reference;
3737 *
3738 * pager_request:
3739 * the memory object control port,
3740 * created by the kernel; the kernel holds
3741 * receive (and ownership) rights to this
3742 * port, but no other references.
3743 *
3744 * When initialization is complete, the "initialized" field
3745 * is asserted. Other mappings using a particular memory object,
3746 * and any references to the vm_object gained through the
3747 * port association must wait for this initialization to occur.
3748 *
3749 * In order to allow the memory manager to set attributes before
3750 * requests (notably virtual copy operations, but also data or
3751 * unlock requests) are made, a "ready" attribute is made available.
3752 * Only the memory manager may affect the value of this attribute.
3753 * Its value does not affect critical kernel functions, such as
3754 * internal object initialization or destruction. [Furthermore,
3755 * memory objects created by the kernel are assumed to be ready
3756 * immediately; the default memory manager need not explicitly
3757 * set the "ready" attribute.]
3758 *
3759 * [Both the "initialized" and "ready" attribute wait conditions
3760 * use the "pager" field as the wait event.]
3761 *
3762 * The port associations can be broken down by any of the
3763 * following routines:
3764 * vm_object_terminate:
3765 * No references to the vm_object remain, and
3766 * the object cannot (or will not) be cached.
3767 * This is the normal case, and is done even
3768 * though one of the other cases has already been
3769 * done.
3770 * memory_object_destroy:
3771 * The memory manager has requested that the
3772 * kernel relinquish references to the memory
3773 * object. [The memory manager may not want to
3774 * destroy the memory object, but may wish to
3775 * refuse or tear down existing memory mappings.]
3776 *
3777 * Each routine that breaks an association must break all of
3778 * them at once. At some later time, that routine must clear
3779 * the pager field and release the memory object references.
3780 * [Furthermore, each routine must cope with the simultaneous
3781 * or previous operations of the others.]
3782 *
3783 * In addition to the lock on the object, the vm_object_hash_lock
3784 * governs the associations. References gained through the
3785 * association require use of the hash lock.
3786 *
3787 * Because the pager field may be cleared spontaneously, it
3788 * cannot be used to determine whether a memory object has
3789 * ever been associated with a particular vm_object. [This
3790 * knowledge is important to the shadow object mechanism.]
3791 * For this reason, an additional "created" attribute is
3792 * provided.
3793 *
3794 * During various paging operations, the pager reference found in the
3795 * vm_object must be valid. To prevent this from being released,
3796 * (other than being removed, i.e., made null), routines may use
3797 * the vm_object_paging_begin/end routines [actually, macros].
3798 * The implementation uses the "paging_in_progress" and "wanted" fields.
3799 * [Operations that alter the validity of the pager values include the
3800 * termination routines and vm_object_collapse.]
3801 */
3802
3803
3804 /*
3805 * Routine: vm_object_enter
3806 * Purpose:
3807 * Find a VM object corresponding to the given
3808 * pager; if no such object exists, create one,
3809 * and initialize the pager.
3810 */
3811 vm_object_t
3812 vm_object_enter(
3813 memory_object_t pager,
3814 vm_object_size_t size,
3815 boolean_t internal,
3816 boolean_t init,
3817 boolean_t named)
3818 {
3819 register vm_object_t object;
3820 vm_object_t new_object;
3821 boolean_t must_init;
3822 vm_object_hash_entry_t entry, new_entry;
3823 uint32_t try_failed_count = 0;
3824 lck_mtx_t *lck;
3825
3826 if (pager == MEMORY_OBJECT_NULL)
3827 return(vm_object_allocate(size));
3828
3829 new_object = VM_OBJECT_NULL;
3830 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3831 must_init = init;
3832
3833 /*
3834 * Look for an object associated with this port.
3835 */
3836 Retry:
3837 lck = vm_object_hash_lock_spin(pager);
3838 do {
3839 entry = vm_object_hash_lookup(pager, FALSE);
3840
3841 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
3842 if (new_object == VM_OBJECT_NULL) {
3843 /*
3844 * We must unlock to create a new object;
3845 * if we do so, we must try the lookup again.
3846 */
3847 vm_object_hash_unlock(lck);
3848 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
3849 new_entry = vm_object_hash_entry_alloc(pager);
3850 new_object = vm_object_allocate(size);
3851 lck = vm_object_hash_lock_spin(pager);
3852 } else {
3853 /*
3854 * Lookup failed twice, and we have something
3855 * to insert; set the object.
3856 */
3857 vm_object_hash_insert(new_entry, new_object);
3858 entry = new_entry;
3859 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
3860 new_object = VM_OBJECT_NULL;
3861 must_init = TRUE;
3862 }
3863 } else if (entry->object == VM_OBJECT_NULL) {
3864 /*
3865 * If a previous object is being terminated,
3866 * we must wait for the termination message
3867 * to be queued (and lookup the entry again).
3868 */
3869 entry->waiting = TRUE;
3870 entry = VM_OBJECT_HASH_ENTRY_NULL;
3871 assert_wait((event_t) pager, THREAD_UNINT);
3872 vm_object_hash_unlock(lck);
3873
3874 thread_block(THREAD_CONTINUE_NULL);
3875 lck = vm_object_hash_lock_spin(pager);
3876 }
3877 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
3878
3879 object = entry->object;
3880 assert(object != VM_OBJECT_NULL);
3881
3882 if (!must_init) {
3883 if ( !vm_object_lock_try(object)) {
3884
3885 vm_object_hash_unlock(lck);
3886
3887 try_failed_count++;
3888 mutex_pause(try_failed_count); /* wait a bit */
3889 goto Retry;
3890 }
3891 assert(!internal || object->internal);
3892 #if VM_OBJECT_CACHE
3893 if (object->ref_count == 0) {
3894 if ( !vm_object_cache_lock_try()) {
3895
3896 vm_object_hash_unlock(lck);
3897 vm_object_unlock(object);
3898
3899 try_failed_count++;
3900 mutex_pause(try_failed_count); /* wait a bit */
3901 goto Retry;
3902 }
3903 XPR(XPR_VM_OBJECT_CACHE,
3904 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
3905 object,
3906 vm_object_cached_list.next,
3907 vm_object_cached_list.prev, 0,0);
3908 queue_remove(&vm_object_cached_list, object,
3909 vm_object_t, cached_list);
3910 vm_object_cached_count--;
3911
3912 vm_object_cache_unlock();
3913 }
3914 #endif
3915 if (named) {
3916 assert(!object->named);
3917 object->named = TRUE;
3918 }
3919 vm_object_lock_assert_exclusive(object);
3920 object->ref_count++;
3921 vm_object_res_reference(object);
3922
3923 vm_object_hash_unlock(lck);
3924 vm_object_unlock(object);
3925
3926 VM_STAT_INCR(hits);
3927 } else
3928 vm_object_hash_unlock(lck);
3929
3930 assert(object->ref_count > 0);
3931
3932 VM_STAT_INCR(lookups);
3933
3934 XPR(XPR_VM_OBJECT,
3935 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
3936 pager, object, must_init, 0, 0);
3937
3938 /*
3939 * If we raced to create a vm_object but lost, let's
3940 * throw away ours.
3941 */
3942
3943 if (new_object != VM_OBJECT_NULL)
3944 vm_object_deallocate(new_object);
3945
3946 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
3947 vm_object_hash_entry_free(new_entry);
3948
3949 if (must_init) {
3950 memory_object_control_t control;
3951
3952 /*
3953 * Allocate request port.
3954 */
3955
3956 control = memory_object_control_allocate(object);
3957 assert (control != MEMORY_OBJECT_CONTROL_NULL);
3958
3959 vm_object_lock(object);
3960 assert(object != kernel_object);
3961
3962 /*
3963 * Copy the reference we were given.
3964 */
3965
3966 memory_object_reference(pager);
3967 object->pager_created = TRUE;
3968 object->pager = pager;
3969 object->internal = internal;
3970 object->pager_trusted = internal;
3971 if (!internal) {
3972 /* copy strategy invalid until set by memory manager */
3973 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
3974 }
3975 object->pager_control = control;
3976 object->pager_ready = FALSE;
3977
3978 vm_object_unlock(object);
3979
3980 /*
3981 * Let the pager know we're using it.
3982 */
3983
3984 (void) memory_object_init(pager,
3985 object->pager_control,
3986 PAGE_SIZE);
3987
3988 vm_object_lock(object);
3989 if (named)
3990 object->named = TRUE;
3991 if (internal) {
3992 object->pager_ready = TRUE;
3993 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
3994 }
3995
3996 object->pager_initialized = TRUE;
3997 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
3998 } else {
3999 vm_object_lock(object);
4000 }
4001
4002 /*
4003 * [At this point, the object must be locked]
4004 */
4005
4006 /*
4007 * Wait for the work above to be done by the first
4008 * thread to map this object.
4009 */
4010
4011 while (!object->pager_initialized) {
4012 vm_object_sleep(object,
4013 VM_OBJECT_EVENT_INITIALIZED,
4014 THREAD_UNINT);
4015 }
4016 vm_object_unlock(object);
4017
4018 XPR(XPR_VM_OBJECT,
4019 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
4020 object, object->pager, internal, 0,0);
4021 return(object);
4022 }
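/*
 * Typical use, mirroring memory_object_create_named() later in this
 * file (sketch only; error handling elided and names assumed from
 * that routine): look up or create the VM object for an external
 * pager, then wait for the pager to become ready.
 *
 *	object = vm_object_enter(pager, size, FALSE, FALSE, TRUE);
 *	if (object == VM_OBJECT_NULL)
 *		return KERN_INVALID_OBJECT;
 *	vm_object_lock(object);
 *	while (!object->pager_ready)
 *		vm_object_sleep(object,
 *				VM_OBJECT_EVENT_PAGER_READY,
 *				THREAD_UNINT);
 *	vm_object_unlock(object);
 */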
4023
4024 /*
4025 * Routine: vm_object_pager_create
4026 * Purpose:
4027 * Create a memory object for an internal object.
4028 * In/out conditions:
4029 * The object is locked on entry and exit;
4030 * it may be unlocked within this call.
4031 * Limitations:
4032 * Only one thread may be performing a
4033 * vm_object_pager_create on an object at
4034 * a time. Presumably, only the pageout
4035 * daemon will be using this routine.
4036 */
4037
4038 void
4039 vm_object_pager_create(
4040 register vm_object_t object)
4041 {
4042 memory_object_t pager;
4043 vm_object_hash_entry_t entry;
4044 lck_mtx_t *lck;
4045 #if MACH_PAGEMAP
4046 vm_object_size_t size;
4047 vm_external_map_t map;
4048 #endif /* MACH_PAGEMAP */
4049
4050 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
4051 object, 0,0,0,0);
4052
4053 assert(object != kernel_object);
4054
4055 if (memory_manager_default_check() != KERN_SUCCESS)
4056 return;
4057
4058 /*
4059 * Prevent collapse or termination by holding a paging reference
4060 */
4061
4062 vm_object_paging_begin(object);
4063 if (object->pager_created) {
4064 /*
4065 * Someone else got to it first...
4066 * wait for them to finish initializing the ports
4067 */
4068 while (!object->pager_initialized) {
4069 vm_object_sleep(object,
4070 VM_OBJECT_EVENT_INITIALIZED,
4071 THREAD_UNINT);
4072 }
4073 vm_object_paging_end(object);
4074 return;
4075 }
4076
4077 /*
4078 * Indicate that a memory object has been assigned
4079 * before dropping the lock, to prevent a race.
4080 */
4081
4082 object->pager_created = TRUE;
4083 object->paging_offset = 0;
4084
4085 #if MACH_PAGEMAP
4086 size = object->size;
4087 #endif /* MACH_PAGEMAP */
4088 vm_object_unlock(object);
4089
4090 #if MACH_PAGEMAP
4091 map = vm_external_create(size);
4092 vm_object_lock(object);
4093 assert(object->size == size);
4094 object->existence_map = map;
4095 vm_object_unlock(object);
4096 #endif /* MACH_PAGEMAP */
4097
4098 if ((uint32_t) object->size != object->size) {
4099 panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n",
4100 (uint64_t) object->size);
4101 }
4102
4103 /*
4104 * Create the [internal] pager, and associate it with this object.
4105 *
4106 * We make the association here so that vm_object_enter()
4107 * can look up the object to complete initializing it. No
4108 * user will ever map this object.
4109 */
4110 {
4111 memory_object_default_t dmm;
4112
4113 /* acquire a reference for the default memory manager */
4114 dmm = memory_manager_default_reference();
4115
4116 assert(object->temporary);
4117
4118 /* create our new memory object */
4119 assert((vm_size_t) object->size == object->size);
4120 (void) memory_object_create(dmm, (vm_size_t) object->size,
4121 &pager);
4122
4123 memory_object_default_deallocate(dmm);
4124 }
4125
4126 entry = vm_object_hash_entry_alloc(pager);
4127
4128 lck = vm_object_hash_lock_spin(pager);
4129 vm_object_hash_insert(entry, object);
4130 vm_object_hash_unlock(lck);
4131
4132 /*
4133 * A reference was returned by
4134 * memory_object_create(), and it is
4135 * copied by vm_object_enter().
4136 */
4137
4138 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
4139 panic("vm_object_pager_create: mismatch");
4140
4141 /*
4142 * Drop the reference we were passed.
4143 */
4144 memory_object_deallocate(pager);
4145
4146 vm_object_lock(object);
4147
4148 /*
4149 * Release the paging reference
4150 */
4151 vm_object_paging_end(object);
4152 }
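/*
 * The vm_object_paging_begin()/vm_object_paging_end() bracket used
 * above is the usual way to keep the pager association stable
 * (preventing collapse or termination) across a window where the
 * object lock may be dropped. A minimal sketch, with the work in the
 * middle left hypothetical:
 *
 *	vm_object_lock(object);
 *	vm_object_paging_begin(object);
 *	...	use object->pager, possibly dropping and
 *	...	retaking the object lock
 *	vm_object_paging_end(object);
 *	vm_object_unlock(object);
 */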
4153
4154 /*
4155 * Routine: vm_object_remove
4156 * Purpose:
4157 * Eliminate the pager/object association
4158 * for this pager.
4159 * Conditions:
4160 * The object cache must be locked.
4161 */
4162 __private_extern__ void
4163 vm_object_remove(
4164 vm_object_t object)
4165 {
4166 memory_object_t pager;
4167
4168 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
4169 vm_object_hash_entry_t entry;
4170
4171 entry = vm_object_hash_lookup(pager, FALSE);
4172 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
4173 entry->object = VM_OBJECT_NULL;
4174 }
4175
4176 }
4177
4178 /*
4179 * Global variables for vm_object_collapse():
4180 *
4181 * Counts for normal collapses and bypasses.
4182 * Debugging variables, to watch or disable collapse.
4183 */
4184 static long object_collapses = 0;
4185 static long object_bypasses = 0;
4186
4187 static boolean_t vm_object_collapse_allowed = TRUE;
4188 static boolean_t vm_object_bypass_allowed = TRUE;
4189
4190 #if MACH_PAGEMAP
4191 static int vm_external_discarded;
4192 static int vm_external_collapsed;
4193 #endif
4194
4195 unsigned long vm_object_collapse_encrypted = 0;
4196
4197 /*
4198 * Routine: vm_object_do_collapse
4199 * Purpose:
4200 * Collapse an object with the object backing it.
4201 * Pages in the backing object are moved into the
4202 * parent, and the backing object is deallocated.
4203 * Conditions:
4204 * Both objects and the cache are locked; the page
4205 * queues are unlocked.
4206 *
4207 */
4208 static void
4209 vm_object_do_collapse(
4210 vm_object_t object,
4211 vm_object_t backing_object)
4212 {
4213 vm_page_t p, pp;
4214 vm_object_offset_t new_offset, backing_offset;
4215 vm_object_size_t size;
4216
4217 vm_object_lock_assert_exclusive(object);
4218 vm_object_lock_assert_exclusive(backing_object);
4219
4220 backing_offset = object->shadow_offset;
4221 size = object->size;
4222
4223 /*
4224 * Move all in-memory pages from backing_object
4225 * to the parent. Pages that have been paged out
4226 * will be overwritten by any of the parent's
4227 * pages that shadow them.
4228 */
4229
4230 while (!queue_empty(&backing_object->memq)) {
4231
4232 p = (vm_page_t) queue_first(&backing_object->memq);
4233
4234 new_offset = (p->offset - backing_offset);
4235
4236 assert(!p->busy || p->absent);
4237
4238 /*
4239 * If the parent has a page here, or if
4240 * this page falls outside the parent,
4241 * dispose of it.
4242 *
4243 * Otherwise, move it as planned.
4244 */
4245
4246 if (p->offset < backing_offset || new_offset >= size) {
4247 VM_PAGE_FREE(p);
4248 } else {
4249 /*
4250 * ENCRYPTED SWAP:
4251 * The encryption key includes the "pager" and the
4252 * "paging_offset". These will not change during the
4253 * object collapse, so we can just move an encrypted
4254 * page from one object to the other in this case.
4255 * We can't decrypt the page here, since we can't drop
4256 * the object lock.
4257 */
4258 if (p->encrypted) {
4259 vm_object_collapse_encrypted++;
4260 }
4261 pp = vm_page_lookup(object, new_offset);
4262 if (pp == VM_PAGE_NULL) {
4263
4264 /*
4265 * Parent now has no page.
4266 * Move the backing object's page up.
4267 */
4268
4269 vm_page_rename(p, object, new_offset, TRUE);
4270 #if MACH_PAGEMAP
4271 } else if (pp->absent) {
4272
4273 /*
4274 * Parent has an absent page...
4275 * it's not being paged in, so
4276 * it must really be missing from
4277 * the parent.
4278 *
4279 * Throw out the absent page...
4280 * any faults looking for that
4281 * page will restart with the new
4282 * one.
4283 */
4284
4285 VM_PAGE_FREE(pp);
4286 vm_page_rename(p, object, new_offset, TRUE);
4287 #endif /* MACH_PAGEMAP */
4288 } else {
4289 assert(! pp->absent);
4290
4291 /*
4292 * Parent object has a real page.
4293 * Throw away the backing object's
4294 * page.
4295 */
4296 VM_PAGE_FREE(p);
4297 }
4298 }
4299 }
4300
4301 #if !MACH_PAGEMAP
4302 assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL))
4303 || (!backing_object->pager_created
4304 && (backing_object->pager == MEMORY_OBJECT_NULL)));
4305 #else
4306 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
4307 #endif /* !MACH_PAGEMAP */
4308
4309 if (backing_object->pager != MEMORY_OBJECT_NULL) {
4310 vm_object_hash_entry_t entry;
4311
4312 /*
4313 * Move the pager from backing_object to object.
4314 *
4315 * XXX We're only using part of the paging space
4316 * for keeps now... we ought to discard the
4317 * unused portion.
4318 */
4319
4320 assert(!object->paging_in_progress);
4321 assert(!object->activity_in_progress);
4322 object->pager = backing_object->pager;
4323
4324 if (backing_object->hashed) {
4325 lck_mtx_t *lck;
4326
4327 lck = vm_object_hash_lock_spin(backing_object->pager);
4328 entry = vm_object_hash_lookup(object->pager, FALSE);
4329 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
4330 entry->object = object;
4331 vm_object_hash_unlock(lck);
4332
4333 object->hashed = TRUE;
4334 }
4335 object->pager_created = backing_object->pager_created;
4336 object->pager_control = backing_object->pager_control;
4337 object->pager_ready = backing_object->pager_ready;
4338 object->pager_initialized = backing_object->pager_initialized;
4339 object->paging_offset =
4340 backing_object->paging_offset + backing_offset;
4341 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
4342 memory_object_control_collapse(object->pager_control,
4343 object);
4344 }
4345 }
4346
4347 #if MACH_PAGEMAP
4348 /*
4349 * If the shadow offset is 0, use the existence map from
4350 * the backing object if there is one. If the shadow offset is
4351 * not zero, toss it.
4352 *
4353 * XXX - If the shadow offset is not 0 then a bit copy is needed
4354 * if the map is to be salvaged. For now, we just toss the
4355 * old map, giving the collapsed object no map. This means that
4356 * the pager is invoked for zero fill pages. If analysis shows
4357 * that this happens frequently and is a performance hit, then
4358 * this code should be fixed to salvage the map.
4359 */
4360 assert(object->existence_map == VM_EXTERNAL_NULL);
4361 if (backing_offset || (size != backing_object->size)) {
4362 vm_external_discarded++;
4363 vm_external_destroy(backing_object->existence_map,
4364 backing_object->size);
4365 }
4366 else {
4367 vm_external_collapsed++;
4368 object->existence_map = backing_object->existence_map;
4369 }
4370 backing_object->existence_map = VM_EXTERNAL_NULL;
4371 #endif /* MACH_PAGEMAP */
4372
4373 /*
4374 * Object now shadows whatever backing_object did.
4375 * Note that the reference to backing_object->shadow
4376 * moves from within backing_object to within object.
4377 */
4378
4379 assert(!object->phys_contiguous);
4380 assert(!backing_object->phys_contiguous);
4381 object->shadow = backing_object->shadow;
4382 if (object->shadow) {
4383 object->shadow_offset += backing_object->shadow_offset;
4384 } else {
4385 /* no shadow, therefore no shadow offset... */
4386 object->shadow_offset = 0;
4387 }
4388 assert((object->shadow == VM_OBJECT_NULL) ||
4389 (object->shadow->copy != backing_object));
4390
4391 /*
4392 * Discard backing_object.
4393 *
4394 * Since the backing object has no pages, no
4395 * pager left, and no object references within it,
4396 * all that is necessary is to dispose of it.
4397 */
4398
4399 assert((backing_object->ref_count == 1) &&
4400 (backing_object->resident_page_count == 0) &&
4401 (backing_object->paging_in_progress == 0) &&
4402 (backing_object->activity_in_progress == 0));
4403
4404 backing_object->alive = FALSE;
4405 vm_object_unlock(backing_object);
4406
4407 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
4408 backing_object, 0,0,0,0);
4409
4410 vm_object_lock_destroy(backing_object);
4411
4412 zfree(vm_object_zone, backing_object);
4413
4414 object_collapses++;
4415 }
4416
4417 static void
4418 vm_object_do_bypass(
4419 vm_object_t object,
4420 vm_object_t backing_object)
4421 {
4422 /*
4423 * Make the parent shadow the next object
4424 * in the chain.
4425 */
4426
4427 vm_object_lock_assert_exclusive(object);
4428 vm_object_lock_assert_exclusive(backing_object);
4429
4430 #if TASK_SWAPPER
4431 /*
4432 * Do object reference in-line to
4433 * conditionally increment shadow's
4434 * residence count. If object is not
4435 * resident, leave residence count
4436 * on shadow alone.
4437 */
4438 if (backing_object->shadow != VM_OBJECT_NULL) {
4439 vm_object_lock(backing_object->shadow);
4440 vm_object_lock_assert_exclusive(backing_object->shadow);
4441 backing_object->shadow->ref_count++;
4442 if (object->res_count != 0)
4443 vm_object_res_reference(backing_object->shadow);
4444 vm_object_unlock(backing_object->shadow);
4445 }
4446 #else /* TASK_SWAPPER */
4447 vm_object_reference(backing_object->shadow);
4448 #endif /* TASK_SWAPPER */
4449
4450 assert(!object->phys_contiguous);
4451 assert(!backing_object->phys_contiguous);
4452 object->shadow = backing_object->shadow;
4453 if (object->shadow) {
4454 object->shadow_offset += backing_object->shadow_offset;
4455 } else {
4456 /* no shadow, therefore no shadow offset... */
4457 object->shadow_offset = 0;
4458 }
4459
4460 /*
4461 * Backing object might have had a copy pointer
4462 * to us. If it did, clear it.
4463 */
4464 if (backing_object->copy == object) {
4465 backing_object->copy = VM_OBJECT_NULL;
4466 }
4467
4468 /*
4469 * Drop the reference count on backing_object.
4470 #if TASK_SWAPPER
4471 * Since its ref_count was at least 2, it
4472 * will not vanish; so we don't need to call
4473 * vm_object_deallocate.
4474 * [with a caveat for "named" objects]
4475 *
4476 * The res_count on the backing object is
4477 * conditionally decremented. It's possible
4478 * (via vm_pageout_scan) to get here with
4479 * a "swapped" object, which has a 0 res_count,
4480 * in which case, the backing object res_count
4481 * is already down by one.
4482 #else
4483 * Don't call vm_object_deallocate unless
4484 * ref_count drops to zero.
4485 *
4486 * The ref_count can drop to zero here if the
4487 * backing object could be bypassed but not
4488 * collapsed, such as when the backing object
4489 * is temporary and cacheable.
4490 #endif
4491 */
4492 if (backing_object->ref_count > 2 ||
4493 (!backing_object->named && backing_object->ref_count > 1)) {
4494 vm_object_lock_assert_exclusive(backing_object);
4495 backing_object->ref_count--;
4496 #if TASK_SWAPPER
4497 if (object->res_count != 0)
4498 vm_object_res_deallocate(backing_object);
4499 assert(backing_object->ref_count > 0);
4500 #endif /* TASK_SWAPPER */
4501 vm_object_unlock(backing_object);
4502 } else {
4503
4504 /*
4505 * Drop locks so that we can deallocate
4506 * the backing object.
4507 */
4508
4509 #if TASK_SWAPPER
4510 if (object->res_count == 0) {
4511 /* XXX get a reference for the deallocate below */
4512 vm_object_res_reference(backing_object);
4513 }
4514 #endif /* TASK_SWAPPER */
4515 vm_object_unlock(object);
4516 vm_object_unlock(backing_object);
4517 vm_object_deallocate(backing_object);
4518
4519 /*
4520 * Relock object. We don't have to reverify
4521 * its state since vm_object_collapse will
4522 * do that for us as it starts at the
4523 * top of its loop.
4524 */
4525
4526 vm_object_lock(object);
4527 }
4528
4529 object_bypasses++;
4530 }
4531
4532
4533 /*
4534 * vm_object_collapse:
4535 *
4536 * Perform an object collapse or an object bypass if appropriate.
4537 * The real work of collapsing and bypassing is performed in
4538 * the routines vm_object_do_collapse and vm_object_do_bypass.
4539 *
4540 * Requires that the object be locked and the page queues be unlocked.
4541 *
4542 */
4543 static unsigned long vm_object_collapse_calls = 0;
4544 static unsigned long vm_object_collapse_objects = 0;
4545 static unsigned long vm_object_collapse_do_collapse = 0;
4546 static unsigned long vm_object_collapse_do_bypass = 0;
4547 static unsigned long vm_object_collapse_delays = 0;
4548 __private_extern__ void
4549 vm_object_collapse(
4550 register vm_object_t object,
4551 register vm_object_offset_t hint_offset,
4552 boolean_t can_bypass)
4553 {
4554 register vm_object_t backing_object;
4555 register unsigned int rcount;
4556 register unsigned int size;
4557 vm_object_t original_object;
4558 int object_lock_type;
4559 int backing_object_lock_type;
4560
4561 vm_object_collapse_calls++;
4562
4563 if (! vm_object_collapse_allowed &&
4564 ! (can_bypass && vm_object_bypass_allowed)) {
4565 return;
4566 }
4567
4568 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
4569 object, 0,0,0,0);
4570
4571 if (object == VM_OBJECT_NULL)
4572 return;
4573
4574 original_object = object;
4575
4576 /*
4577 * The top object was locked "exclusive" by the caller.
4578 * In the first pass, to determine if we can collapse the shadow chain,
4579 * take a "shared" lock on the shadow objects. If we can collapse,
4580 * we'll have to go down the chain again with exclusive locks.
4581 */
4582 object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4583 backing_object_lock_type = OBJECT_LOCK_SHARED;
4584
4585 retry:
4586 object = original_object;
4587 vm_object_lock_assert_exclusive(object);
4588
4589 while (TRUE) {
4590 vm_object_collapse_objects++;
4591 /*
4592 * Verify that the conditions are right for either
4593 * collapse or bypass:
4594 */
4595
4596 /*
4597 * There is a backing object, and
4598 */
4599
4600 backing_object = object->shadow;
4601 if (backing_object == VM_OBJECT_NULL) {
4602 if (object != original_object) {
4603 vm_object_unlock(object);
4604 }
4605 return;
4606 }
4607 if (backing_object_lock_type == OBJECT_LOCK_SHARED) {
4608 vm_object_lock_shared(backing_object);
4609 } else {
4610 vm_object_lock(backing_object);
4611 }
4612
4613 /*
4614 * No pages in the object are currently
4615 * being paged out, and
4616 */
4617 if (object->paging_in_progress != 0 ||
4618 object->activity_in_progress != 0) {
4619 /* try and collapse the rest of the shadow chain */
4620 if (object != original_object) {
4621 vm_object_unlock(object);
4622 }
4623 object = backing_object;
4624 object_lock_type = backing_object_lock_type;
4625 continue;
4626 }
4627
4628 /*
4629 * ...
4630 * The backing object is not read_only,
4631 * and no pages in the backing object are
4632 * currently being paged out.
4633 * The backing object is internal.
4634 *
4635 */
4636
4637 if (!backing_object->internal ||
4638 backing_object->paging_in_progress != 0 ||
4639 backing_object->activity_in_progress != 0) {
4640 /* try and collapse the rest of the shadow chain */
4641 if (object != original_object) {
4642 vm_object_unlock(object);
4643 }
4644 object = backing_object;
4645 object_lock_type = backing_object_lock_type;
4646 continue;
4647 }
4648
4649 /*
4650 * The backing object can't be a copy-object:
4651 * the shadow_offset for the copy-object must stay
4652 * as 0. Furthermore (for the 'we have all the
4653 * pages' case), if we bypass backing_object and
4654 * just shadow the next object in the chain, old
4655 * pages from that object would then have to be copied
4656 * BOTH into the (former) backing_object and into the
4657 * parent object.
4658 */
4659 if (backing_object->shadow != VM_OBJECT_NULL &&
4660 backing_object->shadow->copy == backing_object) {
4661 /* try and collapse the rest of the shadow chain */
4662 if (object != original_object) {
4663 vm_object_unlock(object);
4664 }
4665 object = backing_object;
4666 object_lock_type = backing_object_lock_type;
4667 continue;
4668 }
4669
4670 /*
4671 * We can now try to either collapse the backing
4672 * object (if the parent is the only reference to
4673 * it) or (perhaps) remove the parent's reference
4674 * to it.
4675 *
4676 * If there is exactly one reference to the backing
4677 * object, we may be able to collapse it into the
4678 * parent.
4679 *
4680 * If MACH_PAGEMAP is defined:
4681 * The parent must not have a pager created for it,
4682 * since collapsing a backing_object dumps new pages
4683 * into the parent that its pager doesn't know about
4684 * (and the collapse code can't merge the existence
4685 * maps).
4686 * Otherwise:
4687 * As long as one of the objects is still not known
4688 * to the pager, we can collapse them.
4689 */
4690 if (backing_object->ref_count == 1 &&
4691 (!object->pager_created
4692 #if !MACH_PAGEMAP
4693 || !backing_object->pager_created
4694 #endif /*!MACH_PAGEMAP */
4695 ) && vm_object_collapse_allowed) {
4696
4697 /*
4698 * We need the exclusive lock on the VM objects.
4699 */
4700 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
4701 /*
4702 * We have an object and its shadow locked
4703 * "shared". We can't just upgrade the locks
4704 * to "exclusive", as some other thread might
4705 * also have these objects locked "shared" and
4706 * attempt to upgrade one or the other to
4707 * "exclusive". The upgrades would block
4708 * forever waiting for the other "shared" locks
4709 * to get released.
4710 * So we have to release the locks and go
4711 * down the shadow chain again (since it could
4712 * have changed) with "exclusive" locking.
4713 */
4714 vm_object_unlock(backing_object);
4715 if (object != original_object)
4716 vm_object_unlock(object);
4717 object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4718 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4719 goto retry;
4720 }
4721
4722 XPR(XPR_VM_OBJECT,
4723 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
4724 backing_object, object,
4725 backing_object->pager,
4726 backing_object->pager_control, 0);
4727
4728 /*
4729 * Collapse the object with its backing
4730 * object, and try again with the object's
4731 * new backing object.
4732 */
4733
4734 vm_object_do_collapse(object, backing_object);
4735 vm_object_collapse_do_collapse++;
4736 continue;
4737 }
4738
4739 /*
4740 * Collapsing the backing object was not possible
4741 * or permitted, so let's try bypassing it.
4742 */
4743
4744 if (! (can_bypass && vm_object_bypass_allowed)) {
4745 /* try and collapse the rest of the shadow chain */
4746 if (object != original_object) {
4747 vm_object_unlock(object);
4748 }
4749 object = backing_object;
4750 object_lock_type = backing_object_lock_type;
4751 continue;
4752 }
4753
4754
4755 /*
4756 * If the object doesn't have all its pages present,
4757 * we have to make sure no pages in the backing object
4758 * "show through" before bypassing it.
4759 */
4760 size = atop(object->size);
4761 rcount = object->resident_page_count;
4762 if (rcount != size) {
4763 vm_object_offset_t offset;
4764 vm_object_offset_t backing_offset;
4765 unsigned int backing_rcount;
4766 unsigned int lookups = 0;
4767
4768 /*
4769 * If the backing object has a pager but no pagemap,
4770 * then we cannot bypass it, because we don't know
4771 * what pages it has.
4772 */
4773 if (backing_object->pager_created
4774 #if MACH_PAGEMAP
4775 && (backing_object->existence_map == VM_EXTERNAL_NULL)
4776 #endif /* MACH_PAGEMAP */
4777 ) {
4778 /* try and collapse the rest of the shadow chain */
4779 if (object != original_object) {
4780 vm_object_unlock(object);
4781 }
4782 object = backing_object;
4783 object_lock_type = backing_object_lock_type;
4784 continue;
4785 }
4786
4787 /*
4788 * If the object has a pager but no pagemap,
4789 * then we cannot bypass it, because we don't know
4790 * what pages it has.
4791 */
4792 if (object->pager_created
4793 #if MACH_PAGEMAP
4794 && (object->existence_map == VM_EXTERNAL_NULL)
4795 #endif /* MACH_PAGEMAP */
4796 ) {
4797 /* try and collapse the rest of the shadow chain */
4798 if (object != original_object) {
4799 vm_object_unlock(object);
4800 }
4801 object = backing_object;
4802 object_lock_type = backing_object_lock_type;
4803 continue;
4804 }
4805
4806 /*
4807 * If all of the pages in the backing object are
4808 * shadowed by the parent object, the parent
4809 * object no longer has to shadow the backing
4810 * object; it can shadow the next one in the
4811 * chain.
4812 *
4813 * If the backing object has existence info,
4814 * we must examine its existence info
4815 * as well.
4816 *
4817 */
4818
4819 backing_offset = object->shadow_offset;
4820 backing_rcount = backing_object->resident_page_count;
4821
4822 #if MACH_PAGEMAP
4823 #define EXISTS_IN_OBJECT(obj, off, rc) \
4824 (vm_external_state_get((obj)->existence_map, \
4825 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
4826 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
4827 #else
4828 #define EXISTS_IN_OBJECT(obj, off, rc) \
4829 (((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
4830 #endif /* MACH_PAGEMAP */
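/*
 * EXISTS_IN_OBJECT(obj, off, rc) evaluates true when a page at
 * offset "off" is known to exist in "obj": either the existence
 * map says so (MACH_PAGEMAP only), or vm_page_lookup() finds a
 * resident page. The resident-page branch also bumps "lookups"
 * (used below to pace the scan) and decrements the caller's
 * running resident-page count "rc".
 */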
4831
4832 /*
4833 * Check the hint location first
4834 * (since it is often the quickest way out of here).
4835 */
4836 if (object->cow_hint != ~(vm_offset_t)0)
4837 hint_offset = (vm_object_offset_t)object->cow_hint;
4838 else
4839 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
4840 (hint_offset - 8 * PAGE_SIZE_64) : 0;
4841
4842 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
4843 backing_offset, backing_rcount) &&
4844 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
4845 /* dependency right at the hint */
4846 object->cow_hint = (vm_offset_t) hint_offset; /* atomic */
4847 /* try and collapse the rest of the shadow chain */
4848 if (object != original_object) {
4849 vm_object_unlock(object);
4850 }
4851 object = backing_object;
4852 object_lock_type = backing_object_lock_type;
4853 continue;
4854 }
4855
4856 /*
4857 * If the object's window onto the backing_object
4858 * is large compared to the number of resident
4859 * pages in the backing object, it makes sense to
4860 * walk the backing_object's resident pages first.
4861 *
4862 * NOTE: Pages may be in both the existence map and
4863 * resident. So, we can't permanently decrement
4864 * the rcount here because the second loop may
4865 * find the same pages in the backing object's
4866 * existence map that we found here and we would
4867 * double-decrement the rcount. We also may or
4868 * may not have found the
4869 */
4870 if (backing_rcount &&
4871 #if MACH_PAGEMAP
4872 size > ((backing_object->existence_map) ?
4873 backing_rcount : (backing_rcount >> 1))
4874 #else
4875 size > (backing_rcount >> 1)
4876 #endif /* MACH_PAGEMAP */
4877 ) {
4878 unsigned int rc = rcount;
4879 vm_page_t p;
4880
4881 backing_rcount = backing_object->resident_page_count;
4882 p = (vm_page_t)queue_first(&backing_object->memq);
4883 do {
4884 /* Until we get more than one lookup lock */
4885 if (lookups > 256) {
4886 vm_object_collapse_delays++;
4887 lookups = 0;
4888 mutex_pause(0);
4889 }
4890
4891 offset = (p->offset - backing_offset);
4892 if (offset < object->size &&
4893 offset != hint_offset &&
4894 !EXISTS_IN_OBJECT(object, offset, rc)) {
4895 /* found a dependency */
4896 object->cow_hint = (vm_offset_t) offset; /* atomic */
4897
4898 break;
4899 }
4900 p = (vm_page_t) queue_next(&p->listq);
4901
4902 } while (--backing_rcount);
4903 if (backing_rcount != 0 ) {
4904 /* try and collapse the rest of the shadow chain */
4905 if (object != original_object) {
4906 vm_object_unlock(object);
4907 }
4908 object = backing_object;
4909 object_lock_type = backing_object_lock_type;
4910 continue;
4911 }
4912 }
4913
4914 /*
4915 * Walk through the offsets looking for pages in the
4916 * backing object that show through to the object.
4917 */
4918 if (backing_rcount
4919 #if MACH_PAGEMAP
4920 || backing_object->existence_map
4921 #endif /* MACH_PAGEMAP */
4922 ) {
4923 offset = hint_offset;
4924
4925 while((offset =
4926 (offset + PAGE_SIZE_64 < object->size) ?
4927 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
4928
4929 /* Until we get more than one lookup lock */
4930 if (lookups > 256) {
4931 vm_object_collapse_delays++;
4932 lookups = 0;
4933 mutex_pause(0);
4934 }
4935
4936 if (EXISTS_IN_OBJECT(backing_object, offset +
4937 backing_offset, backing_rcount) &&
4938 !EXISTS_IN_OBJECT(object, offset, rcount)) {
4939 /* found a dependency */
4940 object->cow_hint = (vm_offset_t) offset; /* atomic */
4941 break;
4942 }
4943 }
4944 if (offset != hint_offset) {
4945 /* try and collapse the rest of the shadow chain */
4946 if (object != original_object) {
4947 vm_object_unlock(object);
4948 }
4949 object = backing_object;
4950 object_lock_type = backing_object_lock_type;
4951 continue;
4952 }
4953 }
4954 }
4955
4956 /*
4957 * We need "exclusive" locks on the 2 VM objects.
4958 */
4959 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
4960 vm_object_unlock(backing_object);
4961 if (object != original_object)
4962 vm_object_unlock(object);
4963 object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4964 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
4965 goto retry;
4966 }
4967
4968 /* reset the offset hint for any objects deeper in the chain */
4969 object->cow_hint = (vm_offset_t)0;
4970
4971 /*
4972 * All interesting pages in the backing object
4973 * already live in the parent or its pager.
4974 * Thus we can bypass the backing object.
4975 */
4976
4977 vm_object_do_bypass(object, backing_object);
4978 vm_object_collapse_do_bypass++;
4979
4980 /*
4981 * Try again with this object's new backing object.
4982 */
4983
4984 continue;
4985 }
4986
4987 if (object != original_object) {
4988 vm_object_unlock(object);
4989 }
4990 }
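/*
 * Sketch of a typical call, mirroring vm_object_coalesce() later in
 * this file: the caller holds the object lock, leaves the page queues
 * unlocked, and permits bypass as well as collapse.
 *
 *	vm_object_lock(object);
 *	vm_object_collapse(object, offset, TRUE);
 *	...	continue working on the (possibly re-shadowed) object
 *	vm_object_unlock(object);
 */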
4991
4992 /*
4993 * Routine: vm_object_page_remove: [internal]
4994 * Purpose:
4995 * Removes all physical pages in the specified
4996 * object range from the object's list of pages.
4997 *
4998 * In/out conditions:
4999 * The object must be locked.
5000 * The object must not have paging_in_progress, usually
5001 * guaranteed by not having a pager.
5002 */
5003 unsigned int vm_object_page_remove_lookup = 0;
5004 unsigned int vm_object_page_remove_iterate = 0;
5005
5006 __private_extern__ void
5007 vm_object_page_remove(
5008 register vm_object_t object,
5009 register vm_object_offset_t start,
5010 register vm_object_offset_t end)
5011 {
5012 register vm_page_t p, next;
5013
5014 /*
5015 * One and two page removals are most popular.
5016 * The factor of 16 here is somewhat arbitrary.
5017 * It balances vm_object_lookup vs iteration.
5018 */
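/*
 * For example (numbers illustrative only): with 1600 resident
 * pages, removing a range of fewer than 100 pages (1600 / 16)
 * takes the per-page vm_page_lookup() path below, while a larger
 * removal walks the object's page queue once instead.
 */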
5019
5020 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
5021 vm_object_page_remove_lookup++;
5022
5023 for (; start < end; start += PAGE_SIZE_64) {
5024 p = vm_page_lookup(object, start);
5025 if (p != VM_PAGE_NULL) {
5026 assert(!p->cleaning && !p->pageout);
5027 if (!p->fictitious && p->pmapped)
5028 pmap_disconnect(p->phys_page);
5029 VM_PAGE_FREE(p);
5030 }
5031 }
5032 } else {
5033 vm_object_page_remove_iterate++;
5034
5035 p = (vm_page_t) queue_first(&object->memq);
5036 while (!queue_end(&object->memq, (queue_entry_t) p)) {
5037 next = (vm_page_t) queue_next(&p->listq);
5038 if ((start <= p->offset) && (p->offset < end)) {
5039 assert(!p->cleaning && !p->pageout);
5040 if (!p->fictitious && p->pmapped)
5041 pmap_disconnect(p->phys_page);
5042 VM_PAGE_FREE(p);
5043 }
5044 p = next;
5045 }
5046 }
5047 }
5048
5049
5050 /*
5051 * Routine: vm_object_coalesce
5052 * Function: Coalesces two objects backing up adjoining
5053 * regions of memory into a single object.
5054 *
5055 * returns TRUE if objects were combined.
5056 *
5057 * NOTE: Only works at the moment if the second object is NULL -
5058 * if it's not, which object do we lock first?
5059 *
5060 * Parameters:
5061 * prev_object First object to coalesce
5062 * prev_offset Offset into prev_object
5063 * next_object Second object to coalesce
5064 * next_offset Offset into next_object
5065 *
5066 * prev_size Size of reference to prev_object
5067 * next_size Size of reference to next_object
5068 *
5069 * Conditions:
5070 * The object(s) must *not* be locked. The map must be locked
5071 * to preserve the reference to the object(s).
5072 */
5073 static int vm_object_coalesce_count = 0;
5074
5075 __private_extern__ boolean_t
5076 vm_object_coalesce(
5077 register vm_object_t prev_object,
5078 vm_object_t next_object,
5079 vm_object_offset_t prev_offset,
5080 __unused vm_object_offset_t next_offset,
5081 vm_object_size_t prev_size,
5082 vm_object_size_t next_size)
5083 {
5084 vm_object_size_t newsize;
5085
5086 #ifdef lint
5087 next_offset++;
5088 #endif /* lint */
5089
5090 if (next_object != VM_OBJECT_NULL) {
5091 return(FALSE);
5092 }
5093
5094 if (prev_object == VM_OBJECT_NULL) {
5095 return(TRUE);
5096 }
5097
5098 XPR(XPR_VM_OBJECT,
5099 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
5100 prev_object, prev_offset, prev_size, next_size, 0);
5101
5102 vm_object_lock(prev_object);
5103
5104 /*
5105 * Try to collapse the object first
5106 */
5107 vm_object_collapse(prev_object, prev_offset, TRUE);
5108
5109 /*
5110 * Can't coalesce if pages not mapped to
5111 * prev_entry may be in use in any way:
5112 * . more than one reference
5113 * . paged out
5114 * . shadows another object
5115 * . has a copy elsewhere
5116 * . is purgeable
5117 * . paging references (pages might be in page-list)
5118 */
5119
5120 if ((prev_object->ref_count > 1) ||
5121 prev_object->pager_created ||
5122 (prev_object->shadow != VM_OBJECT_NULL) ||
5123 (prev_object->copy != VM_OBJECT_NULL) ||
5124 (prev_object->true_share != FALSE) ||
5125 (prev_object->purgable != VM_PURGABLE_DENY) ||
5126 (prev_object->paging_in_progress != 0) ||
5127 (prev_object->activity_in_progress != 0)) {
5128 vm_object_unlock(prev_object);
5129 return(FALSE);
5130 }
5131
5132 vm_object_coalesce_count++;
5133
5134 /*
5135 * Remove any pages that may still be in the object from
5136 * a previous deallocation.
5137 */
5138 vm_object_page_remove(prev_object,
5139 prev_offset + prev_size,
5140 prev_offset + prev_size + next_size);
5141
5142 /*
5143 * Extend the object if necessary.
5144 */
5145 newsize = prev_offset + prev_size + next_size;
5146 if (newsize > prev_object->size) {
5147 #if MACH_PAGEMAP
5148 /*
5149 * We cannot extend an object that has existence info,
5150 * since the existence info might then fail to cover
5151 * the entire object.
5152 *
5153 * This assertion must be true because the object
5154 * has no pager, and we only create existence info
5155 * for objects with pagers.
5156 */
5157 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
5158 #endif /* MACH_PAGEMAP */
5159 prev_object->size = newsize;
5160 }
5161
5162 vm_object_unlock(prev_object);
5163 return(TRUE);
5164 }
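/*
 * Sketch of a typical call (values illustrative): after growing a map
 * entry by "next_size" bytes, ask whether the existing backing object
 * can simply be extended to cover the new range. Note that the second
 * object must currently be VM_OBJECT_NULL:
 *
 *	if (vm_object_coalesce(prev_object, VM_OBJECT_NULL,
 *			       prev_offset, (vm_object_offset_t) 0,
 *			       prev_size, next_size)) {
 *		...	reuse prev_object for the combined range
 *	}
 */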
5165
5166 /*
5167 * Attach a set of physical pages to an object, so that they can
5168 * be mapped by mapping the object. Typically used to map IO memory.
5169 *
5170 * The mapping function and its private data are used to obtain the
5171 * physical addresses for each page to be mapped.
5172 */
5173 void
5174 vm_object_page_map(
5175 vm_object_t object,
5176 vm_object_offset_t offset,
5177 vm_object_size_t size,
5178 vm_object_offset_t (*map_fn)(void *map_fn_data,
5179 vm_object_offset_t offset),
5180 void *map_fn_data) /* private to map_fn */
5181 {
5182 int64_t num_pages;
5183 int i;
5184 vm_page_t m;
5185 vm_page_t old_page;
5186 vm_object_offset_t addr;
5187
5188 num_pages = atop_64(size);
5189
5190 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
5191
5192 addr = (*map_fn)(map_fn_data, offset);
5193
5194 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
5195 vm_page_more_fictitious();
5196
5197 vm_object_lock(object);
5198 if ((old_page = vm_page_lookup(object, offset))
5199 != VM_PAGE_NULL)
5200 {
5201 VM_PAGE_FREE(old_page);
5202 }
5203
5204 assert((ppnum_t) addr == addr);
5205 vm_page_init(m, (ppnum_t) addr);
5206 /*
5207 * private normally requires lock_queues but since we
5208 * are initializing the page, it's not necessary here
5209 */
5210 m->private = TRUE; /* don't free page */
5211 m->wire_count = 1;
5212 vm_page_insert(m, object, offset);
5213
5214 PAGE_WAKEUP_DONE(m);
5215 vm_object_unlock(object);
5216 }
5217 }
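/*
 * Sketch of a map_fn callback and call; the callback name and the
 * physical base address are hypothetical. Note that the value the
 * callback returns is used as a physical page number (see the
 * assert and vm_page_init() above), so a byte address is converted
 * with atop_64():
 *
 *	static vm_object_offset_t
 *	example_io_map_fn(void *data, vm_object_offset_t offset)
 *	{
 *		vm_object_offset_t phys_base = *(vm_object_offset_t *) data;
 *
 *		return atop_64(phys_base + offset);
 *	}
 *
 *	vm_object_page_map(object, 0, size, example_io_map_fn, &phys_base);
 */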
5218
5219 #include <mach_kdb.h>
5220
5221 #if MACH_KDB
5222 #include <ddb/db_output.h>
5223 #include <vm/vm_print.h>
5224
5225 #define printf kdbprintf
5226
5227 extern boolean_t vm_object_cached(
5228 vm_object_t object);
5229
5230 extern void print_bitstring(
5231 char byte);
5232
5233 boolean_t vm_object_print_pages = FALSE;
5234
5235 void
5236 print_bitstring(
5237 char byte)
5238 {
5239 printf("%c%c%c%c%c%c%c%c",
5240 ((byte & (1 << 0)) ? '1' : '0'),
5241 ((byte & (1 << 1)) ? '1' : '0'),
5242 ((byte & (1 << 2)) ? '1' : '0'),
5243 ((byte & (1 << 3)) ? '1' : '0'),
5244 ((byte & (1 << 4)) ? '1' : '0'),
5245 ((byte & (1 << 5)) ? '1' : '0'),
5246 ((byte & (1 << 6)) ? '1' : '0'),
5247 ((byte & (1 << 7)) ? '1' : '0'));
5248 }
5249
5250 boolean_t
5251 vm_object_cached(
5252 __unused register vm_object_t object)
5253 {
5254 #if VM_OBJECT_CACHE
5255 register vm_object_t o;
5256
5257 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
5258 if (object == o) {
5259 return TRUE;
5260 }
5261 }
5262 #endif
5263 return FALSE;
5264 }
5265
5266 #if MACH_PAGEMAP
5267 /*
5268 * vm_external_print: [ debug ]
5269 */
5270 void
5271 vm_external_print(
5272 vm_external_map_t emap,
5273 vm_object_size_t size)
5274 {
5275 if (emap == VM_EXTERNAL_NULL) {
5276 printf("0 ");
5277 } else {
5278 vm_object_size_t existence_size = stob(size);
5279 printf("{ size=%lld, map=[", (uint64_t) existence_size);
5280 if (existence_size > 0) {
5281 print_bitstring(emap[0]);
5282 }
5283 if (existence_size > 1) {
5284 print_bitstring(emap[1]);
5285 }
5286 if (existence_size > 2) {
5287 printf("...");
5288 print_bitstring(emap[existence_size-1]);
5289 }
5290 printf("] }\n");
5291 }
5292 return;
5293 }
5294 #endif /* MACH_PAGEMAP */
5295
5296 int
5297 vm_follow_object(
5298 vm_object_t object)
5299 {
5300 int count = 0;
5301 int orig_db_indent = db_indent;
5302
5303 while (TRUE) {
5304 if (object == VM_OBJECT_NULL) {
5305 db_indent = orig_db_indent;
5306 return count;
5307 }
5308
5309 count += 1;
5310
5311 iprintf("object 0x%x", object);
5312 printf(", shadow=0x%x", object->shadow);
5313 printf(", copy=0x%x", object->copy);
5314 printf(", pager=0x%x", object->pager);
5315 printf(", ref=%d\n", object->ref_count);
5316
5317 db_indent += 2;
5318 object = object->shadow;
5319 }
5320
5321 }
5322
5323 /*
5324 * vm_object_print: [ debug ]
5325 */
5326 void
5327 vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
5328 __unused db_expr_t arg_count, __unused char *modif)
5329 {
5330 vm_object_t object;
5331 register vm_page_t p;
5332 const char *s;
5333
5334 register int count;
5335
5336 object = (vm_object_t) (long) db_addr;
5337 if (object == VM_OBJECT_NULL)
5338 return;
5339
5340 iprintf("object 0x%x\n", object);
5341
5342 db_indent += 2;
5343
5344 iprintf("size=0x%x", object->size);
5345 printf(", memq_hint=%p", object->memq_hint);
5346 printf(", ref_count=%d\n", object->ref_count);
5347 iprintf("");
5348 #if TASK_SWAPPER
5349 printf("res_count=%d, ", object->res_count);
5350 #endif /* TASK_SWAPPER */
5351 printf("resident_page_count=%d\n", object->resident_page_count);
5352
5353 iprintf("shadow=0x%x", object->shadow);
5354 if (object->shadow) {
5355 register int i = 0;
5356 vm_object_t shadow = object;
5357 while((shadow = shadow->shadow))
5358 i++;
5359 printf(" (depth %d)", i);
5360 }
5361 printf(", copy=0x%x", object->copy);
5362 printf(", shadow_offset=0x%x", object->shadow_offset);
5363 printf(", last_alloc=0x%x\n", object->last_alloc);
5364
5365 iprintf("pager=0x%x", object->pager);
5366 printf(", paging_offset=0x%x", object->paging_offset);
5367 printf(", pager_control=0x%x\n", object->pager_control);
5368
5369 iprintf("copy_strategy=%d[", object->copy_strategy);
5370 switch (object->copy_strategy) {
5371 case MEMORY_OBJECT_COPY_NONE:
5372 printf("copy_none");
5373 break;
5374
5375 case MEMORY_OBJECT_COPY_CALL:
5376 printf("copy_call");
5377 break;
5378
5379 case MEMORY_OBJECT_COPY_DELAY:
5380 printf("copy_delay");
5381 break;
5382
5383 case MEMORY_OBJECT_COPY_SYMMETRIC:
5384 printf("copy_symmetric");
5385 break;
5386
5387 case MEMORY_OBJECT_COPY_INVALID:
5388 printf("copy_invalid");
5389 break;
5390
5391 default:
5392 printf("?");
5393 }
5394 printf("]");
5395
5396 iprintf("all_wanted=0x%x<", object->all_wanted);
5397 s = "";
5398 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
5399 printf("%sinit", s);
5400 s = ",";
5401 }
5402 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
5403 printf("%sready", s);
5404 s = ",";
5405 }
5406 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
5407 printf("%spaging", s);
5408 s = ",";
5409 }
5410 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
5411 printf("%slock", s);
5412 s = ",";
5413 }
5414 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
5415 printf("%suncaching", s);
5416 s = ",";
5417 }
5418 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
5419 printf("%scopy_call", s);
5420 s = ",";
5421 }
5422 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
5423 printf("%scaching", s);
5424 s = ",";
5425 }
5426 printf(">");
5427 printf(", paging_in_progress=%d\n", object->paging_in_progress);
5428 printf(", activity_in_progress=%d\n", object->activity_in_progress);
5429
5430 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
5431 (object->pager_created ? "" : "!"),
5432 (object->pager_initialized ? "" : "!"),
5433 (object->pager_ready ? "" : "!"),
5434 (object->can_persist ? "" : "!"),
5435 (object->pager_trusted ? "" : "!"),
5436 (object->pageout ? "" : "!"),
5437 (object->internal ? "internal" : "external"),
5438 (object->temporary ? "temporary" : "permanent"));
5439 iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n",
5440 (object->alive ? "" : "!"),
5441 ((object->purgable != VM_PURGABLE_DENY) ? "" : "!"),
5442 ((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"),
5443 ((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"),
5444 (object->shadowed ? "" : "!"),
5445 (vm_object_cached(object) ? "" : "!"),
5446 (object->private ? "" : "!"));
5447 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
5448 (object->advisory_pageout ? "" : "!"),
5449 (object->silent_overwrite ? "" : "!"));
5450
5451 #if MACH_PAGEMAP
5452 iprintf("existence_map=");
5453 vm_external_print(object->existence_map, object->size);
5454 #endif /* MACH_PAGEMAP */
5455 #if MACH_ASSERT
5456 iprintf("paging_object=0x%x\n", object->paging_object);
5457 #endif /* MACH_ASSERT */
5458
5459 if (vm_object_print_pages) {
5460 count = 0;
5461 p = (vm_page_t) queue_first(&object->memq);
5462 while (!queue_end(&object->memq, (queue_entry_t) p)) {
5463 if (count == 0) {
5464 iprintf("memory:=");
5465 } else if (count == 2) {
5466 printf("\n");
5467 iprintf(" ...");
5468 count = 0;
5469 } else {
5470 printf(",");
5471 }
5472 count++;
5473
5474 printf("(off=0x%llX,page=%p)", p->offset, p);
5475 p = (vm_page_t) queue_next(&p->listq);
5476 }
5477 if (count != 0) {
5478 printf("\n");
5479 }
5480 }
5481 db_indent -= 2;
5482 }
5483
5484
5485 /*
5486 * vm_object_find [ debug ]
5487 *
5488 * Find all tasks which reference the given vm_object.
5489 */
5490
5491 boolean_t vm_object_find(vm_object_t object);
5492 boolean_t vm_object_print_verbose = FALSE;
5493
5494 boolean_t
5495 vm_object_find(
5496 vm_object_t object)
5497 {
5498 task_t task;
5499 vm_map_t map;
5500 vm_map_entry_t entry;
5501 boolean_t found = FALSE;
5502
5503 queue_iterate(&tasks, task, task_t, tasks) {
5504 map = task->map;
5505 for (entry = vm_map_first_entry(map);
5506 entry && entry != vm_map_to_entry(map);
5507 entry = entry->vme_next) {
5508
5509 vm_object_t obj;
5510
5511 /*
5512 * For the time being, skip submaps;
5513 * only the kernel can have submaps,
5514 * and unless we are interested in
5515 * kernel objects, we can simply skip
5516 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
5517 * for a full solution.
5518 */
5519 if (entry->is_sub_map)
5520 continue;
5521 if (entry)
5522 obj = entry->object.vm_object;
5523 else
5524 continue;
5525
5526 while (obj != VM_OBJECT_NULL) {
5527 if (obj == object) {
5528 if (!found) {
5529 printf("TASK\t\tMAP\t\tENTRY\n");
5530 found = TRUE;
5531 }
5532 printf("0x%x\t0x%x\t0x%x\n",
5533 task, map, entry);
5534 }
5535 obj = obj->shadow;
5536 }
5537 }
5538 }
5539
5540 return(found);
5541 }
5542
5543 #endif /* MACH_KDB */
5544
5545 kern_return_t
5546 vm_object_populate_with_private(
5547 vm_object_t object,
5548 vm_object_offset_t offset,
5549 ppnum_t phys_page,
5550 vm_size_t size)
5551 {
5552 ppnum_t base_page;
5553 vm_object_offset_t base_offset;
5554
5555
5556 if(!object->private)
5557 return KERN_FAILURE;
5558
5559 base_page = phys_page;
5560
5561 vm_object_lock(object);
5562 if(!object->phys_contiguous) {
5563 vm_page_t m;
5564 if((base_offset = trunc_page_64(offset)) != offset) {
5565 vm_object_unlock(object);
5566 return KERN_FAILURE;
5567 }
5568 base_offset += object->paging_offset;
5569 while(size) {
5570 m = vm_page_lookup(object, base_offset);
5571 if(m != VM_PAGE_NULL) {
5572 if(m->fictitious) {
5573 if (m->phys_page != vm_page_guard_addr) {
5574
5575 vm_page_lockspin_queues();
5576 m->private = TRUE;
5577 vm_page_unlock_queues();
5578
5579 m->fictitious = FALSE;
5580 m->phys_page = base_page;
5581 if(!m->busy) {
5582 m->busy = TRUE;
5583 }
5584 if(!m->absent) {
5585 m->absent = TRUE;
5586 }
5587 m->list_req_pending = TRUE;
5588 }
5589 } else if (m->phys_page != base_page) {
5590 if (m->pmapped) {
5591 /*
5592 * pmap call to clear old mapping
5593 */
5594 pmap_disconnect(m->phys_page);
5595 }
5596 m->phys_page = base_page;
5597 }
5598
5599 /*
5600 * ENCRYPTED SWAP:
5601 * We're not pointing to the same
5602 * physical page any longer and the
5603 * contents of the new one are not
5604 * supposed to be encrypted.
5605 * XXX What happens to the original
5606 * physical page? Is it lost?
5607 */
5608 m->encrypted = FALSE;
5609
5610 } else {
5611 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
5612 vm_page_more_fictitious();
5613
5614 /*
5615 * private normally requires lock_queues but since we
5616 * are initializing the page, it's not necessary here
5617 */
5618 m->private = TRUE;
5619 m->fictitious = FALSE;
5620 m->phys_page = base_page;
5621 m->list_req_pending = TRUE;
5622 m->absent = TRUE;
5623 m->unusual = TRUE;
5624
5625 vm_page_insert(m, object, base_offset);
5626 }
5627 base_page++; /* Go to the next physical page */
5628 base_offset += PAGE_SIZE;
5629 size -= PAGE_SIZE;
5630 }
5631 } else {
5632 /* NOTE: we should check the original settings here */
5633 /* if we have a size > zero a pmap call should be made */
5634 /* to disable the range */
5635
5636 /* pmap_? */
5637
5638 /* shadows on contiguous memory are not allowed */
5639 /* we therefore can use the offset field */
5640 object->shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
5641 object->size = size;
5642 }
5643 vm_object_unlock(object);
5644 return KERN_SUCCESS;
5645 }
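/*
 * Sketch of a call (values illustrative): back the first "size" bytes
 * of a device-private object with physical pages starting at page
 * number "phys_page".
 *
 *	kr = vm_object_populate_with_private(object,
 *					     (vm_object_offset_t) 0,
 *					     phys_page, size);
 *	if (kr != KERN_SUCCESS)
 *		...	the object was not marked private, or the
 *		...	offset was not page aligned
 */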
5646
5647 /*
5648 * memory_object_free_from_cache:
5649 *
5650 * Walk the vm_object cache list, removing and freeing vm_objects
5651 * which are backed by the pager identified by the caller (pager_ops).
5652 * Remove up to "count" objects, if there are that many available
5653 * in the cache.
5654 *
5655 * Walk the list at most once, return the number of vm_objects
5656 * actually freed.
5657 */
5658
5659 __private_extern__ kern_return_t
5660 memory_object_free_from_cache(
5661 __unused host_t host,
5662 __unused memory_object_pager_ops_t pager_ops,
5663 int *count)
5664 {
5665 #if VM_OBJECT_CACHE
5666 int object_released = 0;
5667
5668 register vm_object_t object = VM_OBJECT_NULL;
5669 vm_object_t shadow;
5670
5671 /*
5672 if(host == HOST_NULL)
5673 return(KERN_INVALID_ARGUMENT);
5674 */
5675
5676 try_again:
5677 vm_object_cache_lock();
5678
5679 queue_iterate(&vm_object_cached_list, object,
5680 vm_object_t, cached_list) {
5681 if (object->pager &&
5682 (pager_ops == object->pager->mo_pager_ops)) {
5683 vm_object_lock(object);
5684 queue_remove(&vm_object_cached_list, object,
5685 vm_object_t, cached_list);
5686 vm_object_cached_count--;
5687
5688 vm_object_cache_unlock();
5689 /*
5690 * Since this object is in the cache, we know
5691 * that it is initialized and has only a pager's
5692 * (implicit) reference. Take a reference to avoid
5693 * recursive deallocations.
5694 */
5695
5696 assert(object->pager_initialized);
5697 assert(object->ref_count == 0);
5698 vm_object_lock_assert_exclusive(object);
5699 object->ref_count++;
5700
5701 /*
5702 * Terminate the object.
5703 * If the object had a shadow, we let
5704 * vm_object_deallocate deallocate it.
5705 * "pageout" objects have a shadow, but
5706 * maintain a "paging reference" rather
5707 * than a normal reference.
5708 * (We are careful here to limit recursion.)
5709 */
5710 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
5711
5712 if ((vm_object_terminate(object) == KERN_SUCCESS)
5713 && (shadow != VM_OBJECT_NULL)) {
5714 vm_object_deallocate(shadow);
5715 }
5716
5717 if(object_released++ == *count)
5718 return KERN_SUCCESS;
5719 goto try_again;
5720 }
5721 }
5722 vm_object_cache_unlock();
5723 *count = object_released;
5724 #else
5725 *count = 0;
5726 #endif
5727 return KERN_SUCCESS;
5728 }
5729
5730
5731
5732 kern_return_t
5733 memory_object_create_named(
5734 memory_object_t pager,
5735 memory_object_offset_t size,
5736 memory_object_control_t *control)
5737 {
5738 vm_object_t object;
5739 vm_object_hash_entry_t entry;
5740 lck_mtx_t *lck;
5741
5742 *control = MEMORY_OBJECT_CONTROL_NULL;
5743 if (pager == MEMORY_OBJECT_NULL)
5744 return KERN_INVALID_ARGUMENT;
5745
5746 lck = vm_object_hash_lock_spin(pager);
5747 entry = vm_object_hash_lookup(pager, FALSE);
5748
5749 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
5750 (entry->object != VM_OBJECT_NULL)) {
5751 if (entry->object->named == TRUE)
5752 panic("memory_object_create_named: caller already holds the right"); }
5753 vm_object_hash_unlock(lck);
5754
5755 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) {
5756 return(KERN_INVALID_OBJECT);
5757 }
5758
5759 /* wait for object (if any) to be ready */
5760 if (object != VM_OBJECT_NULL) {
5761 vm_object_lock(object);
5762 object->named = TRUE;
5763 while (!object->pager_ready) {
5764 vm_object_sleep(object,
5765 VM_OBJECT_EVENT_PAGER_READY,
5766 THREAD_UNINT);
5767 }
5768 *control = object->pager_control;
5769 vm_object_unlock(object);
5770 }
5771 return (KERN_SUCCESS);
5772 }
5773
5774
5775 /*
5776 * Routine: memory_object_recover_named [user interface]
5777 * Purpose:
5778 * Attempt to recover a named reference for a VM object.
5779 * VM will verify that the object has not already started
5780 * down the termination path, and if it has, will optionally
5781 * wait for that to finish.
5782 * Returns:
5783 * KERN_SUCCESS - we recovered a named reference on the object
5784 * KERN_FAILURE - we could not recover a reference (object dead)
5785 * KERN_INVALID_ARGUMENT - bad memory object control
5786 */
5787 kern_return_t
5788 memory_object_recover_named(
5789 memory_object_control_t control,
5790 boolean_t wait_on_terminating)
5791 {
5792 vm_object_t object;
5793
5794 object = memory_object_control_to_vm_object(control);
5795 if (object == VM_OBJECT_NULL) {
5796 return (KERN_INVALID_ARGUMENT);
5797 }
5798 restart:
5799 vm_object_lock(object);
5800
5801 if (object->terminating && wait_on_terminating) {
5802 vm_object_wait(object,
5803 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
5804 THREAD_UNINT);
5805 goto restart;
5806 }
5807
5808 if (!object->alive) {
5809 vm_object_unlock(object);
5810 return KERN_FAILURE;
5811 }
5812
5813 if (object->named == TRUE) {
5814 vm_object_unlock(object);
5815 return KERN_SUCCESS;
5816 }
5817 #if VM_OBJECT_CACHE
5818 if ((object->ref_count == 0) && (!object->terminating)) {
5819 if (!vm_object_cache_lock_try()) {
5820 vm_object_unlock(object);
5821 goto restart;
5822 }
5823 queue_remove(&vm_object_cached_list, object,
5824 vm_object_t, cached_list);
5825 vm_object_cached_count--;
5826 XPR(XPR_VM_OBJECT_CACHE,
5827 "memory_object_recover_named: removing %X, head (%X, %X)\n",
5828 object,
5829 vm_object_cached_list.next,
5830 vm_object_cached_list.prev, 0,0);
5831
5832 vm_object_cache_unlock();
5833 }
5834 #endif
5835 object->named = TRUE;
5836 vm_object_lock_assert_exclusive(object);
5837 object->ref_count++;
5838 vm_object_res_reference(object);
5839 while (!object->pager_ready) {
5840 vm_object_sleep(object,
5841 VM_OBJECT_EVENT_PAGER_READY,
5842 THREAD_UNINT);
5843 }
5844 vm_object_unlock(object);
5845 return (KERN_SUCCESS);
5846 }
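
/*
 * Illustrative sketch (hypothetical helper, compiled out): one way a
 * holder of a memory_object_control_t might use the routine above to
 * re-acquire a named reference, treating KERN_FAILURE as "object dead".
 */
#if 0
static kern_return_t
example_recover_named_ref(memory_object_control_t control)
{
	kern_return_t kr;

	/* wait out any in-flight termination rather than failing early */
	kr = memory_object_recover_named(control, TRUE);

	switch (kr) {
	case KERN_SUCCESS:
		/* we now hold a named reference on the object */
		break;
	case KERN_FAILURE:
		/* object is dead; the caller must rebuild its state */
		break;
	default:
		/* KERN_INVALID_ARGUMENT: bad or stale control */
		break;
	}
	return kr;
}
#endif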
5847
5848
5849 /*
5850 * vm_object_release_name:
5851 *
5852 * Enforces the name semantic on a memory_object reference count decrement.
5853 * This routine should not be called unless the caller holds a name
5854 * reference gained through memory_object_create_named.
5855 *
5856 * If the TERMINATE_IDLE flag is set, the call will return without
5857 * releasing the name unless the reference count is 1, i.e. the object is
5858 * idle with the name being the only remaining reference.
5859 * If the decision is made to proceed, the named flag is set to FALSE and
5860 * the reference count is decremented. If the RESPECT_CACHE flag is set
5861 * and the reference count has gone to zero, the memory_object is checked
5862 * to see if it is cacheable; otherwise, when the reference count reaches
5863 * zero, it is simply terminated.
5864 */
5865
5866 __private_extern__ kern_return_t
5867 vm_object_release_name(
5868 vm_object_t object,
5869 int flags)
5870 {
5871 vm_object_t shadow;
5872 boolean_t original_object = TRUE;
5873
5874 while (object != VM_OBJECT_NULL) {
5875
5876 vm_object_lock(object);
5877
5878 assert(object->alive);
5879 if (original_object)
5880 assert(object->named);
5881 assert(object->ref_count > 0);
5882
5883 /*
5884 * We have to wait for initialization before
5885 * destroying or caching the object.
5886 */
5887
5888 if (object->pager_created && !object->pager_initialized) {
5889 assert(!object->can_persist);
5890 vm_object_assert_wait(object,
5891 VM_OBJECT_EVENT_INITIALIZED,
5892 THREAD_UNINT);
5893 vm_object_unlock(object);
5894 thread_block(THREAD_CONTINUE_NULL);
5895 continue;
5896 }
5897
5898 if (((object->ref_count > 1)
5899 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
5900 || (object->terminating)) {
5901 vm_object_unlock(object);
5902 return KERN_FAILURE;
5903 } else {
5904 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
5905 vm_object_unlock(object);
5906 return KERN_SUCCESS;
5907 }
5908 }
5909
5910 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
5911 (object->ref_count == 1)) {
5912 if (original_object)
5913 object->named = FALSE;
5914 vm_object_unlock(object);
5915 /* let vm_object_deallocate push this thing into */
5916 /* the cache, if that is where it is bound */
5917 vm_object_deallocate(object);
5918 return KERN_SUCCESS;
5919 }
5920 VM_OBJ_RES_DECR(object);
5921 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
5922
5923 if (object->ref_count == 1) {
5924 if (vm_object_terminate(object) != KERN_SUCCESS) {
5925 if (original_object) {
5926 return KERN_FAILURE;
5927 } else {
5928 return KERN_SUCCESS;
5929 }
5930 }
5931 if (shadow != VM_OBJECT_NULL) {
5932 original_object = FALSE;
5933 object = shadow;
5934 continue;
5935 }
5936 return KERN_SUCCESS;
5937 } else {
5938 vm_object_lock_assert_exclusive(object);
5939 object->ref_count--;
5940 assert(object->ref_count > 0);
5941 if(original_object)
5942 object->named = FALSE;
5943 vm_object_unlock(object);
5944 return KERN_SUCCESS;
5945 }
5946 }
5947 /*NOTREACHED*/
5948 assert(0);
5949 return KERN_FAILURE;
5950 }
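
/*
 * Illustrative sketch (hypothetical helper, compiled out): dropping a
 * name reference only when the object is idle, and letting the cache
 * keep it if it is cacheable, per the flag semantics described above.
 */
#if 0
static boolean_t
example_drop_name_if_idle(vm_object_t named_object)
{
	kern_return_t kr;

	kr = vm_object_release_name(named_object,
				    MEMORY_OBJECT_TERMINATE_IDLE |
				    MEMORY_OBJECT_RESPECT_CACHE);
	/*
	 * KERN_FAILURE means other references remain (or the object is
	 * terminating), so the name was not released.
	 */
	return (kr == KERN_SUCCESS) ? TRUE : FALSE;
}
#endif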
5951
5952
5953 __private_extern__ kern_return_t
5954 vm_object_lock_request(
5955 vm_object_t object,
5956 vm_object_offset_t offset,
5957 vm_object_size_t size,
5958 memory_object_return_t should_return,
5959 int flags,
5960 vm_prot_t prot)
5961 {
5962 __unused boolean_t should_flush;
5963
5964 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
5965
5966 XPR(XPR_MEMORY_OBJECT,
5967 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
5968 object, offset, size,
5969 (((should_return&1)<<1)|should_flush), prot);
5970
5971 /*
5972 * Check for bogus arguments.
5973 */
5974 if (object == VM_OBJECT_NULL)
5975 return (KERN_INVALID_ARGUMENT);
5976
5977 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
5978 return (KERN_INVALID_ARGUMENT);
5979
5980 size = round_page_64(size);
5981
5982 /*
5983 * Lock the object, and acquire a paging reference to
5984 * prevent the memory_object reference from being released.
5985 */
5986 vm_object_lock(object);
5987 vm_object_paging_begin(object);
5988
5989 (void)vm_object_update(object,
5990 offset, size, NULL, NULL, should_return, flags, prot);
5991
5992 vm_object_paging_end(object);
5993 vm_object_unlock(object);
5994
5995 return (KERN_SUCCESS);
5996 }
5997
5998 /*
5999 * Empty a purgeable object by grabbing the physical pages assigned to it and
6000 * putting them on the free queue without writing them to backing store, etc.
6001 * When the pages are next touched they will be demand zero-fill pages. We
6002 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
6003 * skip referenced/dirty pages, pages on the active queue, etc. We're more
6004 * than happy to grab these since this is a purgeable object. We mark the
6005 * object as "empty" after reaping its pages.
6006 *
6007 * On entry the object must be locked and it must be
6008 * purgeable with no delayed copies pending.
6009 */
6010 void
6011 vm_object_purge(vm_object_t object)
6012 {
6013 vm_object_lock_assert_exclusive(object);
6014
6015 if (object->purgable == VM_PURGABLE_DENY)
6016 return;
6017
6018 assert(object->copy == VM_OBJECT_NULL);
6019 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6020
6021 if(object->purgable == VM_PURGABLE_VOLATILE) {
6022 unsigned int delta;
6023 assert(object->resident_page_count >=
6024 object->wired_page_count);
6025 delta = (object->resident_page_count -
6026 object->wired_page_count);
6027 if (delta != 0) {
6028 assert(vm_page_purgeable_count >=
6029 delta);
6030 OSAddAtomic(-delta,
6031 (SInt32 *)&vm_page_purgeable_count);
6032 }
6033 if (object->wired_page_count != 0) {
6034 assert(vm_page_purgeable_wired_count >=
6035 object->wired_page_count);
6036 OSAddAtomic(-object->wired_page_count,
6037 (SInt32 *)&vm_page_purgeable_wired_count);
6038 }
6039 }
6040 object->purgable = VM_PURGABLE_EMPTY;
6041
6042 vm_object_reap_pages(object, REAP_PURGEABLE);
6043 }
6044
6045
6046 /*
6047 * vm_object_purgable_control() allows the caller to control and investigate the
6048 * state of a purgeable object. A purgeable object is created via a call to
6049 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgeable object will
6050 * never be coalesced with any other object -- even other purgeable objects --
6051 * and will thus always remain a distinct object. A purgeable object has
6052 * special semantics when its reference count is exactly 1. If its reference
6053 * count is greater than 1, then a purgeable object will behave like a normal
6054 * object and attempts to use this interface will result in an error return
6055 * of KERN_INVALID_ARGUMENT.
6056 *
6057 * A purgeable object may be put into a "volatile" state which will make the
6058 * object's pages eligible for being reclaimed without paging to backing
6059 * store if the system runs low on memory. If the pages in a volatile
6060 * purgeable object are reclaimed, the purgeable object is said to have been
6061 * "emptied." When a purgeable object is emptied the system will reclaim as
6062 * many pages from the object as it can in a convenient manner (pages already
6063 * en route to backing store or busy for other reasons are left as is). When
6064 * a purgeable object is made volatile, its pages will generally be reclaimed
6065 * before other pages in the application's working set. This semantic is
6066 * generally used by applications which can recreate the data in the object
6067 * faster than it can be paged in. One such example might be media assets
6068 * which can be reread from a much faster RAID volume.
6069 *
6070 * A purgeable object may be designated as "non-volatile" which means it will
6071 * behave like all other objects in the system with pages being written to and
6072 * read from backing store as needed to satisfy system memory needs. If the
6073 * object was emptied before the object was made non-volatile, that fact will
6074 * be returned as the old state of the purgeable object (see
6075 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
6076 * were reclaimed as part of emptying the object will be refaulted in as
6077 * zero-fill on demand. It is up to the application to note that an object
6078 * was emptied and recreate the object's contents if necessary. When a
6079 * purgeable object is made non-volatile, its pages will generally not be paged
6080 * out to backing store in the immediate future. A purgeable object may also
6081 * be manually emptied.
6082 *
6083 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
6084 * volatile purgeable object may be queried at any time. This information may
6085 * be used as a control input to let the application know when the system is
6086 * experiencing memory pressure and is reclaiming memory.
6087 *
6088 * The specified address may be any address within the purgeable object. If
6089 * the specified address does not represent any object in the target task's
6090 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
6091 * object containing the specified address is not a purgeable object, then
6092 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
6093 * returned.
6094 *
6095 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
6096 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
6097 * state is used to set the new state of the purgeable object and return its
6098 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable
6099 * object is returned in the parameter state.
6100 *
6101 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
6102 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
6103 * the non-volatile, volatile and volatile/empty states described above.
6104 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
6105 * immediately reclaim as many pages in the object as can be conveniently
6106 * collected (some may have already been written to backing store or be
6107 * otherwise busy).
6108 *
6109 * The process of making a purgeable object non-volatile and determining its
6110 * previous state is atomic. Thus, if a purgeable object is made
6111 * VM_PURGABLE_NONVOLATILE and the old state is returned as
6112 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
6113 * completely intact and will remain so until the object is made volatile
6114 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
6115 * was reclaimed while it was in a volatile state and its previous contents
6116 * have been lost.
6117 */
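
/*
 * Illustrative user-level sketch (hypothetical, compiled out): the
 * interface described above as seen from a task, using the standard
 * vm_allocate()/vm_purgable_control() calls.
 */
#if 0
#include <mach/mach.h>
#include <mach/vm_statistics.h>
#include <mach/vm_purgable.h>

static kern_return_t
example_purgeable_usage(vm_address_t *addr, vm_size_t size)
{
	kern_return_t	kr;
	int		state;

	/* create a purgeable region */
	kr = vm_allocate(mach_task_self(), addr, size,
			 VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... fill the region with recreatable data, then ... */

	/* let the system reclaim these pages under memory pressure */
	state = VM_PURGABLE_VOLATILE;
	kr = vm_purgable_control(mach_task_self(), *addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (kr != KERN_SUCCESS)
		return kr;

	/* later: take the data back into use */
	state = VM_PURGABLE_NONVOLATILE;
	kr = vm_purgable_control(mach_task_self(), *addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY) {
		/* pages were reclaimed while volatile; recreate contents */
	}
	return kr;
}
#endif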
6118 /*
6119 * The object must be locked.
6120 */
6121 kern_return_t
6122 vm_object_purgable_control(
6123 vm_object_t object,
6124 vm_purgable_t control,
6125 int *state)
6126 {
6127 int old_state;
6128 int new_state;
6129
6130 if (object == VM_OBJECT_NULL) {
6131 /*
6132 * Object must already be present or it can't be purgeable.
6133 */
6134 return KERN_INVALID_ARGUMENT;
6135 }
6136
6137 /*
6138 * Get current state of the purgeable object.
6139 */
6140 old_state = object->purgable;
6141 if (old_state == VM_PURGABLE_DENY)
6142 return KERN_INVALID_ARGUMENT;
6143
6144 /* purgeable can't have delayed copies - now or in the future */
6145 assert(object->copy == VM_OBJECT_NULL);
6146 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6147
6148 /*
6149 * Execute the desired operation.
6150 */
6151 if (control == VM_PURGABLE_GET_STATE) {
6152 *state = old_state;
6153 return KERN_SUCCESS;
6154 }
6155
6156 if ((*state) & VM_PURGABLE_DEBUG_EMPTY) {
6157 object->volatile_empty = TRUE;
6158 }
6159 if ((*state) & VM_PURGABLE_DEBUG_FAULT) {
6160 object->volatile_fault = TRUE;
6161 }
6162
6163 new_state = *state & VM_PURGABLE_STATE_MASK;
6164 if (new_state == VM_PURGABLE_VOLATILE &&
6165 object->volatile_empty) {
6166 new_state = VM_PURGABLE_EMPTY;
6167 }
6168
6169 switch (new_state) {
6170 case VM_PURGABLE_DENY:
6171 case VM_PURGABLE_NONVOLATILE:
6172 object->purgable = new_state;
6173
6174 if (old_state == VM_PURGABLE_VOLATILE) {
6175 unsigned int delta;
6176
6177 assert(object->resident_page_count >=
6178 object->wired_page_count);
6179 delta = (object->resident_page_count -
6180 object->wired_page_count);
6181
6182 assert(vm_page_purgeable_count >= delta);
6183
6184 if (delta != 0) {
6185 OSAddAtomic(-delta,
6186 (SInt32 *)&vm_page_purgeable_count);
6187 }
6188 if (object->wired_page_count != 0) {
6189 assert(vm_page_purgeable_wired_count >=
6190 object->wired_page_count);
6191 OSAddAtomic(-object->wired_page_count,
6192 (SInt32 *)&vm_page_purgeable_wired_count);
6193 }
6194
6195 vm_page_lock_queues();
6196
6197 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
6198 purgeable_q_t queue = vm_purgeable_object_remove(object);
6199 assert(queue);
6200
6201 vm_purgeable_token_delete_first(queue);
6202 assert(queue->debug_count_objects>=0);
6203
6204 vm_page_unlock_queues();
6205 }
6206 break;
6207
6208 case VM_PURGABLE_VOLATILE:
6209 if (object->volatile_fault) {
6210 vm_page_t p;
6211 int refmod;
6212
6213 queue_iterate(&object->memq, p, vm_page_t, listq) {
6214 if (p->busy ||
6215 VM_PAGE_WIRED(p) ||
6216 p->fictitious) {
6217 continue;
6218 }
6219 refmod = pmap_disconnect(p->phys_page);
6220 if ((refmod & VM_MEM_MODIFIED) &&
6221 !p->dirty) {
6222 p->dirty = TRUE;
6223 }
6224 }
6225 }
6226
6227 if (old_state == VM_PURGABLE_EMPTY &&
6228 object->resident_page_count == 0)
6229 break;
6230
6231 purgeable_q_t queue;
6232
6233 /* find the correct queue */
6234 if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
6235 queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
6236 else {
6237 if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
6238 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
6239 else
6240 queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
6241 }
6242
6243 if (old_state == VM_PURGABLE_NONVOLATILE ||
6244 old_state == VM_PURGABLE_EMPTY) {
6245 unsigned int delta;
6246
6247 /* try to add token... this can fail */
6248 vm_page_lock_queues();
6249
6250 kern_return_t result = vm_purgeable_token_add(queue);
6251 if (result != KERN_SUCCESS) {
6252 vm_page_unlock_queues();
6253 return result;
6254 }
6255 vm_page_unlock_queues();
6256
6257 assert(object->resident_page_count >=
6258 object->wired_page_count);
6259 delta = (object->resident_page_count -
6260 object->wired_page_count);
6261
6262 if (delta != 0) {
6263 OSAddAtomic(delta,
6264 &vm_page_purgeable_count);
6265 }
6266 if (object->wired_page_count != 0) {
6267 OSAddAtomic(object->wired_page_count,
6268 &vm_page_purgeable_wired_count);
6269 }
6270
6271 object->purgable = new_state;
6272
6273 /* object should not be on a queue */
6274 assert(object->objq.next == NULL && object->objq.prev == NULL);
6275 }
6276 else if (old_state == VM_PURGABLE_VOLATILE) {
6277 /*
6278 * If we are only reassigning priorities / purgeable groups, we don't change the
6279 * token queue, so moving priorities will not make pages stay around longer.
6280 * The reasoning is that the algorithm gives most priority to the most important
6281 * object. If a new token is added, the most important object's priority is boosted.
6282 * This already biases the system toward purgeable queues that move a lot.
6283 * No additional biasing seems necessary in this case, where no new object is added.
6284 */
6285 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
6286
6287 purgeable_q_t old_queue=vm_purgeable_object_remove(object);
6288 assert(old_queue);
6289
6290 if (old_queue != queue) {
6291 kern_return_t result;
6292
6293 /* Changing queue. Have to move token. */
6294 vm_page_lock_queues();
6295 vm_purgeable_token_delete_first(old_queue);
6296 result = vm_purgeable_token_add(queue);
6297 vm_page_unlock_queues();
6298
6299 assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */
6300 }
6301 }
6302 vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT );
6303
6304 assert(queue->debug_count_objects>=0);
6305
6306 break;
6307
6308
6309 case VM_PURGABLE_EMPTY:
6310 if (object->volatile_fault) {
6311 vm_page_t p;
6312 int refmod;
6313
6314 queue_iterate(&object->memq, p, vm_page_t, listq) {
6315 if (p->busy ||
6316 VM_PAGE_WIRED(p) ||
6317 p->fictitious) {
6318 continue;
6319 }
6320 refmod = pmap_disconnect(p->phys_page);
6321 if ((refmod & VM_MEM_MODIFIED) &&
6322 !p->dirty) {
6323 p->dirty = TRUE;
6324 }
6325 }
6326 }
6327
6328 if (old_state != new_state) {
6329 assert(old_state == VM_PURGABLE_NONVOLATILE ||
6330 old_state == VM_PURGABLE_VOLATILE);
6331 if (old_state == VM_PURGABLE_VOLATILE) {
6332 purgeable_q_t old_queue;
6333
6334 /* object should be on a queue */
6335 assert(object->objq.next != NULL &&
6336 object->objq.prev != NULL);
6337 old_queue = vm_purgeable_object_remove(object);
6338 assert(old_queue);
6339 vm_page_lock_queues();
6340 vm_purgeable_token_delete_first(old_queue);
6341 vm_page_unlock_queues();
6342 }
6343 (void) vm_object_purge(object);
6344 }
6345 break;
6346
6347 }
6348 *state = old_state;
6349
6350 return KERN_SUCCESS;
6351 }
6352
6353 #if TASK_SWAPPER
6354 /*
6355 * vm_object_res_deallocate
6356 *
6357 * (recursively) decrement residence counts on vm objects and their shadows.
6358 * Called from vm_object_deallocate and when swapping out an object.
6359 *
6360 * The object is locked, and remains locked throughout the function,
6361 * even as we iterate down the shadow chain. Locks on intermediate objects
6362 * will be dropped, but not the original object.
6363 *
6364 * NOTE: this function used to use recursion, rather than iteration.
6365 */
6366
6367 __private_extern__ void
6368 vm_object_res_deallocate(
6369 vm_object_t object)
6370 {
6371 vm_object_t orig_object = object;
6372 /*
6373 * Object is locked so it can be called directly
6374 * from vm_object_deallocate. Original object is never
6375 * unlocked.
6376 */
6377 assert(object->res_count > 0);
6378 while (--object->res_count == 0) {
6379 assert(object->ref_count >= object->res_count);
6380 vm_object_deactivate_all_pages(object);
6381 /* iterate on shadow, if present */
6382 if (object->shadow != VM_OBJECT_NULL) {
6383 vm_object_t tmp_object = object->shadow;
6384 vm_object_lock(tmp_object);
6385 if (object != orig_object)
6386 vm_object_unlock(object);
6387 object = tmp_object;
6388 assert(object->res_count > 0);
6389 } else
6390 break;
6391 }
6392 if (object != orig_object)
6393 vm_object_unlock(object);
6394 }
6395
6396 /*
6397 * vm_object_res_reference
6398 *
6399 * Internal function to increment residence count on a vm object
6400 * and its shadows. It is called only from vm_object_reference, and
6401 * when swapping in a vm object, via vm_map_swap.
6402 *
6403 * The object is locked, and remains locked throughout the function,
6404 * even as we iterate down the shadow chain. Locks on intermediate objects
6405 * will be dropped, but not the original object.
6406 *
6407 * NOTE: this function used to use recursion, rather than iteration.
6408 */
6409
6410 __private_extern__ void
6411 vm_object_res_reference(
6412 vm_object_t object)
6413 {
6414 vm_object_t orig_object = object;
6415 /*
6416 * Object is locked, so this can be called directly
6417 * from vm_object_reference. This lock is never released.
6418 */
6419 while ((++object->res_count == 1) &&
6420 (object->shadow != VM_OBJECT_NULL)) {
6421 vm_object_t tmp_object = object->shadow;
6422
6423 assert(object->ref_count >= object->res_count);
6424 vm_object_lock(tmp_object);
6425 if (object != orig_object)
6426 vm_object_unlock(object);
6427 object = tmp_object;
6428 }
6429 if (object != orig_object)
6430 vm_object_unlock(object);
6431 assert(orig_object->ref_count >= orig_object->res_count);
6432 }
6433 #endif /* TASK_SWAPPER */
6434
6435 /*
6436 * vm_object_reference:
6437 *
6438 * Gets another reference to the given object.
6439 */
6440 #ifdef vm_object_reference
6441 #undef vm_object_reference
6442 #endif
6443 __private_extern__ void
6444 vm_object_reference(
6445 register vm_object_t object)
6446 {
6447 if (object == VM_OBJECT_NULL)
6448 return;
6449
6450 vm_object_lock(object);
6451 assert(object->ref_count > 0);
6452 vm_object_reference_locked(object);
6453 vm_object_unlock(object);
6454 }
6455
6456 #ifdef MACH_BSD
6457 /*
6458 * Scale the vm_object_cache
6459 * This is required to make sure that the vm_object_cache is big
6460 * enough to effectively cache the mapped file.
6461 * This is really important with UBC, as all regular file vnodes
6462 * have a memory object associated with them. Having this cache too
6463 * small results in rapid reclaim of vnodes and hurts performance a LOT!
6464 *
6465 * This is also needed as the number of vnodes can be dynamically scaled.
6466 */
6467 kern_return_t
6468 adjust_vm_object_cache(
6469 __unused vm_size_t oval,
6470 __unused vm_size_t nval)
6471 {
6472 #if VM_OBJECT_CACHE
6473 vm_object_cached_max = nval;
6474 vm_object_cache_trim(FALSE);
6475 #endif
6476 return (KERN_SUCCESS);
6477 }
6478 #endif /* MACH_BSD */
6479
6480
6481 /*
6482 * vm_object_transpose
6483 *
6484 * This routine takes two VM objects of the same size and exchanges
6485 * their backing store.
6486 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
6487 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
6488 *
6489 * The VM objects must not be locked by caller.
6490 */
6491 unsigned int vm_object_transpose_count = 0;
6492 kern_return_t
6493 vm_object_transpose(
6494 vm_object_t object1,
6495 vm_object_t object2,
6496 vm_object_size_t transpose_size)
6497 {
6498 vm_object_t tmp_object;
6499 kern_return_t retval;
6500 boolean_t object1_locked, object2_locked;
6501 vm_page_t page;
6502 vm_object_offset_t page_offset;
6503 lck_mtx_t *hash_lck;
6504 vm_object_hash_entry_t hash_entry;
6505
6506 tmp_object = VM_OBJECT_NULL;
6507 object1_locked = FALSE; object2_locked = FALSE;
6508
6509 if (object1 == object2 ||
6510 object1 == VM_OBJECT_NULL ||
6511 object2 == VM_OBJECT_NULL) {
6512 /*
6513 * If the 2 VM objects are the same, there's
6514 * no point in exchanging their backing store.
6515 */
6516 retval = KERN_INVALID_VALUE;
6517 goto done;
6518 }
6519
6520 /*
6521 * Since we need to lock both objects at the same time,
6522 * make sure we always lock them in the same order to
6523 * avoid deadlocks.
6524 */
6525 if (object1 > object2) {
6526 tmp_object = object1;
6527 object1 = object2;
6528 object2 = tmp_object;
6529 }
6530
6531 /*
6532 * Allocate a temporary VM object to hold object1's contents
6533 * while we copy object2 to object1.
6534 */
6535 tmp_object = vm_object_allocate(transpose_size);
6536 vm_object_lock(tmp_object);
6537 tmp_object->can_persist = FALSE;
6538
6539
6540 /*
6541 * Grab control of the 1st VM object.
6542 */
6543 vm_object_lock(object1);
6544 object1_locked = TRUE;
6545 if (!object1->alive || object1->terminating ||
6546 object1->copy || object1->shadow || object1->shadowed ||
6547 object1->purgable != VM_PURGABLE_DENY) {
6548 /*
6549 * We don't deal with copy or shadow objects (yet).
6550 */
6551 retval = KERN_INVALID_VALUE;
6552 goto done;
6553 }
6554 /*
6555 * We're about to mess with the object's backing store and
6556 * taking a "paging_in_progress" reference wouldn't be enough
6557 * to prevent any paging activity on this object, so the caller should
6558 * have "quiesced" the objects beforehand, via a UPL operation with
6559 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
6560 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
6561 *
6562 * Wait for any paging operation to complete (but only paging, not
6563 * other kinds of activity not linked to the pager). After we're
6564 * satisfied that there's no more paging in progress, we keep the
6565 * object locked, to guarantee that no one tries to access its pager.
6566 */
6567 vm_object_paging_only_wait(object1, THREAD_UNINT);
6568
6569 /*
6570 * Same as above for the 2nd object...
6571 */
6572 vm_object_lock(object2);
6573 object2_locked = TRUE;
6574 if (! object2->alive || object2->terminating ||
6575 object2->copy || object2->shadow || object2->shadowed ||
6576 object2->purgable != VM_PURGABLE_DENY) {
6577 retval = KERN_INVALID_VALUE;
6578 goto done;
6579 }
6580 vm_object_paging_only_wait(object2, THREAD_UNINT);
6581
6582
6583 if (object1->size != object2->size ||
6584 object1->size != transpose_size) {
6585 /*
6586 * If the 2 objects don't have the same size, we can't
6587 * exchange their backing stores or one would overflow.
6588 * If their size doesn't match the caller's
6589 * "transpose_size", we can't do it either because the
6590 * transpose operation will affect the entire span of
6591 * the objects.
6592 */
6593 retval = KERN_INVALID_VALUE;
6594 goto done;
6595 }
6596
6597
6598 /*
6599 * Transpose the lists of resident pages.
6600 * This also updates the resident_page_count and the memq_hint.
6601 */
6602 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
6603 /*
6604 * No pages in object1, just transfer pages
6605 * from object2 to object1. No need to go through
6606 * an intermediate object.
6607 */
6608 while (!queue_empty(&object2->memq)) {
6609 page = (vm_page_t) queue_first(&object2->memq);
6610 vm_page_rename(page, object1, page->offset, FALSE);
6611 }
6612 assert(queue_empty(&object2->memq));
6613 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
6614 /*
6615 * No pages in object2, just transfer pages
6616 * from object1 to object2. No need to go through
6617 * an intermediate object.
6618 */
6619 while (!queue_empty(&object1->memq)) {
6620 page = (vm_page_t) queue_first(&object1->memq);
6621 vm_page_rename(page, object2, page->offset, FALSE);
6622 }
6623 assert(queue_empty(&object1->memq));
6624 } else {
6625 /* transfer object1's pages to tmp_object */
6626 while (!queue_empty(&object1->memq)) {
6627 page = (vm_page_t) queue_first(&object1->memq);
6628 page_offset = page->offset;
6629 vm_page_remove(page, TRUE);
6630 page->offset = page_offset;
6631 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
6632 }
6633 assert(queue_empty(&object1->memq));
6634 /* transfer object2's pages to object1 */
6635 while (!queue_empty(&object2->memq)) {
6636 page = (vm_page_t) queue_first(&object2->memq);
6637 vm_page_rename(page, object1, page->offset, FALSE);
6638 }
6639 assert(queue_empty(&object2->memq));
6640 /* transfer tmp_object's pages to object1 */
6641 while (!queue_empty(&tmp_object->memq)) {
6642 page = (vm_page_t) queue_first(&tmp_object->memq);
6643 queue_remove(&tmp_object->memq, page,
6644 vm_page_t, listq);
6645 vm_page_insert(page, object2, page->offset);
6646 }
6647 assert(queue_empty(&tmp_object->memq));
6648 }
6649
6650 #define __TRANSPOSE_FIELD(field) \
6651 MACRO_BEGIN \
6652 tmp_object->field = object1->field; \
6653 object1->field = object2->field; \
6654 object2->field = tmp_object->field; \
6655 MACRO_END
6656
6657 /* "Lock" refers to the object not its contents */
6658 /* "size" should be identical */
6659 assert(object1->size == object2->size);
6660 /* "memq_hint" was updated above when transposing pages */
6661 /* "ref_count" refers to the object not its contents */
6662 #if TASK_SWAPPER
6663 /* "res_count" refers to the object not its contents */
6664 #endif
6665 /* "resident_page_count" was updated above when transposing pages */
6666 /* "wired_page_count" was updated above when transposing pages */
6667 /* "reusable_page_count" was updated above when transposing pages */
6668 /* there should be no "copy" */
6669 assert(!object1->copy);
6670 assert(!object2->copy);
6671 /* there should be no "shadow" */
6672 assert(!object1->shadow);
6673 assert(!object2->shadow);
6674 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
6675 __TRANSPOSE_FIELD(pager);
6676 __TRANSPOSE_FIELD(paging_offset);
6677 __TRANSPOSE_FIELD(pager_control);
6678 /* update the memory_objects' pointers back to the VM objects */
6679 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
6680 memory_object_control_collapse(object1->pager_control,
6681 object1);
6682 }
6683 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
6684 memory_object_control_collapse(object2->pager_control,
6685 object2);
6686 }
6687 __TRANSPOSE_FIELD(copy_strategy);
6688 /* "paging_in_progress" refers to the object not its contents */
6689 assert(!object1->paging_in_progress);
6690 assert(!object2->paging_in_progress);
6691 assert(object1->activity_in_progress);
6692 assert(object2->activity_in_progress);
6693 /* "all_wanted" refers to the object not its contents */
6694 __TRANSPOSE_FIELD(pager_created);
6695 __TRANSPOSE_FIELD(pager_initialized);
6696 __TRANSPOSE_FIELD(pager_ready);
6697 __TRANSPOSE_FIELD(pager_trusted);
6698 __TRANSPOSE_FIELD(can_persist);
6699 __TRANSPOSE_FIELD(internal);
6700 __TRANSPOSE_FIELD(temporary);
6701 __TRANSPOSE_FIELD(private);
6702 __TRANSPOSE_FIELD(pageout);
6703 /* "alive" should be set */
6704 assert(object1->alive);
6705 assert(object2->alive);
6706 /* "purgeable" should be non-purgeable */
6707 assert(object1->purgable == VM_PURGABLE_DENY);
6708 assert(object2->purgable == VM_PURGABLE_DENY);
6709 /* "shadowed" refers to the object not its contents */
6710 __TRANSPOSE_FIELD(silent_overwrite);
6711 __TRANSPOSE_FIELD(advisory_pageout);
6712 __TRANSPOSE_FIELD(true_share);
6713 /* "terminating" should not be set */
6714 assert(!object1->terminating);
6715 assert(!object2->terminating);
6716 __TRANSPOSE_FIELD(named);
6717 /* "shadow_severed" refers to the object not its contents */
6718 __TRANSPOSE_FIELD(phys_contiguous);
6719 __TRANSPOSE_FIELD(nophyscache);
6720 /* "cached_list.next" points to transposed object */
6721 object1->cached_list.next = (queue_entry_t) object2;
6722 object2->cached_list.next = (queue_entry_t) object1;
6723 /* "cached_list.prev" should be NULL */
6724 assert(object1->cached_list.prev == NULL);
6725 assert(object2->cached_list.prev == NULL);
6726 /* "msr_q" is linked to the object not its contents */
6727 assert(queue_empty(&object1->msr_q));
6728 assert(queue_empty(&object2->msr_q));
6729 __TRANSPOSE_FIELD(last_alloc);
6730 __TRANSPOSE_FIELD(sequential);
6731 __TRANSPOSE_FIELD(pages_created);
6732 __TRANSPOSE_FIELD(pages_used);
6733 #if MACH_PAGEMAP
6734 __TRANSPOSE_FIELD(existence_map);
6735 #endif
6736 __TRANSPOSE_FIELD(cow_hint);
6737 #if MACH_ASSERT
6738 __TRANSPOSE_FIELD(paging_object);
6739 #endif
6740 __TRANSPOSE_FIELD(wimg_bits);
6741 __TRANSPOSE_FIELD(code_signed);
6742 if (object1->hashed) {
6743 hash_lck = vm_object_hash_lock_spin(object2->pager);
6744 hash_entry = vm_object_hash_lookup(object2->pager, FALSE);
6745 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
6746 hash_entry->object = object2;
6747 vm_object_hash_unlock(hash_lck);
6748 }
6749 if (object2->hashed) {
6750 hash_lck = vm_object_hash_lock_spin(object1->pager);
6751 hash_entry = vm_object_hash_lookup(object1->pager, FALSE);
6752 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
6753 hash_entry->object = object1;
6754 vm_object_hash_unlock(hash_lck);
6755 }
6756 __TRANSPOSE_FIELD(hashed);
6757 object1->transposed = TRUE;
6758 object2->transposed = TRUE;
6759 __TRANSPOSE_FIELD(mapping_in_progress);
6760 __TRANSPOSE_FIELD(volatile_empty);
6761 __TRANSPOSE_FIELD(volatile_fault);
6762 __TRANSPOSE_FIELD(all_reusable);
6763 assert(object1->blocked_access);
6764 assert(object2->blocked_access);
6765 assert(object1->__object2_unused_bits == 0);
6766 assert(object2->__object2_unused_bits == 0);
6767 #if UPL_DEBUG
6768 /* "uplq" refers to the object not its contents (see upl_transpose()) */
6769 #endif
6770 assert(object1->objq.next == NULL);
6771 assert(object1->objq.prev == NULL);
6772 assert(object2->objq.next == NULL);
6773 assert(object2->objq.prev == NULL);
6774
6775 #undef __TRANSPOSE_FIELD
6776
6777 retval = KERN_SUCCESS;
6778
6779 done:
6780 /*
6781 * Cleanup.
6782 */
6783 if (tmp_object != VM_OBJECT_NULL) {
6784 vm_object_unlock(tmp_object);
6785 /*
6786 * Re-initialize the temporary object to avoid
6787 * deallocating a real pager.
6788 */
6789 _vm_object_allocate(transpose_size, tmp_object);
6790 vm_object_deallocate(tmp_object);
6791 tmp_object = VM_OBJECT_NULL;
6792 }
6793
6794 if (object1_locked) {
6795 vm_object_unlock(object1);
6796 object1_locked = FALSE;
6797 }
6798 if (object2_locked) {
6799 vm_object_unlock(object2);
6800 object2_locked = FALSE;
6801 }
6802
6803 vm_object_transpose_count++;
6804
6805 return retval;
6806 }
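
/*
 * Illustrative sketch (hypothetical helper, compiled out): the expected
 * call pattern for the routine above, once both objects have been
 * quiesced via UPLs with UPL_SET_IO_WIRE and UPL_BLOCK_ACCESS.
 */
#if 0
static kern_return_t
example_swap_backing_store(vm_object_t a, vm_object_t b,
			   vm_object_size_t size)
{
	kern_return_t kr;

	/* both objects must be unlocked and of identical size */
	kr = vm_object_transpose(a, b, size);
	if (kr == KERN_INVALID_VALUE) {
		/*
		 * sizes differ, or one object has a copy/shadow or is
		 * purgeable, so the transpose was refused
		 */
	}
	return kr;
}
#endif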
6807
6808
6809 /*
6810 * vm_object_cluster_size
6811 *
6812 * Determine how big a cluster we should issue an I/O for...
6813 *
6814 * Inputs: *start == offset of page needed
6815 * *length == maximum cluster pager can handle
6816 * Outputs: *start == beginning offset of cluster
6817 * *length == length of cluster to try
6818 *
6819 * The original *start will be encompassed by the cluster
6820 *
6821 */
6822 extern int speculative_reads_disabled;
6823 #if CONFIG_EMBEDDED
6824 unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
6825 unsigned int preheat_pages_min = 8;
6826 unsigned int preheat_pages_mult = 4;
6827 #else
6828 unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
6829 unsigned int preheat_pages_min = 8;
6830 unsigned int preheat_pages_mult = 4;
6831 #endif
6832
6833 uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];
6834 uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1];
6835
6836
6837 __private_extern__ void
6838 vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
6839 vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming)
6840 {
6841 vm_size_t pre_heat_size;
6842 vm_size_t tail_size;
6843 vm_size_t head_size;
6844 vm_size_t max_length;
6845 vm_size_t cluster_size;
6846 vm_object_offset_t object_size;
6847 vm_object_offset_t orig_start;
6848 vm_object_offset_t target_start;
6849 vm_object_offset_t offset;
6850 vm_behavior_t behavior;
6851 boolean_t look_behind = TRUE;
6852 boolean_t look_ahead = TRUE;
6853 uint32_t throttle_limit;
6854 int sequential_run;
6855 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
6856 unsigned int max_ph_size;
6857 unsigned int min_ph_size;
6858 unsigned int ph_mult;
6859
6860 assert( !(*length & PAGE_MASK));
6861 assert( !(*start & PAGE_MASK_64));
6862
6863 if ( (ph_mult = preheat_pages_mult) < 1 )
6864 ph_mult = 1;
6865 if ( (min_ph_size = preheat_pages_min) < 1 )
6866 min_ph_size = 1;
6867 if ( (max_ph_size = preheat_pages_max) > MAX_UPL_TRANSFER )
6868 max_ph_size = MAX_UPL_TRANSFER;
6869
6870 if ( (max_length = *length) > (max_ph_size * PAGE_SIZE) )
6871 max_length = (max_ph_size * PAGE_SIZE);
6872
6873 /*
6874 * we'll always return a cluster size of at least
6875 * 1 page, since the original fault must always
6876 * be processed
6877 */
6878 *length = PAGE_SIZE;
6879 *io_streaming = 0;
6880
6881 if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
6882 /*
6883 * no cluster... just fault the page in
6884 */
6885 return;
6886 }
6887 orig_start = *start;
6888 target_start = orig_start;
6889 cluster_size = round_page(fault_info->cluster_size);
6890 behavior = fault_info->behavior;
6891
6892 vm_object_lock(object);
6893
6894 if (object->internal)
6895 object_size = object->size;
6896 else if (object->pager != MEMORY_OBJECT_NULL)
6897 vnode_pager_get_object_size(object->pager, &object_size);
6898 else
6899 goto out; /* pager is gone for this object, nothing more to do */
6900
6901 object_size = round_page_64(object_size);
6902
6903 if (orig_start >= object_size) {
6904 /*
6905 * fault occurred beyond the EOF...
6906 * we need to punt w/o changing the
6907 * starting offset
6908 */
6909 goto out;
6910 }
6911 if (object->pages_used > object->pages_created) {
6912 /*
6913 * must have wrapped our 32 bit counters
6914 * so reset
6915 */
6916 object->pages_used = object->pages_created = 0;
6917 }
6918 if ((sequential_run = object->sequential)) {
6919 if (sequential_run < 0) {
6920 sequential_behavior = VM_BEHAVIOR_RSEQNTL;
6921 sequential_run = 0 - sequential_run;
6922 } else {
6923 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
6924 }
6925
6926 }
6927 switch(behavior) {
6928
6929 default:
6930 behavior = VM_BEHAVIOR_DEFAULT;
6931
6932 case VM_BEHAVIOR_DEFAULT:
6933 if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
6934 goto out;
6935
6936 if (sequential_run >= (3 * PAGE_SIZE)) {
6937 pre_heat_size = sequential_run + PAGE_SIZE;
6938
6939 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL)
6940 look_behind = FALSE;
6941 else
6942 look_ahead = FALSE;
6943
6944 *io_streaming = 1;
6945 } else {
6946
6947 if (object->pages_created < 32 * ph_mult) {
6948 /*
6949 * prime the pump
6950 */
6951 pre_heat_size = PAGE_SIZE * 8 * ph_mult;
6952 break;
6953 }
6954 /*
6955 * Linear growth in PH size: The maximum size is max_length...
6956 * this calculation will result in a size that is neither a
6957 * power of 2 nor a multiple of PAGE_SIZE... so round
6958 * it up to the nearest PAGE_SIZE boundary
6959 */
6960 pre_heat_size = (ph_mult * (max_length * object->pages_used) / object->pages_created);
6961
6962 if (pre_heat_size < PAGE_SIZE * min_ph_size)
6963 pre_heat_size = PAGE_SIZE * min_ph_size;
6964 else
6965 pre_heat_size = round_page(pre_heat_size);
6966 }
6967 break;
6968
6969 case VM_BEHAVIOR_RANDOM:
6970 if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
6971 goto out;
6972 break;
6973
6974 case VM_BEHAVIOR_SEQUENTIAL:
6975 if ((pre_heat_size = cluster_size) == 0)
6976 pre_heat_size = sequential_run + PAGE_SIZE;
6977 look_behind = FALSE;
6978 *io_streaming = 1;
6979
6980 break;
6981
6982 case VM_BEHAVIOR_RSEQNTL:
6983 if ((pre_heat_size = cluster_size) == 0)
6984 pre_heat_size = sequential_run + PAGE_SIZE;
6985 look_ahead = FALSE;
6986 *io_streaming = 1;
6987
6988 break;
6989
6990 }
6991 throttle_limit = (uint32_t) max_length;
6992 assert(throttle_limit == max_length);
6993
6994 if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) {
6995 if (max_length > throttle_limit)
6996 max_length = throttle_limit;
6997 }
6998 if (pre_heat_size > max_length)
6999 pre_heat_size = max_length;
7000
7001 if (behavior == VM_BEHAVIOR_DEFAULT) {
7002 if (vm_page_free_count < vm_page_throttle_limit)
7003 pre_heat_size = trunc_page(pre_heat_size / 8);
7004 else if (vm_page_free_count < vm_page_free_target)
7005 pre_heat_size = trunc_page(pre_heat_size / 2);
7006
7007 if (pre_heat_size <= PAGE_SIZE)
7008 goto out;
7009 }
7010 if (look_ahead == TRUE) {
7011 if (look_behind == TRUE) {
7012 /*
7013 * if we get here it's due to a random access...
7014 * so we want to center the original fault address
7015 * within the cluster we will issue... make sure
7016 * to calculate 'head_size' as a multiple of PAGE_SIZE...
7017 * 'pre_heat_size' is a multiple of PAGE_SIZE but not
7018 * necessarily an even number of pages so we need to truncate
7019 * the result to a PAGE_SIZE boundary
7020 */
7021 head_size = trunc_page(pre_heat_size / 2);
7022
7023 if (target_start > head_size)
7024 target_start -= head_size;
7025 else
7026 target_start = 0;
7027
7028 /*
7029 * 'target_start' at this point represents the beginning offset
7030 * of the cluster we are considering... 'orig_start' will be in
7031 * the center of this cluster if we didn't have to clip the start
7032 * due to running into the start of the file
7033 */
7034 }
7035 if ((target_start + pre_heat_size) > object_size)
7036 pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start));
7037 /*
7038 * at this point calculate the number of pages beyond the original fault
7039 * address that we want to consider... this is guaranteed not to extend beyond
7040 * the current EOF...
7041 */
7042 assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start));
7043 tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE;
7044 } else {
7045 if (pre_heat_size > target_start)
7046 pre_heat_size = (vm_size_t) target_start; /* XXX: 32-bit vs 64-bit ? Joe ? */
7047 tail_size = 0;
7048 }
7049 assert( !(target_start & PAGE_MASK_64));
7050 assert( !(pre_heat_size & PAGE_MASK));
7051
7052 pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;
7053
7054 if (pre_heat_size <= PAGE_SIZE)
7055 goto out;
7056
7057 if (look_behind == TRUE) {
7058 /*
7059 * take a look at the pages before the original
7060 * faulting offset... recalculate this in case
7061 * we had to clip 'pre_heat_size' above to keep
7062 * from running past the EOF.
7063 */
7064 head_size = pre_heat_size - tail_size - PAGE_SIZE;
7065
7066 for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
7067 /*
7068 * don't poke below the lowest offset
7069 */
7070 if (offset < fault_info->lo_offset)
7071 break;
7072 /*
7073 * for external objects and internal objects w/o an existence map
7074 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
7075 */
7076 #if MACH_PAGEMAP
7077 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
7078 /*
7079 * we know for a fact that the pager can't provide the page
7080 * so don't include it or any pages beyond it in this cluster
7081 */
7082 break;
7083 }
7084 #endif
7085 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
7086 /*
7087 * don't bridge resident pages
7088 */
7089 break;
7090 }
7091 *start = offset;
7092 *length += PAGE_SIZE;
7093 }
7094 }
7095 if (look_ahead == TRUE) {
7096 for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
7097 /*
7098 * don't poke above the highest offset
7099 */
7100 if (offset >= fault_info->hi_offset)
7101 break;
7102 assert(offset < object_size);
7103
7104 /*
7105 * for external objects and internal objects w/o an existence map
7106 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
7107 */
7108 #if MACH_PAGEMAP
7109 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
7110 /*
7111 * we know for a fact that the pager can't provide the page
7112 * so don't include it or any pages beyond it in this cluster
7113 */
7114 break;
7115 }
7116 #endif
7117 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
7118 /*
7119 * don't bridge resident pages
7120 */
7121 break;
7122 }
7123 *length += PAGE_SIZE;
7124 }
7125 }
7126 out:
7127 if (*length > max_length)
7128 *length = max_length;
7129
7130 pre_heat_cluster[*length / PAGE_SIZE]++;
7131
7132 vm_object_unlock(object);
7133 }
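
/*
 * Illustrative sketch (hypothetical caller, compiled out): the in/out
 * contract of vm_object_cluster_size() as described in its header
 * comment above.
 */
#if 0
static void
example_pick_cluster(vm_object_t object,
		     vm_object_offset_t fault_offset,
		     vm_size_t max_pager_xfer,
		     vm_object_fault_info_t fault_info)
{
	vm_object_offset_t	start = fault_offset;	 /* page needed */
	vm_size_t		length = max_pager_xfer; /* pager's limit */
	uint32_t		io_streaming = 0;

	vm_object_cluster_size(object, &start, &length,
			       fault_info, &io_streaming);
	/*
	 * on return, [start, start + length) encompasses fault_offset,
	 * is at least one page, and io_streaming is set for sequential
	 * access patterns
	 */
}
#endif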
7134
7135
7136 /*
7137 * Allow manipulation of individual page state. This is actually part of
7138 * the UPL regimen but takes place on the VM object rather than on a UPL
7139 */
7140
7141 kern_return_t
7142 vm_object_page_op(
7143 vm_object_t object,
7144 vm_object_offset_t offset,
7145 int ops,
7146 ppnum_t *phys_entry,
7147 int *flags)
7148 {
7149 vm_page_t dst_page;
7150
7151 vm_object_lock(object);
7152
7153 if(ops & UPL_POP_PHYSICAL) {
7154 if(object->phys_contiguous) {
7155 if (phys_entry) {
7156 *phys_entry = (ppnum_t)
7157 (object->shadow_offset >> PAGE_SHIFT);
7158 }
7159 vm_object_unlock(object);
7160 return KERN_SUCCESS;
7161 } else {
7162 vm_object_unlock(object);
7163 return KERN_INVALID_OBJECT;
7164 }
7165 }
7166 if(object->phys_contiguous) {
7167 vm_object_unlock(object);
7168 return KERN_INVALID_OBJECT;
7169 }
7170
7171 while(TRUE) {
7172 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
7173 vm_object_unlock(object);
7174 return KERN_FAILURE;
7175 }
7176
7177 /* Sync up on getting the busy bit */
7178 if((dst_page->busy || dst_page->cleaning) &&
7179 (((ops & UPL_POP_SET) &&
7180 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
7181 /* someone else is playing with the page, we will */
7182 /* have to wait */
7183 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
7184 continue;
7185 }
7186
7187 if (ops & UPL_POP_DUMP) {
7188 if (dst_page->pmapped == TRUE)
7189 pmap_disconnect(dst_page->phys_page);
7190
7191 VM_PAGE_FREE(dst_page);
7192 break;
7193 }
7194
7195 if (flags) {
7196 *flags = 0;
7197
7198 /* Get the condition of flags before requested ops */
7199 /* are undertaken */
7200
7201 if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
7202 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
7203 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
7204 if(dst_page->absent) *flags |= UPL_POP_ABSENT;
7205 if(dst_page->busy) *flags |= UPL_POP_BUSY;
7206 }
7207
7208 /* The caller should have made a call either contingent with */
7209 /* or prior to this call to set UPL_POP_BUSY */
7210 if(ops & UPL_POP_SET) {
7211 /* The protection granted with this assert will */
7212 /* not be complete. If the caller violates the */
7213 /* convention and attempts to change page state */
7214 /* without first setting busy we may not see it */
7215 /* because the page may already be busy. However */
7216 /* if such violations occur we will assert sooner */
7217 /* or later. */
7218 assert(dst_page->busy || (ops & UPL_POP_BUSY));
7219 if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
7220 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
7221 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
7222 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
7223 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
7224 }
7225
7226 if(ops & UPL_POP_CLR) {
7227 assert(dst_page->busy);
7228 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
7229 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
7230 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
7231 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
7232 if (ops & UPL_POP_BUSY) {
7233 dst_page->busy = FALSE;
7234 PAGE_WAKEUP(dst_page);
7235 }
7236 }
7237
7238 if (dst_page->encrypted) {
7239 /*
7240 * ENCRYPTED SWAP:
7241 * We need to decrypt this encrypted page before the
7242 * caller can access its contents.
7243 * But if the caller really wants to access the page's
7244 * contents, they have to keep the page "busy".
7245 * Otherwise, the page could get recycled or re-encrypted
7246 * at any time.
7247 */
7248 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
7249 dst_page->busy) {
7250 /*
7251 * The page is stable enough to be accessed by
7252 * the caller, so make sure its contents are
7253 * not encrypted.
7254 */
7255 vm_page_decrypt(dst_page, 0);
7256 } else {
7257 /*
7258 * The page is not busy, so don't bother
7259 * decrypting it, since anything could
7260 * happen to it between now and when the
7261 * caller wants to access it.
7262 * We should not give the caller access
7263 * to this page.
7264 */
7265 assert(!phys_entry);
7266 }
7267 }
7268
7269 if (phys_entry) {
7270 /*
7271 * The physical page number will remain valid
7272 * only if the page is kept busy.
7273 * ENCRYPTED SWAP: make sure we don't let the
7274 * caller access an encrypted page.
7275 */
7276 assert(dst_page->busy);
7277 assert(!dst_page->encrypted);
7278 *phys_entry = dst_page->phys_page;
7279 }
7280
7281 break;
7282 }
7283
7284 vm_object_unlock(object);
7285 return KERN_SUCCESS;
7286
7287 }
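
/*
 * Illustrative sketch (hypothetical caller, compiled out): honoring the
 * UPL_POP_BUSY convention noted above while reading a page's physical
 * page number.
 */
#if 0
static kern_return_t
example_peek_phys_page(vm_object_t object, vm_object_offset_t offset,
		       ppnum_t *phys)
{
	kern_return_t	kr;
	int		flags;

	/* set busy so the physical page number stays valid */
	kr = vm_object_page_op(object, offset,
			       UPL_POP_SET | UPL_POP_BUSY,
			       phys, &flags);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... use *phys while the page is busy ... */

	/* clear busy again, waking any waiters */
	return vm_object_page_op(object, offset,
				 UPL_POP_CLR | UPL_POP_BUSY,
				 NULL, NULL);
}
#endif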
7288
7289 /*
7290 * vm_object_range_op offers performance enhancement over
7291 * vm_object_page_op for page_op functions which do not require page
7292 * level state to be returned from the call. Page_op was created to provide
7293 * a low-cost alternative to page manipulation via UPLs when only a single
7294 * page was involved. The range_op call extends the _op family of functions
7295 * to work on multiple pages, where the absence of page-level state handling
7296 * allows the caller to avoid the overhead of the UPL structures.
7297 */
7298
7299 kern_return_t
7300 vm_object_range_op(
7301 vm_object_t object,
7302 vm_object_offset_t offset_beg,
7303 vm_object_offset_t offset_end,
7304 int ops,
7305 uint32_t *range)
7306 {
7307 vm_object_offset_t offset;
7308 vm_page_t dst_page;
7309
7310 if (offset_end - offset_beg > (uint32_t) -1) {
7311 /* range is too big and would overflow "*range" */
7312 return KERN_INVALID_ARGUMENT;
7313 }
7314 if (object->resident_page_count == 0) {
7315 if (range) {
7316 if (ops & UPL_ROP_PRESENT) {
7317 *range = 0;
7318 } else {
7319 *range = (uint32_t) (offset_end - offset_beg);
7320 assert(*range == (offset_end - offset_beg));
7321 }
7322 }
7323 return KERN_SUCCESS;
7324 }
7325 vm_object_lock(object);
7326
7327 if (object->phys_contiguous) {
7328 vm_object_unlock(object);
7329 return KERN_INVALID_OBJECT;
7330 }
7331
7332 offset = offset_beg & ~PAGE_MASK_64;
7333
7334 while (offset < offset_end) {
7335 dst_page = vm_page_lookup(object, offset);
7336 if (dst_page != VM_PAGE_NULL) {
7337 if (ops & UPL_ROP_DUMP) {
7338 if (dst_page->busy || dst_page->cleaning) {
7339 /*
7340 * someone else is playing with the
7341 * page, we will have to wait
7342 */
7343 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
7344 /*
7345 * need to look the page up again since its
7346 * state may have changed while we slept;
7347 * it might even belong to a different object
7348 * at this point
7349 */
7350 continue;
7351 }
7352 if (dst_page->pmapped == TRUE)
7353 pmap_disconnect(dst_page->phys_page);
7354
7355 VM_PAGE_FREE(dst_page);
7356
7357 } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent)
7358 break;
7359 } else if (ops & UPL_ROP_PRESENT)
7360 break;
7361
7362 offset += PAGE_SIZE;
7363 }
7364 vm_object_unlock(object);
7365
7366 if (range) {
7367 if (offset > offset_end)
7368 offset = offset_end;
7369 if(offset > offset_beg) {
7370 *range = (uint32_t) (offset - offset_beg);
7371 assert(*range == (offset - offset_beg));
7372 } else {
7373 *range = 0;
7374 }
7375 }
7376 return KERN_SUCCESS;
7377 }
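
/*
 * Illustrative sketch (hypothetical caller, compiled out): discarding
 * every resident page in a range without building a UPL, per the
 * rationale above.
 */
#if 0
static void
example_dump_range(vm_object_t object,
		   vm_object_offset_t beg, vm_object_offset_t end)
{
	uint32_t processed = 0;

	(void) vm_object_range_op(object, beg, end,
				  UPL_ROP_DUMP, &processed);
	/* 'processed' reports how many bytes of the range were covered */
}
#endif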
7378
7379
7380 uint32_t scan_object_collision = 0;
7381
7382 void
7383 vm_object_lock(vm_object_t object)
7384 {
7385 if (object == vm_pageout_scan_wants_object) {
7386 scan_object_collision++;
7387 mutex_pause(2);
7388 }
7389 lck_rw_lock_exclusive(&object->Lock);
7390 }
7391
7392 boolean_t
7393 vm_object_lock_avoid(vm_object_t object)
7394 {
7395 if (object == vm_pageout_scan_wants_object) {
7396 scan_object_collision++;
7397 return TRUE;
7398 }
7399 return FALSE;
7400 }
7401
7402 boolean_t
7403 _vm_object_lock_try(vm_object_t object)
7404 {
7405 return (lck_rw_try_lock_exclusive(&object->Lock));
7406 }
7407
7408 boolean_t
7409 vm_object_lock_try(vm_object_t object)
7410 {
7411 if (vm_object_lock_avoid(object)) {
7412 mutex_pause(2);
7413 }
7414 return _vm_object_lock_try(object);
7415 }
7416 void
7417 vm_object_lock_shared(vm_object_t object)
7418 {
7419 if (vm_object_lock_avoid(object)) {
7420 mutex_pause(2);
7421 }
7422 lck_rw_lock_shared(&object->Lock);
7423 }
7424
7425 boolean_t
7426 vm_object_lock_try_shared(vm_object_t object)
7427 {
7428 if (vm_object_lock_avoid(object)) {
7429 mutex_pause(2);
7430 }
7431 return (lck_rw_try_lock_shared(&object->Lock));
7432 }