git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2007 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* @OSF_COPYRIGHT@
	30	*/
	31	/*
	32	* Mach Operating System
	33	* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
	34	* All Rights Reserved.
	35	*
	36	* Permission to use, copy, modify and distribute this software and its
	37	* documentation is hereby granted, provided that both the copyright
	38	* notice and this permission notice appear in all copies of the
	39	* software, derivative works or modified versions, and any portions
	40	* thereof, and that both notices appear in supporting documentation.
	41	*
	42	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	43	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
	44	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	45	*
	46	* Carnegie Mellon requests users of this software to return to
	47	*
	48	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	49	* School of Computer Science
	50	* Carnegie Mellon University
	51	* Pittsburgh PA 15213-3890
	52	*
	53	* any improvements or extensions that they make and grant Carnegie Mellon
	54	* the rights to redistribute these changes.
	55	*/
	56	/*
	57	*/
	58	/*
	59	* File: vm/vm_object.c
	60	* Author: Avadis Tevanian, Jr., Michael Wayne Young
	61	*
	62	* Virtual memory object module.
	63	*/
	64
	65	#include <debug.h>
	66	#include <mach_pagemap.h>
	67	#include <task_swapper.h>
	68
	69	#include <mach/mach_types.h>
	70	#include <mach/memory_object.h>
	71	#include <mach/memory_object_default.h>
	72	#include <mach/memory_object_control_server.h>
	73	#include <mach/vm_param.h>
	74
	75	#include <mach/sdt.h>
	76
	77	#include <ipc/ipc_types.h>
	78	#include <ipc/ipc_port.h>
	79
	80	#include <kern/kern_types.h>
	81	#include <kern/assert.h>
	82	#include <kern/lock.h>
	83	#include <kern/queue.h>
	84	#include <kern/xpr.h>
	85	#include <kern/kalloc.h>
	86	#include <kern/zalloc.h>
	87	#include <kern/host.h>
	88	#include <kern/host_statistics.h>
	89	#include <kern/processor.h>
	90	#include <kern/misc_protos.h>
	91
	92	#include <vm/memory_object.h>
	93	#include <vm/vm_fault.h>
	94	#include <vm/vm_map.h>
	95	#include <vm/vm_object.h>
	96	#include <vm/vm_page.h>
	97	#include <vm/vm_pageout.h>
	98	#include <vm/vm_protos.h>
	99	#include <vm/vm_purgeable_internal.h>
	100
	101	/*
	102	* Virtual memory objects maintain the actual data
	103	* associated with allocated virtual memory. A given
	104	* page of memory exists within exactly one object.
	105	*
	106	* An object is only deallocated when all "references"
	107	* are given up.
	108	*
	109	* Associated with each object is a list of all resident
	110	* memory pages belonging to that object; this list is
	111	* maintained by the "vm_page" module, but locked by the object's
	112	* lock.
	113	*
	114	* Each object also records the memory object reference
	115	* that is used by the kernel to request and write
	116	* back data (the memory object, field "pager"), etc...
	117	*
	118	* Virtual memory objects are allocated to provide
	119	* zero-filled memory (vm_allocate) or map a user-defined
	120	* memory object into a virtual address space (vm_map).
	121	*
	122	* Virtual memory objects that refer to a user-defined
	123	* memory object are called "permanent", because all changes
	124	* made in virtual memory are reflected back to the
	125	* memory manager, which may then store it permanently.
	126	* Other virtual memory objects are called "temporary",
	127	* meaning that changes need be written back only when
	128	* necessary to reclaim pages, and that storage associated
	129	* with the object can be discarded once it is no longer
	130	* mapped.
	131	*
	132	* A permanent memory object may be mapped into more
	133	* than one virtual address space. Moreover, two threads
	134	* may attempt to make the first mapping of a memory
	135	* object concurrently. Only one thread is allowed to
	136	* complete this mapping; all others wait until the
	137	* "pager_initialized" field is asserted, indicating
	138	* that the first thread has initialized all of the
	139	* necessary fields in the virtual memory object structure.
	140	*
	141	* The kernel relies on a default memory manager to
	142	* provide backing storage for the zero-filled virtual
	143	* memory objects. The pager memory objects associated
	144	* with these temporary virtual memory objects are only
	145	* requested from the default memory manager when it
	146	* becomes necessary. Virtual memory objects
	147	* that depend on the default memory manager are called
	148	* "internal". The "pager_created" field is provided to
	149	* indicate whether these ports have ever been allocated.
	150	*
	151	* The kernel may also create virtual memory objects to
	152	* hold changed pages after a copy-on-write operation.
	153	* In this case, the virtual memory object (and its
	154	* backing storage -- its memory object) only contain
	155	* those pages that have been changed. The "shadow"
	156	* field refers to the virtual memory object that contains
	157	* the remainder of the contents. The "shadow_offset"
	158	* field indicates where in the "shadow" these contents begin.
	159	* The "copy" field refers to a virtual memory object
	160	* to which changed pages must be copied before changing
	161	* this object, in order to implement another form
	162	* of copy-on-write optimization.
	163	*
	164	* The virtual memory object structure also records
	165	* the attributes associated with its memory object.
	166	* The "pager_ready", "can_persist" and "copy_strategy"
	167	* fields represent those attributes. The "cached_list"
	168	* field is used in the implementation of the persistence
	169	* attribute.
	170	*
	171	* ZZZ Continue this comment.
	172	*/
	173
	174	/* Forward declarations for internal functions. */
	175	static kern_return_t vm_object_terminate(
	176	vm_object_t object);
	177
	178	extern void vm_object_remove(
	179	vm_object_t object);
	180
	181	static kern_return_t vm_object_copy_call(
	182	vm_object_t src_object,
	183	vm_object_offset_t src_offset,
	184	vm_object_size_t size,
	185	vm_object_t *_result_object);
	186
	187	static void vm_object_do_collapse(
	188	vm_object_t object,
	189	vm_object_t backing_object);
	190
	191	static void vm_object_do_bypass(
	192	vm_object_t object,
	193	vm_object_t backing_object);
	194
	195	static void vm_object_release_pager(
	196	memory_object_t pager,
	197	boolean_t hashed);
	198
	199	static zone_t vm_object_zone; /* vm backing store zone */
	200
	201	/*
	202	* All wired-down kernel memory belongs to a single virtual
	203	* memory object (kernel_object) to avoid wasting data structures.
	204	*/
	205	static struct vm_object kernel_object_store;
	206	vm_object_t kernel_object;
	207
	208
	209	/*
	210	* The submap object is used as a placeholder for vm_map_submap
	211	* operations. The object is declared in vm_map.c because it
	212	* is exported by the vm_map module. The storage is declared
	213	* here because it must be initialized here.
	214	*/
	215	static struct vm_object vm_submap_object_store;
	216
	217	/*
	218	* Virtual memory objects are initialized from
	219	* a template (see vm_object_allocate).
	220	*
	221	* When adding a new field to the virtual memory
	222	* object structure, be sure to add initialization
	223	* (see _vm_object_allocate()).
	224	*/
	225	static struct vm_object vm_object_template;
	226
	227	unsigned int vm_page_purged_wired = 0;
	228	unsigned int vm_page_purged_busy = 0;
	229	unsigned int vm_page_purged_others = 0;
	230
	231	#if VM_OBJECT_CACHE
	232	/*
	233	* Virtual memory objects that are not referenced by
	234	* any address maps, but that are allowed to persist
	235	* (an attribute specified by the associated memory manager),
	236	* are kept in a queue (vm_object_cached_list).
	237	*
	238	* When an object from this queue is referenced again,
	239	* for example to make another address space mapping,
	240	* it must be removed from the queue. That is, the
	241	* queue contains only objects with zero references.
	242	*
	243	* The kernel may choose to terminate objects from this
	244	* queue in order to reclaim storage. The current policy
	245	* is to permit a fixed maximum number of unreferenced
	246	* objects (vm_object_cached_max).
	247	*
	248	* A spin lock (accessed by routines
	249	* vm_object_cache_{lock,lock_try,unlock}) governs the
	250	* object cache. It must be held when objects are
	251	* added to or removed from the cache (in vm_object_terminate).
	252	* The routines that acquire a reference to a virtual
	253	* memory object based on one of the memory object ports
	254	* must also lock the cache.
	255	*
	256	* Ideally, the object cache should be more isolated
	257	* from the reference mechanism, so that the lock need
	258	* not be held to make simple references.
	259	*/
	260	static vm_object_t vm_object_cache_trim(
	261	boolean_t called_from_vm_object_deallocate);
	262
	263	static void vm_object_deactivate_all_pages(
	264	vm_object_t object);
	265
	266	static int vm_object_cached_high; /* highest # cached objects */
	267	static int vm_object_cached_max = 512; /* may be patched*/
	268
	269	#define vm_object_cache_lock() \
	270	lck_mtx_lock(&vm_object_cached_lock_data)
	271	#define vm_object_cache_lock_try() \
	272	lck_mtx_try_lock(&vm_object_cached_lock_data)
	273
	274	#endif /* VM_OBJECT_CACHE */
	275
	276	static queue_head_t vm_object_cached_list;
	277	static uint32_t vm_object_cache_pages_freed = 0;
	278	static uint32_t vm_object_cache_pages_moved = 0;
	279	static uint32_t vm_object_cache_pages_skipped = 0;
	280	static uint32_t vm_object_cache_adds = 0;
	281	static uint32_t vm_object_cached_count = 0;
	282	static lck_mtx_t vm_object_cached_lock_data;
	283	static lck_mtx_ext_t vm_object_cached_lock_data_ext;
	284
	285	static uint32_t vm_object_page_grab_failed = 0;
	286	static uint32_t vm_object_page_grab_skipped = 0;
	287	static uint32_t vm_object_page_grab_returned = 0;
	288	static uint32_t vm_object_page_grab_pmapped = 0;
	289	static uint32_t vm_object_page_grab_reactivations = 0;
	290
	291	#define vm_object_cache_lock_spin() \
	292	lck_mtx_lock_spin(&vm_object_cached_lock_data)
	293	#define vm_object_cache_unlock() \
	294	lck_mtx_unlock(&vm_object_cached_lock_data)
	295
	296	static void vm_object_cache_remove_locked(vm_object_t);
	297
	298
	299	#define VM_OBJECT_HASH_COUNT 1024
	300	#define VM_OBJECT_HASH_LOCK_COUNT 512
	301
	302	static lck_mtx_t vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT];
	303	static lck_mtx_ext_t vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT];
	304
	305	static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
	306	static struct zone *vm_object_hash_zone;
	307
	/*
	 * One element of a vm_object_hashtable bucket: it ties a pager to
	 * the VM object that represents it.  Entries are chained through
	 * "hash_link" and are found by comparing "pager"
	 * (see vm_object_hash_lookup()).
	 */
	308	struct vm_object_hash_entry {
	309	queue_chain_t hash_link; /* hash chain link */
	310	memory_object_t pager; /* pager we represent */
	311	vm_object_t object; /* corresponding object */
	312	boolean_t waiting; /* someone waiting for
	313	* termination */
	314	};
	315
	/* Pointer type used for hash entries, and its "no entry" value. */
	316	typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
	317	#define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
	318
	/*
	 * Hashing of pager pointers.
	 *
	 * The low VM_OBJECT_HASH_SHIFT bits of the pointer are discarded
	 * before reducing it modulo the table size.
	 *
	 * vm_object_hash(pager) yields the index of the pager's bucket in
	 * vm_object_hashtable[]; vm_object_lock_hash(pager) yields the index
	 * of the mutex in vm_object_hashed_lock_data[] for that pager.
	 * Both use the same shifted value, so as long as
	 * VM_OBJECT_HASH_COUNT is a multiple of VM_OBJECT_HASH_LOCK_COUNT,
	 * all pagers that share a bucket also share a lock index.
	 *
	 * The macro argument is fully parenthesized so that an expression
	 * argument (e.g. "p + 1") is evaluated before the cast is applied.
	 */
	#define VM_OBJECT_HASH_SHIFT	5
	#define vm_object_hash(pager) \
		((int)((((uintptr_t)(pager)) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT))

	#define vm_object_lock_hash(pager) \
		((int)((((uintptr_t)(pager)) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT))
	325
	326	void vm_object_hash_entry_free(
	327	vm_object_hash_entry_t entry);
	328
	329	static void vm_object_reap(vm_object_t object);
	330	static void vm_object_reap_async(vm_object_t object);
	331	static void vm_object_reaper_thread(void);
	332
	333	static lck_mtx_t vm_object_reaper_lock_data;
	334	static lck_mtx_ext_t vm_object_reaper_lock_data_ext;
	335
	336	static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */
	337	unsigned int vm_object_reap_count = 0;
	338	unsigned int vm_object_reap_count_async = 0;
	339
	340	#define vm_object_reaper_lock() \
	341	lck_mtx_lock(&vm_object_reaper_lock_data)
	342	#define vm_object_reaper_lock_spin() \
	343	lck_mtx_lock_spin(&vm_object_reaper_lock_data)
	344	#define vm_object_reaper_unlock() \
	345	lck_mtx_unlock(&vm_object_reaper_lock_data)
	346
	347	#if 0
	348	#undef KERNEL_DEBUG
	349	#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
	350	#endif
	351
	352
	353	static lck_mtx_t *
	354	vm_object_hash_lock_spin(
	355	memory_object_t pager)
	356	{
	357	int index;
	358
	359	index = vm_object_lock_hash(pager);
	360
	361	lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]);
	362
	363	return (&vm_object_hashed_lock_data[index]);
	364	}
	365
	366	static void
	367	vm_object_hash_unlock(lck_mtx_t *lck)
	368	{
	369	lck_mtx_unlock(lck);
	370	}
	371
	372
	373	/*
	374	* vm_object_hash_lookup looks up a pager in the hashtable
	375	* and returns the corresponding entry, with optional removal.
	376	*/
	377	static vm_object_hash_entry_t
	378	vm_object_hash_lookup(
	379	memory_object_t pager,
	380	boolean_t remove_entry)
	381	{
	382	queue_t bucket;
	383	vm_object_hash_entry_t entry;
	384
	385	bucket = &vm_object_hashtable[vm_object_hash(pager)];
	386
	387	entry = (vm_object_hash_entry_t)queue_first(bucket);
	388	while (!queue_end(bucket, (queue_entry_t)entry)) {
	389	if (entry->pager == pager) {
	390	if (remove_entry) {
	391	queue_remove(bucket, entry,
	392	vm_object_hash_entry_t, hash_link);
	393	}
	394	return(entry);
	395	}
	396	entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
	397	}
	398	return(VM_OBJECT_HASH_ENTRY_NULL);
	399	}
	400
	401	/*
	402	* vm_object_hash_enter enters the specified
	403	* pager / cache object association in the hashtable.
	404	*/
	405
	406	static void
	407	vm_object_hash_insert(
	408	vm_object_hash_entry_t entry,
	409	vm_object_t object)
	410	{
	411	queue_t bucket;
	412
	413	bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
	414
	415	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
	416
	417	entry->object = object;
	418	object->hashed = TRUE;
	419	}
	420
	421	static vm_object_hash_entry_t
	422	vm_object_hash_entry_alloc(
	423	memory_object_t pager)
	424	{
	425	vm_object_hash_entry_t entry;
	426
	427	entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
	428	entry->pager = pager;
	429	entry->object = VM_OBJECT_NULL;
	430	entry->waiting = FALSE;
	431
	432	return(entry);
	433	}
	434
	435	void
	436	vm_object_hash_entry_free(
	437	vm_object_hash_entry_t entry)
	438	{
	439	zfree(vm_object_hash_zone, entry);
	440	}
	441
	442	/*
	443	* vm_object_allocate:
	444	*
	445	* Returns a new object with the given size.
	446	*/
	447
	448	__private_extern__ void
	449	_vm_object_allocate(
	450	vm_object_size_t size,
	451	vm_object_t object)
	452	{
	453	XPR(XPR_VM_OBJECT,
	454	"vm_object_allocate, object 0x%X size 0x%X\n",
	455	object, size, 0,0,0);
	456
	457	*object = vm_object_template;
	458	queue_init(&object->memq);
	459	queue_init(&object->msr_q);
	460	#if UPL_DEBUG
	461	queue_init(&object->uplq);
	462	#endif /* UPL_DEBUG */
	463	vm_object_lock_init(object);
	464	object->vo_size = size;
	465	}
	466
	467	__private_extern__ vm_object_t
	468	vm_object_allocate(
	469	vm_object_size_t size)
	470	{
	471	register vm_object_t object;
	472
	473	object = (vm_object_t) zalloc(vm_object_zone);
	474
	475	// dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
	476
	477	if (object != VM_OBJECT_NULL)
	478	_vm_object_allocate(size, object);
	479
	480	return object;
	481	}
	482
	483
	484	lck_grp_t vm_object_lck_grp;
	485	lck_grp_t vm_object_cache_lck_grp;
	486	lck_grp_attr_t vm_object_lck_grp_attr;
	487	lck_attr_t vm_object_lck_attr;
	488	lck_attr_t kernel_object_lck_attr;
	489
	490	/*
	491	* vm_object_bootstrap:
	492	*
	493	* Initialize the VM objects module.
	*
	* In order, this routine:
	*   - creates the zone that vm objects are allocated from;
	*   - initializes the lock groups/attributes, the object cache
	*     queue and its lock, the reaper queue and its lock, and the
	*     array of hash locks;
	*   - creates the zone for hash entries and empties every hash bucket;
	*   - fills in vm_object_template, from which every new object is
	*     copied by _vm_object_allocate();
	*   - sets up kernel_object and vm_submap_object in their static
	*     storage.
	494	*/
	495	__private_extern__ void
	496	vm_object_bootstrap(void)
	497	{
	498	register int i;
	499
	/* zone backing vm_object_allocate() */
	500	vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
	501	round_page(512*1024),
	502	round_page(12*1024),
	503	"vm objects");
	504	zone_change(vm_object_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	505	zone_change(vm_object_zone, Z_NOENCRYPT, TRUE);
	506
	/* lock groups and attributes must exist before any lck_mtx_init_ext() below */
	507	vm_object_init_lck_grp();
	508
	509	queue_init(&vm_object_cached_list);
	510
	511	lck_mtx_init_ext(&vm_object_cached_lock_data,
	512	&vm_object_cached_lock_data_ext,
	513	&vm_object_cache_lck_grp,
	514	&vm_object_lck_attr);
	515
	516	queue_init(&vm_object_reaper_queue);
	517
	/* one mutex per hash-lock slot (see vm_object_lock_hash()) */
	518	for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) {
	519	lck_mtx_init_ext(&vm_object_hashed_lock_data[i],
	520	&vm_object_hashed_lock_data_ext[i],
	521	&vm_object_lck_grp,
	522	&vm_object_lck_attr);
	523	}
	524	lck_mtx_init_ext(&vm_object_reaper_lock_data,
	525	&vm_object_reaper_lock_data_ext,
	526	&vm_object_lck_grp,
	527	&vm_object_lck_attr);
	528
	/* zone backing vm_object_hash_entry_alloc() */
	529	vm_object_hash_zone =
	530	zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
	531	round_page(512*1024),
	532	round_page(12*1024),
	533	"vm object hash entries");
	534	zone_change(vm_object_hash_zone, Z_CALLERACCT, FALSE);
	535	zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE);
	536
	/* every hash bucket starts out empty */
	537	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
	538	queue_init(&vm_object_hashtable[i]);
	539
	540
	541	/*
	542	* Fill in a template object, for quick initialization
	543	*/
	544
	545	/* memq; Lock; init after allocation */
	546	vm_object_template.memq.prev = NULL;
	547	vm_object_template.memq.next = NULL;
	548	#if 0
	549	/*
	550	* We can't call vm_object_lock_init() here because that will
	551	* allocate some memory and VM is not fully initialized yet.
	552	* The lock will be initialized for each allocated object in
	553	* _vm_object_allocate(), so we don't need to initialize it in
	554	* the vm_object_template.
	555	*/
	556	vm_object_lock_init(&vm_object_template);
	557	#endif
	558	vm_object_template.vo_size = 0;
	559	vm_object_template.memq_hint = VM_PAGE_NULL;
	/* an object copied from the template starts with ref_count 1 */
	560	vm_object_template.ref_count = 1;
	561	#if TASK_SWAPPER
	562	vm_object_template.res_count = 1;
	563	#endif /* TASK_SWAPPER */
	564	vm_object_template.resident_page_count = 0;
	565	vm_object_template.wired_page_count = 0;
	566	vm_object_template.reusable_page_count = 0;
	567	vm_object_template.copy = VM_OBJECT_NULL;
	568	vm_object_template.shadow = VM_OBJECT_NULL;
	569	vm_object_template.vo_shadow_offset = (vm_object_offset_t) 0;
	570	vm_object_template.pager = MEMORY_OBJECT_NULL;
	571	vm_object_template.paging_offset = 0;
	572	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
	573	vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
	574	vm_object_template.paging_in_progress = 0;
	575	vm_object_template.activity_in_progress = 0;
	576
	577	/* Begin bitfields */
	578	vm_object_template.all_wanted = 0; /* all bits FALSE */
	579	vm_object_template.pager_created = FALSE;
	580	vm_object_template.pager_initialized = FALSE;
	581	vm_object_template.pager_ready = FALSE;
	582	vm_object_template.pager_trusted = FALSE;
	583	vm_object_template.can_persist = FALSE;
	/* by default a new object is "internal" and "temporary" */
	584	vm_object_template.internal = TRUE;
	585	vm_object_template.temporary = TRUE;
	586	vm_object_template.private = FALSE;
	587	vm_object_template.pageout = FALSE;
	588	vm_object_template.alive = TRUE;
	589	vm_object_template.purgable = VM_PURGABLE_DENY;
	590	vm_object_template.shadowed = FALSE;
	591	vm_object_template.silent_overwrite = FALSE;
	592	vm_object_template.advisory_pageout = FALSE;
	593	vm_object_template.true_share = FALSE;
	594	vm_object_template.terminating = FALSE;
	595	vm_object_template.named = FALSE;
	596	vm_object_template.shadow_severed = FALSE;
	597	vm_object_template.phys_contiguous = FALSE;
	598	vm_object_template.nophyscache = FALSE;
	599	/* End bitfields */
	600
	/* queue links are NULL here; memq and msr_q are queue_init()ed per object in _vm_object_allocate() */
	601	vm_object_template.cached_list.prev = NULL;
	602	vm_object_template.cached_list.next = NULL;
	603	vm_object_template.msr_q.prev = NULL;
	604	vm_object_template.msr_q.next = NULL;
	605
	606	vm_object_template.last_alloc = (vm_object_offset_t) 0;
	607	vm_object_template.sequential = (vm_object_offset_t) 0;
	608	vm_object_template.pages_created = 0;
	609	vm_object_template.pages_used = 0;
	610	vm_object_template.scan_collisions = 0;
	611
	612	#if MACH_PAGEMAP
	613	vm_object_template.existence_map = VM_EXTERNAL_NULL;
	614	#endif /* MACH_PAGEMAP */
	615	vm_object_template.cow_hint = ~(vm_offset_t)0;
	616	#if MACH_ASSERT
	617	vm_object_template.paging_object = VM_OBJECT_NULL;
	618	#endif /* MACH_ASSERT */
	619
	620	/* cache bitfields */
	621	vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT;
	622	vm_object_template.set_cache_attr = FALSE;
	623	vm_object_template.code_signed = FALSE;
	624	vm_object_template.hashed = FALSE;
	625	vm_object_template.transposed = FALSE;
	626	vm_object_template.mapping_in_progress = FALSE;
	627	vm_object_template.volatile_empty = FALSE;
	628	vm_object_template.volatile_fault = FALSE;
	629	vm_object_template.all_reusable = FALSE;
	630	vm_object_template.blocked_access = FALSE;
	631	vm_object_template.__object2_unused_bits = 0;
	632	#if UPL_DEBUG
	633	vm_object_template.uplq.prev = NULL;
	634	vm_object_template.uplq.next = NULL;
	635	#endif /* UPL_DEBUG */
	636	#ifdef VM_PIP_DEBUG
	637	bzero(&vm_object_template.pip_holders,
	638	sizeof (vm_object_template.pip_holders));
	639	#endif /* VM_PIP_DEBUG */
	640
	641	vm_object_template.objq.next=NULL;
	642	vm_object_template.objq.prev=NULL;
	643
	644	vm_object_template.vo_cache_ts = 0;
	645
	646	/*
	647	* Initialize the "kernel object"
	*
	* It lives in static storage (kernel_object_store) rather than
	* in vm_object_zone, so it is set up with _vm_object_allocate()
	* directly.
	648	*/
	649
	650	kernel_object = &kernel_object_store;
	651
	652	/*
	653	* Note that in the following size specifications, we need to add 1 because
	654	* VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
	655	*/
	656
	657	#ifdef ppc
	658	_vm_object_allocate(vm_last_addr + 1,
	659	kernel_object);
	660	#else
	661	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
	662	kernel_object);
	663	#endif
	/* override the template's MEMORY_OBJECT_COPY_SYMMETRIC default */
	664	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
	665
	666	/*
	667	* Initialize the "submap object". Make it as large as the
	668	* kernel object so that no limit is imposed on submap sizes.
	669	*/
	670
	671	vm_submap_object = &vm_submap_object_store;
	672	#ifdef ppc
	673	_vm_object_allocate(vm_last_addr + 1,
	674	vm_submap_object);
	675	#else
	676	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
	677	vm_submap_object);
	678	#endif
	679	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
	680
	681	/*
	682	* Create an "extra" reference to this object so that we never
	683	* try to deallocate it; zfree doesn't like to be called with
	684	* non-zone memory.
	685	*/
	686	vm_object_reference(vm_submap_object);
	687
	688	#if MACH_PAGEMAP
	689	vm_external_module_initialize();
	690	#endif /* MACH_PAGEMAP */
	691	}
	692
	693	void
	694	vm_object_reaper_init(void)
	695	{
	696	kern_return_t kr;
	697	thread_t thread;
	698
	699	kr = kernel_thread_start_priority(
	700	(thread_continue_t) vm_object_reaper_thread,
	701	NULL,
	702	BASEPRI_PREEMPT - 1,
	703	&thread);
	704	if (kr != KERN_SUCCESS) {
	705	panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
	706	}
	707	thread_deallocate(thread);
	708	}
	709
	710	__private_extern__ void
	711	vm_object_init(void)
	712	{
	713	/*
	714	* Finish initializing the kernel object.
	715	*/
	716	}
	717
	718
	719	__private_extern__ void
	720	vm_object_init_lck_grp(void)
	721	{
	722	/*
	723	* initialze the vm_object lock world
	724	*/
	725	lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
	726	lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
	727	lck_grp_init(&vm_object_cache_lck_grp, "vm_object_cache", &vm_object_lck_grp_attr);
	728	lck_attr_setdefault(&vm_object_lck_attr);
	729	lck_attr_setdefault(&kernel_object_lck_attr);
	730	lck_attr_cleardebug(&kernel_object_lck_attr);
	731	}
	732
	733	#if VM_OBJECT_CACHE
	734	#define MIGHT_NOT_CACHE_SHADOWS 1
	735	#if MIGHT_NOT_CACHE_SHADOWS
	736	static int cache_shadows = TRUE;
	737	#endif /* MIGHT_NOT_CACHE_SHADOWS */
	738	#endif
	739
	740	/*
	741	* vm_object_deallocate:
	742	*
	743	* Release a reference to the specified object,
	744	* gained either through a vm_object_allocate
	745	* or a vm_object_reference call. When all references
	746	* are gone, storage associated with this object
	747	* may be relinquished.
	748	*
	749	* No object may be locked.
	750	*/
	751	unsigned long vm_object_deallocate_shared_successes = 0;
	752	unsigned long vm_object_deallocate_shared_failures = 0;
	753	unsigned long vm_object_deallocate_shared_swap_failures = 0;
	754	__private_extern__ void
	755	vm_object_deallocate(
	756	register vm_object_t object)
	757	{
	758	#if VM_OBJECT_CACHE
	759	boolean_t retry_cache_trim = FALSE;
	760	uint32_t try_failed_count = 0;
	761	#endif
	762	vm_object_t shadow = VM_OBJECT_NULL;
	763
	764	// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
	765	// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
	766
	767	if (object == VM_OBJECT_NULL)
	768	return;
	769
	770	if (object == kernel_object) {
	771	vm_object_lock_shared(object);
	772
	773	OSAddAtomic(-1, &object->ref_count);
	774
	775	if (object->ref_count == 0) {
	776	panic("vm_object_deallocate: losing kernel_object\n");
	777	}
	778	vm_object_unlock(object);
	779	return;
	780	}
	781
	782	if (object->ref_count > 2 \|\|
	783	(!object->named && object->ref_count > 1)) {
	784	UInt32 original_ref_count;
	785	volatile UInt32 *ref_count_p;
	786	Boolean atomic_swap;
	787
	788	/*
	789	* The object currently looks like it is not being
	790	* kept alive solely by the reference we're about to release.
	791	* Let's try and release our reference without taking
	792	* all the locks we would need if we had to terminate the
	793	* object (cache lock + exclusive object lock).
	794	* Lock the object "shared" to make sure we don't race with
	795	* anyone holding it "exclusive".
	796	*/
	797	vm_object_lock_shared(object);
	798	ref_count_p = (volatile UInt32 *) &object->ref_count;
	799	original_ref_count = object->ref_count;
	800	/*
	801	* Test again as "ref_count" could have changed.
	802	* "named" shouldn't change.
	803	*/
	804	if (original_ref_count > 2 \|\|
	805	(!object->named && original_ref_count > 1)) {
	806	atomic_swap = OSCompareAndSwap(
	807	original_ref_count,
	808	original_ref_count - 1,
	809	(UInt32 *) &object->ref_count);
	810	if (atomic_swap == FALSE) {
	811	vm_object_deallocate_shared_swap_failures++;
	812	}
	813
	814	} else {
	815	atomic_swap = FALSE;
	816	}
	817	vm_object_unlock(object);
	818
	819	if (atomic_swap) {
	820	/*
	821	* ref_count was updated atomically !
	822	*/
	823	vm_object_deallocate_shared_successes++;
	824	return;
	825	}
	826
	827	/*
	828	* Someone else updated the ref_count at the same
	829	* time and we lost the race. Fall back to the usual
	830	* slow but safe path...
	831	*/
	832	vm_object_deallocate_shared_failures++;
	833	}
	834
	835	while (object != VM_OBJECT_NULL) {
	836
	837	vm_object_lock(object);
	838
	839	assert(object->ref_count > 0);
	840
	841	/*
	842	* If the object has a named reference, and only
	843	* that reference would remain, inform the pager
	844	* about the last "mapping" reference going away.
	845	*/
	846	if ((object->ref_count == 2) && (object->named)) {
	847	memory_object_t pager = object->pager;
	848
	849	/* Notify the Pager that there are no */
	850	/* more mappers for this object */
	851
	852	if (pager != MEMORY_OBJECT_NULL) {
	853	vm_object_mapping_wait(object, THREAD_UNINT);
	854	vm_object_mapping_begin(object);
	855	vm_object_unlock(object);
	856
	857	memory_object_last_unmap(pager);
	858
	859	vm_object_lock(object);
	860	vm_object_mapping_end(object);
	861	}
	862	assert(object->ref_count > 0);
	863	}
	864
	865	/*
	866	* Lose the reference. If other references
	867	* remain, then we are done, unless we need
	868	* to retry a cache trim.
	869	* If it is the last reference, then keep it
	870	* until any pending initialization is completed.
	871	*/
	872
	873	/* if the object is terminating, it cannot go into */
	874	/* the cache and we obviously should not call */
	875	/* terminate again. */
	876
	877	if ((object->ref_count > 1) \|\| object->terminating) {
	878	vm_object_lock_assert_exclusive(object);
	879	object->ref_count--;
	880	vm_object_res_deallocate(object);
	881
	882	if (object->ref_count == 1 &&
	883	object->shadow != VM_OBJECT_NULL) {
	884	/*
	885	* There's only one reference left on this
	886	* VM object. We can't tell if it's a valid
	887	* one (from a mapping for example) or if this
	888	* object is just part of a possibly stale and
	889	* useless shadow chain.
	890	* We would like to try and collapse it into
	891	* its parent, but we don't have any pointers
	892	* back to this parent object.
	893	* But we can try and collapse this object with
	894	* its own shadows, in case these are useless
	895	* too...
	896	* We can't bypass this object though, since we
	897	* don't know if this last reference on it is
	898	* meaningful or not.
	899	*/
	900	vm_object_collapse(object, 0, FALSE);
	901	}
	902	vm_object_unlock(object);
	903	#if VM_OBJECT_CACHE
	904	if (retry_cache_trim &&
	905	((object = vm_object_cache_trim(TRUE)) !=
	906	VM_OBJECT_NULL)) {
	907	continue;
	908	}
	909	#endif
	910	return;
	911	}
	912
	913	/*
	914	* We have to wait for initialization
	915	* before destroying or caching the object.
	916	*/
	917
	918	if (object->pager_created && ! object->pager_initialized) {
	919	assert(! object->can_persist);
	920	vm_object_assert_wait(object,
	921	VM_OBJECT_EVENT_INITIALIZED,
	922	THREAD_UNINT);
	923	vm_object_unlock(object);
	924
	925	thread_block(THREAD_CONTINUE_NULL);
	926	continue;
	927	}
	928
	929	#if VM_OBJECT_CACHE
	930	/*
	931	* If this object can persist, then enter it in
	932	* the cache. Otherwise, terminate it.
	933	*
	934	* NOTE: Only permanent objects are cached, and
	935	* permanent objects cannot have shadows. This
	936	* affects the residence counting logic in a minor
	937	* way (can do it in-line, mostly).
	938	*/
	939
	940	if ((object->can_persist) && (object->alive)) {
	941	/*
	942	* Now it is safe to decrement reference count,
	943	* and to return if reference count is > 0.
	944	*/
	945
	946	vm_object_lock_assert_exclusive(object);
	947	if (--object->ref_count > 0) {
	948	vm_object_res_deallocate(object);
	949	vm_object_unlock(object);
	950
	951	if (retry_cache_trim &&
	952	((object = vm_object_cache_trim(TRUE)) !=
	953	VM_OBJECT_NULL)) {
	954	continue;
	955	}
	956	return;
	957	}
	958
	959	#if MIGHT_NOT_CACHE_SHADOWS
	960	/*
	961	* Remove shadow now if we don't
	962	* want to cache shadows.
	963	*/
	964	if (! cache_shadows) {
	965	shadow = object->shadow;
	966	object->shadow = VM_OBJECT_NULL;
	967	}
	968	#endif /* MIGHT_NOT_CACHE_SHADOWS */
	969
	970	/*
	971	* Enter the object onto the queue of
	972	* cached objects, and deactivate
	973	* all of its pages.
	974	*/
	975	assert(object->shadow == VM_OBJECT_NULL);
	976	VM_OBJ_RES_DECR(object);
	977	XPR(XPR_VM_OBJECT,
	978	"vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
	979	object,
	980	vm_object_cached_list.next,
	981	vm_object_cached_list.prev,0,0);
	982
	983
	984	vm_object_unlock(object);
	985
	986	try_failed_count = 0;
	987	for (;;) {
	988	vm_object_cache_lock();
	989
	990	/*
	991	* if we try to take a regular lock here
	992	* we risk deadlocking against someone
	993	* holding a lock on this object while
	994	* trying to vm_object_deallocate a different
	995	* object
	996	*/
	997	if (vm_object_lock_try(object))
	998	break;
	999	vm_object_cache_unlock();
	1000	try_failed_count++;
	1001
	1002	mutex_pause(try_failed_count); /* wait a bit */
	1003	}
	1004	vm_object_cached_count++;
	1005	if (vm_object_cached_count > vm_object_cached_high)
	1006	vm_object_cached_high = vm_object_cached_count;
	1007	queue_enter(&vm_object_cached_list, object,
	1008	vm_object_t, cached_list);
	1009	vm_object_cache_unlock();
	1010
	1011	vm_object_deactivate_all_pages(object);
	1012	vm_object_unlock(object);
	1013
	1014	#if MIGHT_NOT_CACHE_SHADOWS
	1015	/*
	1016	* If we have a shadow that we need
	1017	* to deallocate, do so now, remembering
	1018	* to trim the cache later.
	1019	*/
	1020	if (! cache_shadows && shadow != VM_OBJECT_NULL) {
	1021	object = shadow;
	1022	retry_cache_trim = TRUE;
	1023	continue;
	1024	}
	1025	#endif /* MIGHT_NOT_CACHE_SHADOWS */
	1026
	1027	/*
	1028	* Trim the cache. If the cache trim
	1029	* returns with a shadow for us to deallocate,
	1030	* then remember to retry the cache trim
	1031	* when we are done deallocating the shadow.
	1032	* Otherwise, we are done.
	1033	*/
	1034
	1035	object = vm_object_cache_trim(TRUE);
	1036	if (object == VM_OBJECT_NULL) {
	1037	return;
	1038	}
	1039	retry_cache_trim = TRUE;
	1040	} else
	1041	#endif /* VM_OBJECT_CACHE */
	1042	{
	1043	/*
	1044	* This object is not cachable; terminate it.
	1045	*/
	1046	XPR(XPR_VM_OBJECT,
	1047	"vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
	1048	object, object->resident_page_count,
	1049	object->paging_in_progress,
	1050	(void *)current_thread(),object->ref_count);
	1051
	1052	VM_OBJ_RES_DECR(object); /* XXX ? */
	1053	/*
	1054	* Terminate this object. If it had a shadow,
	1055	* then deallocate it; otherwise, if we need
	1056	* to retry a cache trim, do so now; otherwise,
	1057	* we are done. "pageout" objects have a shadow,
	1058	* but maintain a "paging reference" rather than
	1059	* a normal reference.
	1060	*/
	1061	shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
	1062
	1063	if (vm_object_terminate(object) != KERN_SUCCESS) {
	1064	return;
	1065	}
	1066	if (shadow != VM_OBJECT_NULL) {
	1067	object = shadow;
	1068	continue;
	1069	}
	1070	#if VM_OBJECT_CACHE
	1071	if (retry_cache_trim &&
	1072	((object = vm_object_cache_trim(TRUE)) !=
	1073	VM_OBJECT_NULL)) {
	1074	continue;
	1075	}
	1076	#endif
	1077	return;
	1078	}
	1079	}
	1080	#if VM_OBJECT_CACHE
	1081	assert(! retry_cache_trim);
	1082	#endif
	1083	}
	1084
	1085
	1086
	1087	vm_page_t
	1088	vm_object_page_grab(
	1089	vm_object_t object)
	1090	{
	1091	vm_page_t p, next_p;
	1092	int p_limit = 0;
	1093	int p_skipped = 0;
	1094
	1095	vm_object_lock_assert_exclusive(object);
	1096
	1097	next_p = (vm_page_t)queue_first(&object->memq);
	1098	p_limit = MIN(50, object->resident_page_count);
	1099
	1100	while (!queue_end(&object->memq, (queue_entry_t)next_p) && --p_limit > 0) {
	1101
	1102	p = next_p;
	1103	next_p = (vm_page_t)queue_next(&next_p->listq);
	1104
	1105	if (VM_PAGE_WIRED(p) \|\| p->busy \|\| p->cleaning \|\| p->laundry \|\| p->fictitious)
	1106	goto move_page_in_obj;
	1107
	1108	if (p->pmapped \|\| p->dirty \|\| p->precious) {
	1109	vm_page_lockspin_queues();
	1110
	1111	if (p->pmapped) {
	1112	int refmod_state;
	1113
	1114	vm_object_page_grab_pmapped++;
	1115
	1116	if (p->reference == FALSE \|\| p->dirty == FALSE) {
	1117
	1118	refmod_state = pmap_get_refmod(p->phys_page);
	1119
	1120	if (refmod_state & VM_MEM_REFERENCED)
	1121	p->reference = TRUE;
	1122	if (refmod_state & VM_MEM_MODIFIED) {
	1123	SET_PAGE_DIRTY(p, FALSE);
	1124	}
	1125	}
	1126	if (p->dirty == FALSE && p->precious == FALSE) {
	1127
	1128	refmod_state = pmap_disconnect(p->phys_page);
	1129
	1130	if (refmod_state & VM_MEM_REFERENCED)
	1131	p->reference = TRUE;
	1132	if (refmod_state & VM_MEM_MODIFIED) {
	1133	SET_PAGE_DIRTY(p, FALSE);
	1134	}
	1135
	1136	if (p->dirty == FALSE)
	1137	goto take_page;
	1138	}
	1139	}
	1140	if (p->inactive && p->reference == TRUE) {
	1141	vm_page_activate(p);
	1142
	1143	VM_STAT_INCR(reactivations);
	1144	vm_object_page_grab_reactivations++;
	1145	}
	1146	vm_page_unlock_queues();
	1147	move_page_in_obj:
	1148	queue_remove(&object->memq, p, vm_page_t, listq);
	1149	queue_enter(&object->memq, p, vm_page_t, listq);
	1150
	1151	p_skipped++;
	1152	continue;
	1153	}
	1154	vm_page_lockspin_queues();
	1155	take_page:
	1156	vm_page_free_prepare_queues(p);
	1157	vm_object_page_grab_returned++;
	1158	vm_object_page_grab_skipped += p_skipped;
	1159
	1160	vm_page_unlock_queues();
	1161
	1162	vm_page_free_prepare_object(p, TRUE);
	1163
	1164	return (p);
	1165	}
	1166	vm_object_page_grab_skipped += p_skipped;
	1167	vm_object_page_grab_failed++;
	1168
	1169	return (NULL);
	1170	}
	1171
	1172
	1173
	1174	#define EVICT_PREPARE_LIMIT 64
	1175	#define EVICT_AGE 10
	1176
	1177	static clock_sec_t vm_object_cache_aging_ts = 0;
	1178
	1179	static void
	1180	vm_object_cache_remove_locked(
	1181	vm_object_t object)
	1182	{
	1183	queue_remove(&vm_object_cached_list, object, vm_object_t, objq);
	1184	object->objq.next = NULL;
	1185	object->objq.prev = NULL;
	1186
	1187	vm_object_cached_count--;
	1188	}
	1189
	1190	void
	1191	vm_object_cache_remove(
	1192	vm_object_t object)
	1193	{
	1194	vm_object_cache_lock_spin();
	1195
	1196	if (object->objq.next \|\| object->objq.prev)
	1197	vm_object_cache_remove_locked(object);
	1198
	1199	vm_object_cache_unlock();
	1200	}
	1201
	1202	void
	1203	vm_object_cache_add(
	1204	vm_object_t object)
	1205	{
	1206	clock_sec_t sec;
	1207	clock_nsec_t nsec;
	1208
	1209	if (object->resident_page_count == 0)
	1210	return;
	1211	clock_get_system_nanotime(&sec, &nsec);
	1212
	1213	vm_object_cache_lock_spin();
	1214
	1215	if (object->objq.next == NULL && object->objq.prev == NULL) {
	1216	queue_enter(&vm_object_cached_list, object, vm_object_t, objq);
	1217	object->vo_cache_ts = sec + EVICT_AGE;
	1218	object->vo_cache_pages_to_scan = object->resident_page_count;
	1219
	1220	vm_object_cached_count++;
	1221	vm_object_cache_adds++;
	1222	}
	1223	vm_object_cache_unlock();
	1224	}
	1225
	1226	int
	1227	vm_object_cache_evict(
	1228	int num_to_evict,
	1229	int max_objects_to_examine)
	1230	{
	1231	vm_object_t object = VM_OBJECT_NULL;
	1232	vm_object_t next_obj = VM_OBJECT_NULL;
	1233	vm_page_t local_free_q = VM_PAGE_NULL;
	1234	vm_page_t p;
	1235	vm_page_t next_p;
	1236	int object_cnt = 0;
	1237	vm_page_t ep_array[EVICT_PREPARE_LIMIT];
	1238	int ep_count;
	1239	int ep_limit;
	1240	int ep_index;
	1241	int ep_freed = 0;
	1242	int ep_moved = 0;
	1243	uint32_t ep_skipped = 0;
	1244	clock_sec_t sec;
	1245	clock_nsec_t nsec;
	1246
	1247	KERNEL_DEBUG(0x13001ec \| DBG_FUNC_START, 0, 0, 0, 0, 0);
	1248	/*
	1249	* do a couple of quick checks to see if it's
	1250	* worthwhile grabbing the lock
	1251	*/
	1252	if (queue_empty(&vm_object_cached_list)) {
	1253	KERNEL_DEBUG(0x13001ec \| DBG_FUNC_END, 0, 0, 0, 0, 0);
	1254	return (0);
	1255	}
	1256	clock_get_system_nanotime(&sec, &nsec);
	1257
	1258	/*
	1259	* the object on the head of the queue has not
	1260	* yet sufficiently aged
	1261	*/
	1262	if (sec < vm_object_cache_aging_ts) {
	1263	KERNEL_DEBUG(0x13001ec \| DBG_FUNC_END, 0, 0, 0, 0, 0);
	1264	return (0);
	1265	}
	1266	/*
	1267	* don't need the queue lock to find
	1268	* and lock an object on the cached list
	1269	*/
	1270	vm_page_unlock_queues();
	1271
	1272	vm_object_cache_lock_spin();
	1273
	1274	for (;;) {
	1275	next_obj = (vm_object_t)queue_first(&vm_object_cached_list);
	1276
	1277	while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) {
	1278
	1279	object = next_obj;
	1280	next_obj = (vm_object_t)queue_next(&next_obj->objq);
	1281
	1282	if (sec < object->vo_cache_ts) {
	1283	KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0);
	1284
	1285	vm_object_cache_aging_ts = object->vo_cache_ts;
	1286	object = VM_OBJECT_NULL;
	1287	break;
	1288	}
	1289	if (!vm_object_lock_try_scan(object)) {
	1290	/*
	1291	* just skip over this guy for now... if we find
	1292	* an object to steal pages from, we'll revist in a bit...
	1293	* hopefully, the lock will have cleared
	1294	*/
	1295	KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0);
	1296
	1297	object = VM_OBJECT_NULL;
	1298	continue;
	1299	}
	1300	if (queue_empty(&object->memq) \|\| object->vo_cache_pages_to_scan == 0) {
	1301	/*
	1302	* this case really shouldn't happen, but it's not fatal
	1303	* so deal with it... if we don't remove the object from
	1304	* the list, we'll never move past it.
	1305	*/
	1306	KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);
	1307
	1308	vm_object_cache_remove_locked(object);
	1309	vm_object_unlock(object);
	1310	object = VM_OBJECT_NULL;
	1311	continue;
	1312	}
	1313	/*
	1314	* we have a locked object with pages...
	1315	* time to start harvesting
	1316	*/
	1317	break;
	1318	}
	1319	vm_object_cache_unlock();
	1320
	1321	if (object == VM_OBJECT_NULL)
	1322	break;
	1323
	1324	/*
	1325	* object is locked at this point and
	1326	* has resident pages
	1327	*/
	1328	next_p = (vm_page_t)queue_first(&object->memq);
	1329
	1330	/*
	1331	* break the page scan into 2 pieces to minimize the time spent
	1332	* behind the page queue lock...
	1333	* the list of pages on these unused objects is likely to be cold
	1334	* w/r to the cpu cache which increases the time to scan the list
	1335	* tenfold... and we may have a 'run' of pages we can't utilize that
	1336	* needs to be skipped over...
	1337	*/
	1338	if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT)
	1339	ep_limit = EVICT_PREPARE_LIMIT;
	1340	ep_count = 0;
	1341
	1342	while (!queue_end(&object->memq, (queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) {
	1343
	1344	p = next_p;
	1345	next_p = (vm_page_t)queue_next(&next_p->listq);
	1346
	1347	object->vo_cache_pages_to_scan--;
	1348
	1349	if (VM_PAGE_WIRED(p) \|\| p->busy \|\| p->cleaning \|\| p->laundry) {
	1350	queue_remove(&object->memq, p, vm_page_t, listq);
	1351	queue_enter(&object->memq, p, vm_page_t, listq);
	1352
	1353	ep_skipped++;
	1354	continue;
	1355	}
	1356	if (p->wpmapped \|\| p->dirty \|\| p->precious) {
	1357	queue_remove(&object->memq, p, vm_page_t, listq);
	1358	queue_enter(&object->memq, p, vm_page_t, listq);
	1359
	1360	pmap_clear_reference(p->phys_page);
	1361	}
	1362	ep_array[ep_count++] = p;
	1363	}
	1364	KERNEL_DEBUG(0x13001f4 \| DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0);
	1365
	1366	vm_page_lockspin_queues();
	1367
	1368	for (ep_index = 0; ep_index < ep_count; ep_index++) {
	1369
	1370	p = ep_array[ep_index];
	1371
	1372	if (p->wpmapped \|\| p->dirty \|\| p->precious) {
	1373	p->reference = FALSE;
	1374	p->no_cache = FALSE;
	1375
	1376	/*
	1377	* we've already filtered out pages that are in the laundry
	1378	* so if we get here, this page can't be on the pageout queue
	1379	*/
	1380	assert(!p->pageout_queue);
	1381
	1382	VM_PAGE_QUEUES_REMOVE(p);
	1383	VM_PAGE_ENQUEUE_INACTIVE(p, TRUE);
	1384
	1385	ep_moved++;
	1386	} else {
	1387	vm_page_free_prepare_queues(p);
	1388
	1389	assert(p->pageq.next == NULL && p->pageq.prev == NULL);
	1390	/*
	1391	* Add this page to our list of reclaimed pages,
	1392	* to be freed later.
	1393	*/
	1394	p->pageq.next = (queue_entry_t) local_free_q;
	1395	local_free_q = p;
	1396
	1397	ep_freed++;
	1398	}
	1399	}
	1400	vm_page_unlock_queues();
	1401
	1402	KERNEL_DEBUG(0x13001f4 \| DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0);
	1403
	1404	if (local_free_q) {
	1405	vm_page_free_list(local_free_q, TRUE);
	1406	local_free_q = VM_PAGE_NULL;
	1407	}
	1408	if (object->vo_cache_pages_to_scan == 0) {
	1409	KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0);
	1410
	1411	vm_object_cache_remove(object);
	1412
	1413	KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);
	1414	}
	1415	/*
	1416	* done with this object
	1417	*/
	1418	vm_object_unlock(object);
	1419	object = VM_OBJECT_NULL;
	1420
	1421	/*
	1422	* at this point, we are not holding any locks
	1423	*/
	1424	if ((ep_freed + ep_moved) >= num_to_evict) {
	1425	/*
	1426	* we've reached our target for the
	1427	* number of pages to evict
	1428	*/
	1429	break;
	1430	}
	1431	vm_object_cache_lock_spin();
	1432	}
	1433	/*
	1434	* put the page queues lock back to the caller's
	1435	* idea of it
	1436	*/
	1437	vm_page_lock_queues();
	1438
	1439	vm_object_cache_pages_freed += ep_freed;
	1440	vm_object_cache_pages_moved += ep_moved;
	1441	vm_object_cache_pages_skipped += ep_skipped;
	1442
	1443	KERNEL_DEBUG(0x13001ec \| DBG_FUNC_END, ep_freed, 0, 0, 0, 0);
	1444	return (ep_freed);
	1445	}
	1446
	1447
	1448	#if VM_OBJECT_CACHE
	1449	/*
	1450	* Check to see whether we really need to trim
	1451	* down the cache. If so, remove an object from
	1452	* the cache, terminate it, and repeat.
	1453	*
	1454	* Called with, and returns with, cache lock unlocked.
	1455	*/
	1456	vm_object_t
	1457	vm_object_cache_trim(
	1458	boolean_t called_from_vm_object_deallocate)
	1459	{
	1460	register vm_object_t object = VM_OBJECT_NULL;
	1461	vm_object_t shadow;
	1462
	1463	for (;;) {
	1464
	1465	/*
	1466	* If we no longer need to trim the cache,
	1467	* then we are done.
	1468	*/
	1469	if (vm_object_cached_count <= vm_object_cached_max)
	1470	return VM_OBJECT_NULL;
	1471
	1472	vm_object_cache_lock();
	1473	if (vm_object_cached_count <= vm_object_cached_max) {
	1474	vm_object_cache_unlock();
	1475	return VM_OBJECT_NULL;
	1476	}
	1477
	1478	/*
	1479	* We must trim down the cache, so remove
	1480	* the first object in the cache.
	1481	*/
	1482	XPR(XPR_VM_OBJECT,
	1483	"vm_object_cache_trim: removing from front of cache (%x, %x)\n",
	1484	vm_object_cached_list.next,
	1485	vm_object_cached_list.prev, 0, 0, 0);
	1486
	1487	object = (vm_object_t) queue_first(&vm_object_cached_list);
	1488	if(object == (vm_object_t) &vm_object_cached_list) {
	1489	/* something's wrong with the calling parameter or */
	1490	/* the value of vm_object_cached_count, just fix */
	1491	/* and return */
	1492	if(vm_object_cached_max < 0)
	1493	vm_object_cached_max = 0;
	1494	vm_object_cached_count = 0;
	1495	vm_object_cache_unlock();
	1496	return VM_OBJECT_NULL;
	1497	}
	1498	vm_object_lock(object);
	1499	queue_remove(&vm_object_cached_list, object, vm_object_t,
	1500	cached_list);
	1501	vm_object_cached_count--;
	1502
	1503	vm_object_cache_unlock();
	1504	/*
	1505	* Since this object is in the cache, we know
	1506	* that it is initialized and has no references.
	1507	* Take a reference to avoid recursive deallocations.
	1508	*/
	1509
	1510	assert(object->pager_initialized);
	1511	assert(object->ref_count == 0);
	1512	vm_object_lock_assert_exclusive(object);
	1513	object->ref_count++;
	1514
	1515	/*
	1516	* Terminate the object.
	1517	* If the object had a shadow, we let vm_object_deallocate
	1518	* deallocate it. "pageout" objects have a shadow, but
	1519	* maintain a "paging reference" rather than a normal
	1520	* reference.
	1521	* (We are careful here to limit recursion.)
	1522	*/
	1523	shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
	1524
	1525	if(vm_object_terminate(object) != KERN_SUCCESS)
	1526	continue;
	1527
	1528	if (shadow != VM_OBJECT_NULL) {
	1529	if (called_from_vm_object_deallocate) {
	1530	return shadow;
	1531	} else {
	1532	vm_object_deallocate(shadow);
	1533	}
	1534	}
	1535	}
	1536	}
	1537	#endif
	1538
	1539
	1540	/*
	1541	* Routine: vm_object_terminate
	1542	* Purpose:
	1543	* Free all resources associated with a vm_object.
	1544	* In/out conditions:
	1545	* Upon entry, the object must be locked,
	1546	* and the object must have exactly one reference.
	1547	*
	1548	* The shadow object reference is left alone.
	1549	*
	1550	* The object must be unlocked if its found that pages
	1551	* must be flushed to a backing object. If someone
	1552	* manages to map the object while it is being flushed
	1553	* the object is returned unlocked and unchanged. Otherwise,
	1554	* upon exit, the cache will be unlocked, and the
	1555	* object will cease to exist.
	1556	*/
	1557	static kern_return_t
	1558	vm_object_terminate(
	1559	vm_object_t object)
	1560	{
	1561	vm_object_t shadow_object;
	1562
	1563	XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
	1564	object, object->ref_count, 0, 0, 0);
	1565
	1566	if (!object->pageout && (!object->temporary \|\| object->can_persist) &&
	1567	(object->pager != NULL \|\| object->shadow_severed)) {
	1568	/*
	1569	* Clear pager_trusted bit so that the pages get yanked
	1570	* out of the object instead of cleaned in place. This
	1571	* prevents a deadlock in XMM and makes more sense anyway.
	1572	*/
	1573	object->pager_trusted = FALSE;
	1574
	1575	vm_object_reap_pages(object, REAP_TERMINATE);
	1576	}
	1577	/*
	1578	* Make sure the object isn't already being terminated
	1579	*/
	1580	if (object->terminating) {
	1581	vm_object_lock_assert_exclusive(object);
	1582	object->ref_count--;
	1583	assert(object->ref_count > 0);
	1584	vm_object_unlock(object);
	1585	return KERN_FAILURE;
	1586	}
	1587
	1588	/*
	1589	* Did somebody get a reference to the object while we were
	1590	* cleaning it?
	1591	*/
	1592	if (object->ref_count != 1) {
	1593	vm_object_lock_assert_exclusive(object);
	1594	object->ref_count--;
	1595	assert(object->ref_count > 0);
	1596	vm_object_res_deallocate(object);
	1597	vm_object_unlock(object);
	1598	return KERN_FAILURE;
	1599	}
	1600
	1601	/*
	1602	* Make sure no one can look us up now.
	1603	*/
	1604
	1605	object->terminating = TRUE;
	1606	object->alive = FALSE;
	1607
	1608	if ( !object->internal && (object->objq.next \|\| object->objq.prev))
	1609	vm_object_cache_remove(object);
	1610
	1611	if (object->hashed) {
	1612	lck_mtx_t *lck;
	1613
	1614	lck = vm_object_hash_lock_spin(object->pager);
	1615	vm_object_remove(object);
	1616	vm_object_hash_unlock(lck);
	1617	}
	1618	/*
	1619	* Detach the object from its shadow if we are the shadow's
	1620	* copy. The reference we hold on the shadow must be dropped
	1621	* by our caller.
	1622	*/
	1623	if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
	1624	!(object->pageout)) {
	1625	vm_object_lock(shadow_object);
	1626	if (shadow_object->copy == object)
	1627	shadow_object->copy = VM_OBJECT_NULL;
	1628	vm_object_unlock(shadow_object);
	1629	}
	1630
	1631	if (object->paging_in_progress != 0 \|\|
	1632	object->activity_in_progress != 0) {
	1633	/*
	1634	* There are still some paging_in_progress references
	1635	* on this object, meaning that there are some paging
	1636	* or other I/O operations in progress for this VM object.
	1637	* Such operations take some paging_in_progress references
	1638	* up front to ensure that the object doesn't go away, but
	1639	* they may also need to acquire a reference on the VM object,
	1640	* to map it in kernel space, for example. That means that
	1641	* they may end up releasing the last reference on the VM
	1642	* object, triggering its termination, while still holding
	1643	* paging_in_progress references. Waiting for these
	1644	* pending paging_in_progress references to go away here would
	1645	* deadlock.
	1646	*
	1647	* To avoid deadlocking, we'll let the vm_object_reaper_thread
	1648	* complete the VM object termination if it still holds
	1649	* paging_in_progress references at this point.
	1650	*
	1651	* No new paging_in_progress should appear now that the
	1652	* VM object is "terminating" and not "alive".
	1653	*/
	1654	vm_object_reap_async(object);
	1655	vm_object_unlock(object);
	1656	/*
	1657	* Return KERN_FAILURE to let the caller know that we
	1658	* haven't completed the termination and it can't drop this
	1659	* object's reference on its shadow object yet.
	1660	* The reaper thread will take care of that once it has
	1661	* completed this object's termination.
	1662	*/
	1663	return KERN_FAILURE;
	1664	}
	1665	/*
	1666	* complete the VM object termination
	1667	*/
	1668	vm_object_reap(object);
	1669	object = VM_OBJECT_NULL;
	1670
	1671	/*
	1672	* the object lock was released by vm_object_reap()
	1673	*
	1674	* KERN_SUCCESS means that this object has been terminated
	1675	* and no longer needs its shadow object but still holds a
	1676	* reference on it.
	1677	* The caller is responsible for dropping that reference.
	1678	* We can't call vm_object_deallocate() here because that
	1679	* would create a recursion.
	1680	*/
	1681	return KERN_SUCCESS;
	1682	}
	1683
	1684
	1685	/*
	1686	* vm_object_reap():
	1687	*
	1688	* Complete the termination of a VM object after it's been marked
	1689	* as "terminating" and "!alive" by vm_object_terminate().
	1690	*
	1691	* The VM object must be locked by caller.
	1692	* The lock will be released on return and the VM object is no longer valid.
	1693	*/
	1694	void
	1695	vm_object_reap(
	1696	vm_object_t object)
	1697	{
	1698	memory_object_t pager;
	1699
	1700	vm_object_lock_assert_exclusive(object);
	1701	assert(object->paging_in_progress == 0);
	1702	assert(object->activity_in_progress == 0);
	1703
	1704	vm_object_reap_count++;
	1705
	1706	pager = object->pager;
	1707	object->pager = MEMORY_OBJECT_NULL;
	1708
	1709	if (pager != MEMORY_OBJECT_NULL)
	1710	memory_object_control_disable(object->pager_control);
	1711
	1712	object->ref_count--;
	1713	#if TASK_SWAPPER
	1714	assert(object->res_count == 0);
	1715	#endif /* TASK_SWAPPER */
	1716
	1717	assert (object->ref_count == 0);
	1718
	1719	/*
	1720	* remove from purgeable queue if it's on
	1721	*/
	1722	if (object->internal && (object->objq.next \|\| object->objq.prev)) {
	1723	purgeable_q_t queue = vm_purgeable_object_remove(object);
	1724	assert(queue);
	1725
	1726	/* Must take page lock for this - using it to protect token queue */
	1727	vm_page_lock_queues();
	1728	vm_purgeable_token_delete_first(queue);
	1729
	1730	assert(queue->debug_count_objects>=0);
	1731	vm_page_unlock_queues();
	1732	}
	1733
	1734	/*
	1735	* Clean or free the pages, as appropriate.
	1736	* It is possible for us to find busy/absent pages,
	1737	* if some faults on this object were aborted.
	1738	*/
	1739	if (object->pageout) {
	1740	assert(object->shadow != VM_OBJECT_NULL);
	1741
	1742	vm_pageout_object_terminate(object);
	1743
	1744	} else if (((object->temporary && !object->can_persist) \|\| (pager == MEMORY_OBJECT_NULL))) {
	1745
	1746	vm_object_reap_pages(object, REAP_REAP);
	1747	}
	1748	assert(queue_empty(&object->memq));
	1749	assert(object->paging_in_progress == 0);
	1750	assert(object->activity_in_progress == 0);
	1751	assert(object->ref_count == 0);
	1752
	1753	/*
	1754	* If the pager has not already been released by
	1755	* vm_object_destroy, we need to terminate it and
	1756	* release our reference to it here.
	1757	*/
	1758	if (pager != MEMORY_OBJECT_NULL) {
	1759	vm_object_unlock(object);
	1760	vm_object_release_pager(pager, object->hashed);
	1761	vm_object_lock(object);
	1762	}
	1763
	1764	/* kick off anyone waiting on terminating */
	1765	object->terminating = FALSE;
	1766	vm_object_paging_begin(object);
	1767	vm_object_paging_end(object);
	1768	vm_object_unlock(object);
	1769
	1770	#if MACH_PAGEMAP
	1771	vm_external_destroy(object->existence_map, object->vo_size);
	1772	#endif /* MACH_PAGEMAP */
	1773
	1774	object->shadow = VM_OBJECT_NULL;
	1775
	1776	vm_object_lock_destroy(object);
	1777	/*
	1778	* Free the space for the object.
	1779	*/
	1780	zfree(vm_object_zone, object);
	1781	object = VM_OBJECT_NULL;
	1782	}
	1783
	1784
	1785	unsigned int vm_max_batch = 256;
	1786
	1787	#define V_O_R_MAX_BATCH 128
	1788
	1789	#define BATCH_LIMIT(max) (vm_max_batch >= max ? max : vm_max_batch)
	1790
	1791
	1792	#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \
	1793	MACRO_BEGIN \
	1794	if (_local_free_q) { \
	1795	if (do_disconnect) { \
	1796	vm_page_t m; \
	1797	for (m = _local_free_q; \
	1798	m != VM_PAGE_NULL; \
	1799	m = (vm_page_t) m->pageq.next) { \
	1800	if (m->pmapped) { \
	1801	pmap_disconnect(m->phys_page); \
	1802	} \
	1803	} \
	1804	} \
	1805	vm_page_free_list(_local_free_q, TRUE); \
	1806	_local_free_q = VM_PAGE_NULL; \
	1807	} \
	1808	MACRO_END
	1809
	1810
	1811	void
	1812	vm_object_reap_pages(
	1813	vm_object_t object,
	1814	int reap_type)
	1815	{
	1816	vm_page_t p;
	1817	vm_page_t next;
	1818	vm_page_t local_free_q = VM_PAGE_NULL;
	1819	int loop_count;
	1820	boolean_t disconnect_on_release;
	1821
	1822	if (reap_type == REAP_DATA_FLUSH) {
	1823	/*
	1824	* We need to disconnect pages from all pmaps before
	1825	* releasing them to the free list
	1826	*/
	1827	disconnect_on_release = TRUE;
	1828	} else {
	1829	/*
	1830	* Either the caller has already disconnected the pages
	1831	* from all pmaps, or we disconnect them here as we add
	1832	* them to out local list of pages to be released.
	1833	* No need to re-disconnect them when we release the pages
	1834	* to the free list.
	1835	*/
	1836	disconnect_on_release = FALSE;
	1837	}
	1838
	1839	restart_after_sleep:
	1840	if (queue_empty(&object->memq))
	1841	return;
	1842	loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH);
	1843
	1844	vm_page_lockspin_queues();
	1845
	1846	next = (vm_page_t)queue_first(&object->memq);
	1847
	1848	while (!queue_end(&object->memq, (queue_entry_t)next)) {
	1849
	1850	p = next;
	1851	next = (vm_page_t)queue_next(&next->listq);
	1852
	1853	if (--loop_count == 0) {
	1854
	1855	vm_page_unlock_queues();
	1856
	1857	if (local_free_q) {
	1858	/*
	1859	* Free the pages we reclaimed so far
	1860	* and take a little break to avoid
	1861	* hogging the page queue lock too long
	1862	*/
	1863	VM_OBJ_REAP_FREELIST(local_free_q,
	1864	disconnect_on_release);
	1865	} else
	1866	mutex_pause(0);
	1867
	1868	loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH);
	1869
	1870	vm_page_lockspin_queues();
	1871	}
	1872	if (reap_type == REAP_DATA_FLUSH \|\| reap_type == REAP_TERMINATE) {
	1873
	1874	if (p->busy \|\| p->cleaning) {
	1875
	1876	vm_page_unlock_queues();
	1877	/*
	1878	* free the pages reclaimed so far
	1879	*/
	1880	VM_OBJ_REAP_FREELIST(local_free_q,
	1881	disconnect_on_release);
	1882
	1883	PAGE_SLEEP(object, p, THREAD_UNINT);
	1884
	1885	goto restart_after_sleep;
	1886	}
	1887	if (p->laundry) {
	1888	p->pageout = FALSE;
	1889
	1890	vm_pageout_steal_laundry(p, TRUE);
	1891	}
	1892	}
	1893	switch (reap_type) {
	1894
	1895	case REAP_DATA_FLUSH:
	1896	if (VM_PAGE_WIRED(p)) {
	1897	/*
	1898	* this is an odd case... perhaps we should
	1899	* zero-fill this page since we're conceptually
	1900	* tossing its data at this point, but leaving
	1901	* it on the object to honor the 'wire' contract
	1902	*/
	1903	continue;
	1904	}
	1905	break;
	1906
	1907	case REAP_PURGEABLE:
	1908	if (VM_PAGE_WIRED(p)) {
	1909	/*
	1910	* can't purge a wired page
	1911	*/
	1912	vm_page_purged_wired++;
	1913	continue;
	1914	}
	1915	if (p->laundry && !p->busy && !p->cleaning) {
	1916	p->pageout = FALSE;
	1917
	1918	vm_pageout_steal_laundry(p, TRUE);
	1919	}
	1920	if (p->cleaning \|\| p->laundry) {
	1921	/*
	1922	* page is being acted upon,
	1923	* so don't mess with it
	1924	*/
	1925	vm_page_purged_others++;
	1926	continue;
	1927	}
	1928	if (p->busy) {
	1929	/*
	1930	* We can't reclaim a busy page but we can
	1931	* make it more likely to be paged (it's not wired) to make
	1932	* sure that it gets considered by
	1933	* vm_pageout_scan() later.
	1934	*/
	1935	vm_page_deactivate(p);
	1936	vm_page_purged_busy++;
	1937	continue;
	1938	}
	1939
	1940	assert(p->object != kernel_object);
	1941
	1942	/*
	1943	* we can discard this page...
	1944	*/
	1945	if (p->pmapped == TRUE) {
	1946	int refmod_state;
	1947	/*
	1948	* unmap the page
	1949	*/
	1950	refmod_state = pmap_disconnect(p->phys_page);
	1951	if (refmod_state & VM_MEM_MODIFIED) {
	1952	SET_PAGE_DIRTY(p, FALSE);
	1953	}
	1954	}
	1955	if (p->dirty \|\| p->precious) {
	1956	/*
	1957	* we saved the cost of cleaning this page !
	1958	*/
	1959	vm_page_purged_count++;
	1960	}
	1961
	1962	break;
	1963
	1964	case REAP_TERMINATE:
	1965	if (p->absent \|\| p->private) {
	1966	/*
	1967	* For private pages, VM_PAGE_FREE just
	1968	* leaves the page structure around for
	1969	* its owner to clean up. For absent
	1970	* pages, the structure is returned to
	1971	* the appropriate pool.
	1972	*/
	1973	break;
	1974	}
	1975	if (p->fictitious) {
	1976	assert (p->phys_page == vm_page_guard_addr);
	1977	break;
	1978	}
	1979	if (!p->dirty && p->wpmapped)
	1980	p->dirty = pmap_is_modified(p->phys_page);
	1981
	1982	if ((p->dirty \|\| p->precious) && !p->error && object->alive) {
	1983
	1984	if (!p->laundry) {
	1985	VM_PAGE_QUEUES_REMOVE(p);
	1986	/*
	1987	* flush page... page will be freed
	1988	* upon completion of I/O
	1989	*/
	1990	vm_pageout_cluster(p, TRUE);
	1991	}
	1992	vm_page_unlock_queues();
	1993	/*
	1994	* free the pages reclaimed so far
	1995	*/
	1996	VM_OBJ_REAP_FREELIST(local_free_q,
	1997	disconnect_on_release);
	1998
	1999	vm_object_paging_wait(object, THREAD_UNINT);
	2000
	2001	goto restart_after_sleep;
	2002	}
	2003	break;
	2004
	2005	case REAP_REAP:
	2006	break;
	2007	}
	2008	vm_page_free_prepare_queues(p);
	2009	assert(p->pageq.next == NULL && p->pageq.prev == NULL);
	2010	/*
	2011	* Add this page to our list of reclaimed pages,
	2012	* to be freed later.
	2013	*/
	2014	p->pageq.next = (queue_entry_t) local_free_q;
	2015	local_free_q = p;
	2016	}
	2017	vm_page_unlock_queues();
	2018
	2019	/*
	2020	* Free the remaining reclaimed pages
	2021	*/
	2022	VM_OBJ_REAP_FREELIST(local_free_q,
	2023	disconnect_on_release);
	2024	}
	2025
	2026
	2027	void
	2028	vm_object_reap_async(
	2029	vm_object_t object)
	2030	{
	2031	vm_object_lock_assert_exclusive(object);
	2032
	2033	vm_object_reaper_lock_spin();
	2034
	2035	vm_object_reap_count_async++;
	2036
	2037	/* enqueue the VM object... */
	2038	queue_enter(&vm_object_reaper_queue, object,
	2039	vm_object_t, cached_list);
	2040
	2041	vm_object_reaper_unlock();
	2042
	2043	/* ... and wake up the reaper thread */
	2044	thread_wakeup((event_t) &vm_object_reaper_queue);
	2045	}
	2046
	2047
	2048	void
	2049	vm_object_reaper_thread(void)
	2050	{
	2051	vm_object_t object, shadow_object;
	2052
	2053	vm_object_reaper_lock_spin();
	2054
	2055	while (!queue_empty(&vm_object_reaper_queue)) {
	2056	queue_remove_first(&vm_object_reaper_queue,
	2057	object,
	2058	vm_object_t,
	2059	cached_list);
	2060
	2061	vm_object_reaper_unlock();
	2062	vm_object_lock(object);
	2063
	2064	assert(object->terminating);
	2065	assert(!object->alive);
	2066
	2067	/*
	2068	* The pageout daemon might be playing with our pages.
	2069	* Now that the object is dead, it won't touch any more
	2070	* pages, but some pages might already be on their way out.
	2071	* Hence, we wait until the active paging activities have
	2072	* ceased before we break the association with the pager
	2073	* itself.
	2074	*/
	2075	while (object->paging_in_progress != 0 \|\|
	2076	object->activity_in_progress != 0) {
	2077	vm_object_wait(object,
	2078	VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
	2079	THREAD_UNINT);
	2080	vm_object_lock(object);
	2081	}
	2082
	2083	shadow_object =
	2084	object->pageout ? VM_OBJECT_NULL : object->shadow;
	2085
	2086	vm_object_reap(object);
	2087	/* cache is unlocked and object is no longer valid */
	2088	object = VM_OBJECT_NULL;
	2089
	2090	if (shadow_object != VM_OBJECT_NULL) {
	2091	/*
	2092	* Drop the reference "object" was holding on
	2093	* its shadow object.
	2094	*/
	2095	vm_object_deallocate(shadow_object);
	2096	shadow_object = VM_OBJECT_NULL;
	2097	}
	2098	vm_object_reaper_lock_spin();
	2099	}
	2100
	2101	/* wait for more work... */
	2102	assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
	2103
	2104	vm_object_reaper_unlock();
	2105
	2106	thread_block((thread_continue_t) vm_object_reaper_thread);
	2107	/NOTREACHED/
	2108	}
	2109
	2110	/*
	2111	* Routine: vm_object_pager_wakeup
	2112	* Purpose: Wake up anyone waiting for termination of a pager.
	2113	*/
	2114
	2115	static void
	2116	vm_object_pager_wakeup(
	2117	memory_object_t pager)
	2118	{
	2119	vm_object_hash_entry_t entry;
	2120	boolean_t waiting = FALSE;
	2121	lck_mtx_t *lck;
	2122
	2123	/*
	2124	* If anyone was waiting for the memory_object_terminate
	2125	* to be queued, wake them up now.
	2126	*/
	2127	lck = vm_object_hash_lock_spin(pager);
	2128	entry = vm_object_hash_lookup(pager, TRUE);
	2129	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
	2130	waiting = entry->waiting;
	2131	vm_object_hash_unlock(lck);
	2132
	2133	if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
	2134	if (waiting)
	2135	thread_wakeup((event_t) pager);
	2136	vm_object_hash_entry_free(entry);
	2137	}
	2138	}
	2139
	2140	/*
	2141	* Routine: vm_object_release_pager
	2142	* Purpose: Terminate the pager and, upon completion,
	2143	* release our last reference to it.
	2144	* just like memory_object_terminate, except
	2145	* that we wake up anyone blocked in vm_object_enter
	2146	* waiting for termination message to be queued
	2147	* before calling memory_object_init.
	2148	*/
	2149	static void
	2150	vm_object_release_pager(
	2151	memory_object_t pager,
	2152	boolean_t hashed)
	2153	{
	2154
	2155	/*
	2156	* Terminate the pager.
	2157	*/
	2158
	2159	(void) memory_object_terminate(pager);
	2160
	2161	if (hashed == TRUE) {
	2162	/*
	2163	* Wakeup anyone waiting for this terminate
	2164	* and remove the entry from the hash
	2165	*/
	2166	vm_object_pager_wakeup(pager);
	2167	}
	2168	/*
	2169	* Release reference to pager.
	2170	*/
	2171	memory_object_deallocate(pager);
	2172	}
	2173
	2174	/*
	2175	* Routine: vm_object_destroy
	2176	* Purpose:
	2177	* Shut down a VM object, despite the
	2178	* presence of address map (or other) references
	2179	* to the vm_object.
	2180	*/
	2181	kern_return_t
	2182	vm_object_destroy(
	2183	vm_object_t object,
	2184	__unused kern_return_t reason)
	2185	{
	2186	memory_object_t old_pager;
	2187
	2188	if (object == VM_OBJECT_NULL)
	2189	return(KERN_SUCCESS);
	2190
	2191	/*
	2192	* Remove the pager association immediately.
	2193	*
	2194	* This will prevent the memory manager from further
	2195	* meddling. [If it wanted to flush data or make
	2196	* other changes, it should have done so before performing
	2197	* the destroy call.]
	2198	*/
	2199
	2200	vm_object_lock(object);
	2201	object->can_persist = FALSE;
	2202	object->named = FALSE;
	2203	object->alive = FALSE;
	2204
	2205	if (object->hashed) {
	2206	lck_mtx_t *lck;
	2207	/*
	2208	* Rip out the pager from the vm_object now...
	2209	*/
	2210	lck = vm_object_hash_lock_spin(object->pager);
	2211	vm_object_remove(object);
	2212	vm_object_hash_unlock(lck);
	2213	}
	2214	old_pager = object->pager;
	2215	object->pager = MEMORY_OBJECT_NULL;
	2216	if (old_pager != MEMORY_OBJECT_NULL)
	2217	memory_object_control_disable(object->pager_control);
	2218
	2219	/*
	2220	* Wait for the existing paging activity (that got
	2221	* through before we nulled out the pager) to subside.
	2222	*/
	2223
	2224	vm_object_paging_wait(object, THREAD_UNINT);
	2225	vm_object_unlock(object);
	2226
	2227	/*
	2228	* Terminate the object now.
	2229	*/
	2230	if (old_pager != MEMORY_OBJECT_NULL) {
	2231	vm_object_release_pager(old_pager, object->hashed);
	2232
	2233	/*
	2234	* JMM - Release the caller's reference. This assumes the
	2235	* caller had a reference to release, which is a big (but
	2236	* currently valid) assumption if this is driven from the
	2237	* vnode pager (it is holding a named reference when making
	2238	* this call)..
	2239	*/
	2240	vm_object_deallocate(object);
	2241
	2242	}
	2243	return(KERN_SUCCESS);
	2244	}
	2245
	2246
	#if VM_OBJECT_CACHE

	#define VM_OBJ_DEACT_ALL_STATS DEBUG
	#if VM_OBJ_DEACT_ALL_STATS
	uint32_t vm_object_deactivate_all_pages_batches = 0;
	uint32_t vm_object_deactivate_all_pages_pages = 0;
	#endif /* VM_OBJ_DEACT_ALL_STATS */

	#define V_O_D_A_P_MAX_BATCH	256

	/*
	 *	vm_object_deactivate_all_pages
	 *
	 *	Deactivate every page of the given object that is neither
	 *	busy nor throttled.  (The pages stay in memory even though
	 *	the object is no longer referenced.)  The page queues lock is
	 *	yielded after each batch of pages so it is not held for the
	 *	whole walk.
	 *
	 *	The object must be locked.
	 */
	static void
	vm_object_deactivate_all_pages(
		register vm_object_t	object)
	{
		register vm_page_t	page;
		int			batch_left;
	#if VM_OBJ_DEACT_ALL_STATS
		int			batch_pages = 0;
	#endif /* VM_OBJ_DEACT_ALL_STATS */

		batch_left = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);

		vm_page_lock_queues();
		queue_iterate(&object->memq, page, vm_page_t, listq) {
			batch_left--;
			if (batch_left == 0) {
				/* Batch exhausted: publish stats, yield the lock, start over. */
	#if VM_OBJ_DEACT_ALL_STATS
				hw_atomic_add(&vm_object_deactivate_all_pages_batches,
					      1);
				hw_atomic_add(&vm_object_deactivate_all_pages_pages,
					      batch_pages);
				batch_pages = 0;
	#endif /* VM_OBJ_DEACT_ALL_STATS */
				lck_mtx_yield(&vm_page_queue_lock);
				batch_left = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
			}

			/* Busy and throttled pages are left where they are. */
			if (page->busy || page->throttled)
				continue;

	#if VM_OBJ_DEACT_ALL_STATS
			batch_pages++;
	#endif /* VM_OBJ_DEACT_ALL_STATS */
			vm_page_deactivate(page);
		}
	#if VM_OBJ_DEACT_ALL_STATS
		/* Account for the final, partially filled batch. */
		if (batch_pages) {
			hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
			hw_atomic_add(&vm_object_deactivate_all_pages_pages,
				      batch_pages);
			batch_pages = 0;
		}
	#endif /* VM_OBJ_DEACT_ALL_STATS */
		vm_page_unlock_queues();
	}
	#endif	/* VM_OBJECT_CACHE */
	2308
	2309
	2310
	2311	/*
	2312	* The "chunk" macros are used by routines below when looking for pages to deactivate. These
	2313	* exist because of the need to handle shadow chains. When deactivating pages, we only
	2314	* want to deactivate the ones at the top most level in the object chain. In order to do
	2315	* this efficiently, the specified address range is divided up into "chunks" and we use
	2316	* a bit map to keep track of which pages have already been processed as we descend down
	2317	* the shadow chain. These chunk macros hide the details of the bit map implementation
	2318	* as much as we can.
	2319	*
	2320	* For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is
	2321	* set to 64 pages. The bit map is indexed from the low-order end, so that the lowest
	2322	* order bit represents page 0 in the current range and highest order bit represents
	2323	* page 63.
	2324	*
	2325	* For further convenience, we also use negative logic for the page state in the bit map.
	2326	* The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has
	2327	* been processed. This way we can simply test the 64-bit long word to see if it's zero
	2328	* to easily tell if the whole range has been processed. Therefore, the bit map starts
	2329	* out with all the bits set. The macros below hide all these details from the caller.
	2330	*/
	2331
	#define PAGES_IN_A_CHUNK	64	/* The number of pages in the chunk must */
						/* be the same as the number of bits in  */
						/* the chunk_state_t type. We use 64     */
						/* just for convenience.                 */

	#define CHUNK_SIZE	(PAGES_IN_A_CHUNK * PAGE_SIZE_64)	/* Size of a chunk in bytes */

	typedef uint64_t	chunk_state_t;

	/*
	 * The bit map uses negative logic, so we start out with all 64 bits set to indicate
	 * that no pages have been processed yet.  Also, if len is less than the full CHUNK_SIZE,
	 * then we mark pages beyond the len as having been "processed" so that we don't waste time
	 * looking at pages in that range.  This can save us from unnecessarily chasing down the
	 * shadow chain.
	 *
	 * 'c' must be a chunk_state_t lvalue and 'len' a byte count.  The scratch variable has a
	 * deliberately unusual name so that it cannot capture an identifier used in 'c' or 'len'.
	 */

	#define CHUNK_INIT(c, len) \
		MACRO_BEGIN \
		uint64_t chunk_init_pg_; \
		\
		(c) = ~(chunk_state_t) 0; \
		\
		for (chunk_init_pg_ = (len) / PAGE_SIZE_64; \
		     chunk_init_pg_ < PAGES_IN_A_CHUNK; \
		     chunk_init_pg_++) \
			MARK_PAGE_HANDLED(c, chunk_init_pg_); \
		MACRO_END


	/*
	 * Return true if at least one page in the chunk has not yet been processed.
	 */

	#define CHUNK_NOT_COMPLETE(c)	((c) != 0)

	/*
	 * Return true if the page at offset 'p' in the bit map has already been handled
	 * while processing a higher level object in the shadow chain.
	 *
	 * The mask is built from an unsigned 64-bit one: shifting a signed 1LL by 63
	 * (the last page of a chunk) would be undefined behavior.
	 */

	#define PAGE_ALREADY_HANDLED(c, p)	(((c) & (1ULL << (p))) == 0)

	/*
	 * Mark the page at offset 'p' in the bit map as having been processed.
	 */

	#define MARK_PAGE_HANDLED(c, p) \
		MACRO_BEGIN \
		(c) = (c) & ~(1ULL << (p)); \
		MACRO_END
	2381
	2382
	2383	/*
	2384	* Return true if the page at the given offset has been paged out. Object is
	2385	* locked upon entry and returned locked.
	2386	*/
	2387
	2388	static boolean_t
	2389	page_is_paged_out(
	2390	vm_object_t object,
	2391	vm_object_offset_t offset)
	2392	{
	2393	kern_return_t kr;
	2394	memory_object_t pager;
	2395
	2396	/*
	2397	* Check the existence map for the page if we have one, otherwise
	2398	* ask the pager about this page.
	2399	*/
	2400
	2401	#if MACH_PAGEMAP
	2402	if (object->existence_map) {
	2403	if (vm_external_state_get(object->existence_map, offset)
	2404	== VM_EXTERNAL_STATE_EXISTS) {
	2405	/*
	2406	* We found the page
	2407	*/
	2408
	2409	return TRUE;
	2410	}
	2411	} else
	2412	#endif
	2413	if (object->internal &&
	2414	object->alive &&
	2415	!object->terminating &&
	2416	object->pager_ready) {
	2417
	2418	/*
	2419	* We're already holding a "paging in progress" reference
	2420	* so the object can't disappear when we release the lock.
	2421	*/
	2422
	2423	assert(object->paging_in_progress);
	2424	pager = object->pager;
	2425	vm_object_unlock(object);
	2426
	2427	kr = memory_object_data_request(
	2428	pager,
	2429	offset + object->paging_offset,
	2430	0, /* just poke the pager */
	2431	VM_PROT_READ,
	2432	NULL);
	2433
	2434	vm_object_lock(object);
	2435
	2436	if (kr == KERN_SUCCESS) {
	2437
	2438	/*
	2439	* We found the page
	2440	*/
	2441
	2442	return TRUE;
	2443	}
	2444	}
	2445
	2446	return FALSE;
	2447	}
	2448
	2449
	2450
	2451	/*
	2452	* Deactivate the pages in the specified object and range. If kill_page is set, also discard any
	2453	* page modified state from the pmap. Update the chunk_state as we go along. The caller must specify
	2454	* a size that is less than or equal to the CHUNK_SIZE.
	2455	*/
	2456
	2457	static void
	2458	deactivate_pages_in_object(
	2459	vm_object_t object,
	2460	vm_object_offset_t offset,
	2461	vm_object_size_t size,
	2462	boolean_t kill_page,
	2463	boolean_t reusable_page,
	2464	#if !MACH_ASSERT
	2465	__unused
	2466	#endif
	2467	boolean_t all_reusable,
	2468	chunk_state_t *chunk_state)
	2469	{
	2470	vm_page_t m;
	2471	int p;
	2472	struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	2473	struct vm_page_delayed_work *dwp;
	2474	int dw_count;
	2475	int dw_limit;
	2476	unsigned int reusable = 0;
	2477
	2478
	2479	/*
	2480	* Examine each page in the chunk. The variable 'p' is the page number relative to the start of the
	2481	* chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may
	2482	* have pages marked as having been processed already. We stop the loop early if we find we've handled
	2483	* all the pages in the chunk.
	2484	*/
	2485
	2486	dwp = &dw_array[0];
	2487	dw_count = 0;
	2488	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
	2489
	2490	for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) {
	2491
	2492	/*
	2493	* If this offset has already been found and handled in a higher level object, then don't
	2494	* do anything with it in the current shadow object.
	2495	*/
	2496
	2497	if (PAGE_ALREADY_HANDLED(*chunk_state, p))
	2498	continue;
	2499
	2500	/*
	2501	* See if the page at this offset is around. First check to see if the page is resident,
	2502	* then if not, check the existence map or with the pager.
	2503	*/
	2504
	2505	if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
	2506
	2507	/*
	2508	* We found a page we were looking for. Mark it as "handled" now in the chunk_state
	2509	* so that we won't bother looking for a page at this offset again if there are more
	2510	* shadow objects. Then deactivate the page.
	2511	*/
	2512
	2513	MARK_PAGE_HANDLED(*chunk_state, p);
	2514
	2515	if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) {
	2516	int clear_refmod;
	2517
	2518	clear_refmod = VM_MEM_REFERENCED;
	2519	dwp->dw_mask = DW_clear_reference;
	2520
	2521	if ((kill_page) && (object->internal)) {
	2522	m->precious = FALSE;
	2523	m->dirty = FALSE;
	2524
	2525	clear_refmod \|= VM_MEM_MODIFIED;
	2526	if (m->throttled) {
	2527	/*
	2528	* This page is now clean and
	2529	* reclaimable. Move it out
	2530	* of the throttled queue, so
	2531	* that vm_pageout_scan() can
	2532	* find it.
	2533	*/
	2534	dwp->dw_mask \|= DW_move_page;
	2535	}
	2536	#if MACH_PAGEMAP
	2537	vm_external_state_clr(object->existence_map, offset);
	2538	#endif /* MACH_PAGEMAP */
	2539
	2540	if (reusable_page && !m->reusable) {
	2541	assert(!all_reusable);
	2542	assert(!object->all_reusable);
	2543	m->reusable = TRUE;
	2544	object->reusable_page_count++;
	2545	assert(object->resident_page_count >= object->reusable_page_count);
	2546	reusable++;
	2547	}
	2548	}
	2549	pmap_clear_refmod(m->phys_page, clear_refmod);
	2550
	2551	if (!m->throttled && !(reusable_page \|\| all_reusable))
	2552	dwp->dw_mask \|= DW_move_page;
	2553
	2554	VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
	2555
	2556	if (dw_count >= dw_limit) {
	2557	if (reusable) {
	2558	OSAddAtomic(reusable,
	2559	&vm_page_stats_reusable.reusable_count);
	2560	vm_page_stats_reusable.reusable += reusable;
	2561	reusable = 0;
	2562	}
	2563	vm_page_do_delayed_work(object, &dw_array[0], dw_count);
	2564
	2565	dwp = &dw_array[0];
	2566	dw_count = 0;
	2567	}
	2568	}
	2569
	2570	} else {
	2571
	2572	/*
	2573	* The page at this offset isn't memory resident, check to see if it's
	2574	* been paged out. If so, mark it as handled so we don't bother looking
	2575	* for it in the shadow chain.
	2576	*/
	2577
	2578	if (page_is_paged_out(object, offset)) {
	2579	MARK_PAGE_HANDLED(*chunk_state, p);
	2580
	2581	/*
	2582	* If we're killing a non-resident page, then clear the page in the existence
	2583	* map so we don't bother paging it back in if it's touched again in the future.
	2584	*/
	2585
	2586	if ((kill_page) && (object->internal)) {
	2587	#if MACH_PAGEMAP
	2588	vm_external_state_clr(object->existence_map, offset);
	2589	#endif /* MACH_PAGEMAP */
	2590	}
	2591	}
	2592	}
	2593	}
	2594
	2595	if (reusable) {
	2596	OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count);
	2597	vm_page_stats_reusable.reusable += reusable;
	2598	reusable = 0;
	2599	}
	2600
	2601	if (dw_count)
	2602	vm_page_do_delayed_work(object, &dw_array[0], dw_count);
	2603	}
	2604
	2605
	2606	/*
	2607	* Deactive a "chunk" of the given range of the object starting at offset. A "chunk"
	2608	* will always be less than or equal to the given size. The total range is divided up
	2609	* into chunks for efficiency and performance related to the locks and handling the shadow
	2610	* chain. This routine returns how much of the given "size" it actually processed. It's
	2611	* up to the caler to loop and keep calling this routine until the entire range they want
	2612	* to process has been done.
	2613	*/
	2614
	2615	static vm_object_size_t
	2616	deactivate_a_chunk(
	2617	vm_object_t orig_object,
	2618	vm_object_offset_t offset,
	2619	vm_object_size_t size,
	2620	boolean_t kill_page,
	2621	boolean_t reusable_page,
	2622	boolean_t all_reusable)
	2623	{
	2624	vm_object_t object;
	2625	vm_object_t tmp_object;
	2626	vm_object_size_t length;
	2627	chunk_state_t chunk_state;
	2628
	2629
	2630	/*
	2631	* Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the
	2632	* remaining size the caller asked for.
	2633	*/
	2634
	2635	length = MIN(size, CHUNK_SIZE);
	2636
	2637	/*
	2638	* The chunk_state keeps track of which pages we've already processed if there's
	2639	* a shadow chain on this object. At this point, we haven't done anything with this
	2640	* range of pages yet, so initialize the state to indicate no pages processed yet.
	2641	*/
	2642
	2643	CHUNK_INIT(chunk_state, length);
	2644	object = orig_object;
	2645
	2646	/*
	2647	* Start at the top level object and iterate around the loop once for each object
	2648	* in the shadow chain. We stop processing early if we've already found all the pages
	2649	* in the range. Otherwise we stop when we run out of shadow objects.
	2650	*/
	2651
	2652	while (object && CHUNK_NOT_COMPLETE(chunk_state)) {
	2653	vm_object_paging_begin(object);
	2654
	2655	deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state);
	2656
	2657	vm_object_paging_end(object);
	2658
	2659	/*
	2660	* We've finished with this object, see if there's a shadow object. If
	2661	* there is, update the offset and lock the new object. We also turn off
	2662	* kill_page at this point since we only kill pages in the top most object.
	2663	*/
	2664
	2665	tmp_object = object->shadow;
	2666
	2667	if (tmp_object) {
	2668	kill_page = FALSE;
	2669	reusable_page = FALSE;
	2670	all_reusable = FALSE;
	2671	offset += object->vo_shadow_offset;
	2672	vm_object_lock(tmp_object);
	2673	}
	2674
	2675	if (object != orig_object)
	2676	vm_object_unlock(object);
	2677
	2678	object = tmp_object;
	2679	}
	2680
	2681	if (object && object != orig_object)
	2682	vm_object_unlock(object);
	2683
	2684	return length;
	2685	}
	2686
	2687
	2688
	2689	/*
	2690	* Move any resident pages in the specified range to the inactive queue. If kill_page is set,
	2691	* we also clear the modified status of the page and "forget" any changes that have been made
	2692	* to the page.
	2693	*/
	2694
	2695	__private_extern__ void
	2696	vm_object_deactivate_pages(
	2697	vm_object_t object,
	2698	vm_object_offset_t offset,
	2699	vm_object_size_t size,
	2700	boolean_t kill_page,
	2701	boolean_t reusable_page)
	2702	{
	2703	vm_object_size_t length;
	2704	boolean_t all_reusable;
	2705
	2706	/*
	2707	* We break the range up into chunks and do one chunk at a time. This is for
	2708	* efficiency and performance while handling the shadow chains and the locks.
	2709	* The deactivate_a_chunk() function returns how much of the range it processed.
	2710	* We keep calling this routine until the given size is exhausted.
	2711	*/
	2712
	2713
	2714	all_reusable = FALSE;
	2715	if (reusable_page &&
	2716	object->internal &&
	2717	object->vo_size != 0 &&
	2718	object->vo_size == size &&
	2719	object->reusable_page_count == 0) {
	2720	all_reusable = TRUE;
	2721	reusable_page = FALSE;
	2722	}
	2723
	2724	if ((reusable_page \|\| all_reusable) && object->all_reusable) {
	2725	/* This means MADV_FREE_REUSABLE has been called twice, which
	2726	* is probably illegal. */
	2727	return;
	2728	}
	2729
	2730	while (size) {
	2731	length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable);
	2732
	2733	size -= length;
	2734	offset += length;
	2735	}
	2736
	2737	if (all_reusable) {
	2738	if (!object->all_reusable) {
	2739	unsigned int reusable;
	2740
	2741	object->all_reusable = TRUE;
	2742	assert(object->reusable_page_count == 0);
	2743	/* update global stats */
	2744	reusable = object->resident_page_count;
	2745	OSAddAtomic(reusable,
	2746	&vm_page_stats_reusable.reusable_count);
	2747	vm_page_stats_reusable.reusable += reusable;
	2748	vm_page_stats_reusable.all_reusable_calls++;
	2749	}
	2750	} else if (reusable_page) {
	2751	vm_page_stats_reusable.partial_reusable_calls++;
	2752	}
	2753	}
	2754
	2755	void
	2756	vm_object_reuse_pages(
	2757	vm_object_t object,
	2758	vm_object_offset_t start_offset,
	2759	vm_object_offset_t end_offset,
	2760	boolean_t allow_partial_reuse)
	2761	{
	2762	vm_object_offset_t cur_offset;
	2763	vm_page_t m;
	2764	unsigned int reused, reusable;
	2765
	2766	#define VM_OBJECT_REUSE_PAGE(object, m, reused) \
	2767	MACRO_BEGIN \
	2768	if ((m) != VM_PAGE_NULL && \
	2769	(m)->reusable) { \
	2770	assert((object)->reusable_page_count <= \
	2771	(object)->resident_page_count); \
	2772	assert((object)->reusable_page_count > 0); \
	2773	(object)->reusable_page_count--; \
	2774	(m)->reusable = FALSE; \
	2775	(reused)++; \
	2776	} \
	2777	MACRO_END
	2778
	2779	reused = 0;
	2780	reusable = 0;
	2781
	2782	vm_object_lock_assert_exclusive(object);
	2783
	2784	if (object->all_reusable) {
	2785	assert(object->reusable_page_count == 0);
	2786	object->all_reusable = FALSE;
	2787	if (end_offset - start_offset == object->vo_size \|\|
	2788	!allow_partial_reuse) {
	2789	vm_page_stats_reusable.all_reuse_calls++;
	2790	reused = object->resident_page_count;
	2791	} else {
	2792	vm_page_stats_reusable.partial_reuse_calls++;
	2793	queue_iterate(&object->memq, m, vm_page_t, listq) {
	2794	if (m->offset < start_offset \|\|
	2795	m->offset >= end_offset) {
	2796	m->reusable = TRUE;
	2797	object->reusable_page_count++;
	2798	assert(object->resident_page_count >= object->reusable_page_count);
	2799	continue;
	2800	} else {
	2801	assert(!m->reusable);
	2802	reused++;
	2803	}
	2804	}
	2805	}
	2806	} else if (object->resident_page_count >
	2807	((end_offset - start_offset) >> PAGE_SHIFT)) {
	2808	vm_page_stats_reusable.partial_reuse_calls++;
	2809	for (cur_offset = start_offset;
	2810	cur_offset < end_offset;
	2811	cur_offset += PAGE_SIZE_64) {
	2812	if (object->reusable_page_count == 0) {
	2813	break;
	2814	}
	2815	m = vm_page_lookup(object, cur_offset);
	2816	VM_OBJECT_REUSE_PAGE(object, m, reused);
	2817	}
	2818	} else {
	2819	vm_page_stats_reusable.partial_reuse_calls++;
	2820	queue_iterate(&object->memq, m, vm_page_t, listq) {
	2821	if (object->reusable_page_count == 0) {
	2822	break;
	2823	}
	2824	if (m->offset < start_offset \|\|
	2825	m->offset >= end_offset) {
	2826	continue;
	2827	}
	2828	VM_OBJECT_REUSE_PAGE(object, m, reused);
	2829	}
	2830	}
	2831
	2832	/* update global stats */
	2833	OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count);
	2834	vm_page_stats_reusable.reused += reused;
	2835	vm_page_stats_reusable.reusable += reusable;
	2836	}
	2837
	2838	/*
	2839	* Routine: vm_object_pmap_protect
	2840	*
	2841	* Purpose:
	2842	* Reduces the permission for all physical
	2843	* pages in the specified object range.
	2844	*
	2845	* If removing write permission only, it is
	2846	* sufficient to protect only the pages in
	2847	* the top-level object; only those pages may
	2848	* have write permission.
	2849	*
	2850	* If removing all access, we must follow the
	2851	* shadow chain from the top-level object to
	2852	* remove access to all pages in shadowed objects.
	2853	*
	2854	* The object must not be locked. The object must
	2855	* be temporary/internal.
	2856	*
	2857	* If pmap is not NULL, this routine assumes that
	2858	* the only mappings for the pages are in that
	2859	* pmap.
	2860	*/
	2861
	2862	__private_extern__ void
	2863	vm_object_pmap_protect(
	2864	register vm_object_t object,
	2865	register vm_object_offset_t offset,
	2866	vm_object_size_t size,
	2867	pmap_t pmap,
	2868	vm_map_offset_t pmap_start,
	2869	vm_prot_t prot)
	2870	{
	2871	if (object == VM_OBJECT_NULL)
	2872	return;
	2873	size = vm_object_round_page(size);
	2874	offset = vm_object_trunc_page(offset);
	2875
	2876	vm_object_lock(object);
	2877
	2878	if (object->phys_contiguous) {
	2879	if (pmap != NULL) {
	2880	vm_object_unlock(object);
	2881	pmap_protect(pmap, pmap_start, pmap_start + size, prot);
	2882	} else {
	2883	vm_object_offset_t phys_start, phys_end, phys_addr;
	2884
	2885	phys_start = object->vo_shadow_offset + offset;
	2886	phys_end = phys_start + size;
	2887	assert(phys_start <= phys_end);
	2888	assert(phys_end <= object->vo_shadow_offset + object->vo_size);
	2889	vm_object_unlock(object);
	2890
	2891	for (phys_addr = phys_start;
	2892	phys_addr < phys_end;
	2893	phys_addr += PAGE_SIZE_64) {
	2894	pmap_page_protect((ppnum_t) (phys_addr >> PAGE_SHIFT), prot);
	2895	}
	2896	}
	2897	return;
	2898	}
	2899
	2900	assert(object->internal);
	2901
	2902	while (TRUE) {
	2903	if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
	2904	vm_object_unlock(object);
	2905	pmap_protect(pmap, pmap_start, pmap_start + size, prot);
	2906	return;
	2907	}
	2908
	2909	/* if we are doing large ranges with respect to resident */
	2910	/* page count then we should interate over pages otherwise */
	2911	/* inverse page look-up will be faster */
	2912	if (ptoa_64(object->resident_page_count / 4) < size) {
	2913	vm_page_t p;
	2914	vm_object_offset_t end;
	2915
	2916	end = offset + size;
	2917
	2918	if (pmap != PMAP_NULL) {
	2919	queue_iterate(&object->memq, p, vm_page_t, listq) {
	2920	if (!p->fictitious &&
	2921	(offset <= p->offset) && (p->offset < end)) {
	2922	vm_map_offset_t start;
	2923
	2924	start = pmap_start + p->offset - offset;
	2925	pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
	2926	}
	2927	}
	2928	} else {
	2929	queue_iterate(&object->memq, p, vm_page_t, listq) {
	2930	if (!p->fictitious &&
	2931	(offset <= p->offset) && (p->offset < end)) {
	2932
	2933	pmap_page_protect(p->phys_page, prot);
	2934	}
	2935	}
	2936	}
	2937	} else {
	2938	vm_page_t p;
	2939	vm_object_offset_t end;
	2940	vm_object_offset_t target_off;
	2941
	2942	end = offset + size;
	2943
	2944	if (pmap != PMAP_NULL) {
	2945	for(target_off = offset;
	2946	target_off < end;
	2947	target_off += PAGE_SIZE) {
	2948	p = vm_page_lookup(object, target_off);
	2949	if (p != VM_PAGE_NULL) {
	2950	vm_object_offset_t start;
	2951	start = pmap_start +
	2952	(p->offset - offset);
	2953	pmap_protect(pmap, start,
	2954	start + PAGE_SIZE, prot);
	2955	}
	2956	}
	2957	} else {
	2958	for(target_off = offset;
	2959	target_off < end; target_off += PAGE_SIZE) {
	2960	p = vm_page_lookup(object, target_off);
	2961	if (p != VM_PAGE_NULL) {
	2962	pmap_page_protect(p->phys_page, prot);
	2963	}
	2964	}
	2965	}
	2966	}
	2967
	2968	if (prot == VM_PROT_NONE) {
	2969	/*
	2970	* Must follow shadow chain to remove access
	2971	* to pages in shadowed objects.
	2972	*/
	2973	register vm_object_t next_object;
	2974
	2975	next_object = object->shadow;
	2976	if (next_object != VM_OBJECT_NULL) {
	2977	offset += object->vo_shadow_offset;
	2978	vm_object_lock(next_object);
	2979	vm_object_unlock(object);
	2980	object = next_object;
	2981	}
	2982	else {
	2983	/*
	2984	* End of chain - we are done.
	2985	*/
	2986	break;
	2987	}
	2988	}
	2989	else {
	2990	/*
	2991	* Pages in shadowed objects may never have
	2992	* write permission - we may stop here.
	2993	*/
	2994	break;
	2995	}
	2996	}
	2997
	2998	vm_object_unlock(object);
	2999	}
	3000
	3001	/*
	3002	* Routine: vm_object_copy_slowly
	3003	*
	3004	* Description:
	3005	* Copy the specified range of the source
	3006	* virtual memory object without using
	3007	* protection-based optimizations (such
	3008	* as copy-on-write). The pages in the
	3009	* region are actually copied.
	3010	*
	3011	* In/out conditions:
	3012	* The caller must hold a reference and a lock
	3013	* for the source virtual memory object. The source
	3014	* object will be returned unlocked.
	3015	*
	3016	* Results:
	3017	* If the copy is completed successfully, KERN_SUCCESS is
	3018	* returned. If the caller asserted the interruptible
	3019	* argument, and an interruption occurred while waiting
	3020	* for a user-generated event, MACH_SEND_INTERRUPTED is
	3021	* returned. Other values may be returned to indicate
	3022	* hard errors during the copy operation.
	3023	*
	3024	* A new virtual memory object is returned in a
	3025	* parameter (_result_object). The contents of this
	3026	* new object, starting at a zero offset, are a copy
	3027	* of the source memory region. In the event of
	3028	* an error, this parameter will contain the value
	3029	* VM_OBJECT_NULL.
	3030	*/
	3031	__private_extern__ kern_return_t
	3032	vm_object_copy_slowly(
	3033	register vm_object_t src_object,
	3034	vm_object_offset_t src_offset,
	3035	vm_object_size_t size,
	3036	boolean_t interruptible,
	3037	vm_object_t _result_object) / OUT */
	3038	{
	3039	vm_object_t new_object;
	3040	vm_object_offset_t new_offset;
	3041
	3042	struct vm_object_fault_info fault_info;
	3043
	3044	XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
	3045	src_object, src_offset, size, 0, 0);
	3046
	3047	if (size == 0) {
	3048	vm_object_unlock(src_object);
	3049	*_result_object = VM_OBJECT_NULL;
	3050	return(KERN_INVALID_ARGUMENT);
	3051	}
	3052
	3053	/*
	3054	* Prevent destruction of the source object while we copy.
	3055	*/
	3056
	3057	vm_object_reference_locked(src_object);
	3058	vm_object_unlock(src_object);
	3059
	3060	/*
	3061	* Create a new object to hold the copied pages.
	3062	* A few notes:
	3063	* We fill the new object starting at offset 0,
	3064	* regardless of the input offset.
	3065	* We don't bother to lock the new object within
	3066	* this routine, since we have the only reference.
	3067	*/
	3068
	3069	new_object = vm_object_allocate(size);
	3070	new_offset = 0;
	3071
	3072	assert(size == trunc_page_64(size)); /* Will the loop terminate? */
	3073
	3074	fault_info.interruptible = interruptible;
	3075	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	3076	fault_info.user_tag = 0;
	3077	fault_info.lo_offset = src_offset;
	3078	fault_info.hi_offset = src_offset + size;
	3079	fault_info.no_cache = FALSE;
	3080	fault_info.stealth = TRUE;
	3081	fault_info.io_sync = FALSE;
	3082	fault_info.cs_bypass = FALSE;
	3083	fault_info.mark_zf_absent = FALSE;
	3084	fault_info.batch_pmap_op = FALSE;
	3085
	3086	for ( ;
	3087	size != 0 ;
	3088	src_offset += PAGE_SIZE_64,
	3089	new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
	3090	) {
	3091	vm_page_t new_page;
	3092	vm_fault_return_t result;
	3093
	3094	vm_object_lock(new_object);
	3095
	3096	while ((new_page = vm_page_alloc(new_object, new_offset))
	3097	== VM_PAGE_NULL) {
	3098
	3099	vm_object_unlock(new_object);
	3100
	3101	if (!vm_page_wait(interruptible)) {
	3102	vm_object_deallocate(new_object);
	3103	vm_object_deallocate(src_object);
	3104	*_result_object = VM_OBJECT_NULL;
	3105	return(MACH_SEND_INTERRUPTED);
	3106	}
	3107	vm_object_lock(new_object);
	3108	}
	3109	vm_object_unlock(new_object);
	3110
	3111	do {
	3112	vm_prot_t prot = VM_PROT_READ;
	3113	vm_page_t _result_page;
	3114	vm_page_t top_page;
	3115	register
	3116	vm_page_t result_page;
	3117	kern_return_t error_code;
	3118
	3119	vm_object_lock(src_object);
	3120	vm_object_paging_begin(src_object);
	3121
	3122	if (size > (vm_size_t) -1) {
	3123	/* 32-bit overflow */
	3124	fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE);
	3125	} else {
	3126	fault_info.cluster_size = (vm_size_t) size;
	3127	assert(fault_info.cluster_size == size);
	3128	}
	3129
	3130	XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
	3131	result = vm_fault_page(src_object, src_offset,
	3132	VM_PROT_READ, FALSE,
	3133	&prot, &_result_page, &top_page,
	3134	(int *)0,
	3135	&error_code, FALSE, FALSE, &fault_info);
	3136
	3137	switch(result) {
	3138	case VM_FAULT_SUCCESS:
	3139	result_page = _result_page;
	3140
	3141	/*
	3142	* Copy the page to the new object.
	3143	*
	3144	* POLICY DECISION:
	3145	* If result_page is clean,
	3146	* we could steal it instead
	3147	* of copying.
	3148	*/
	3149
	3150	vm_page_copy(result_page, new_page);
	3151	vm_object_unlock(result_page->object);
	3152
	3153	/*
	3154	* Let go of both pages (make them
	3155	* not busy, perform wakeup, activate).
	3156	*/
	3157	vm_object_lock(new_object);
	3158	SET_PAGE_DIRTY(new_page, FALSE);
	3159	PAGE_WAKEUP_DONE(new_page);
	3160	vm_object_unlock(new_object);
	3161
	3162	vm_object_lock(result_page->object);
	3163	PAGE_WAKEUP_DONE(result_page);
	3164
	3165	vm_page_lockspin_queues();
	3166	if (!result_page->active &&
	3167	!result_page->inactive &&
	3168	!result_page->throttled)
	3169	vm_page_activate(result_page);
	3170	vm_page_activate(new_page);
	3171	vm_page_unlock_queues();
	3172
	3173	/*
	3174	* Release paging references and
	3175	* top-level placeholder page, if any.
	3176	*/
	3177
	3178	vm_fault_cleanup(result_page->object,
	3179	top_page);
	3180
	3181	break;
	3182
	3183	case VM_FAULT_RETRY:
	3184	break;
	3185
	3186	case VM_FAULT_MEMORY_SHORTAGE:
	3187	if (vm_page_wait(interruptible))
	3188	break;
	3189	/* fall thru */
	3190
	3191	case VM_FAULT_INTERRUPTED:
	3192	vm_object_lock(new_object);
	3193	VM_PAGE_FREE(new_page);
	3194	vm_object_unlock(new_object);
	3195
	3196	vm_object_deallocate(new_object);
	3197	vm_object_deallocate(src_object);
	3198	*_result_object = VM_OBJECT_NULL;
	3199	return(MACH_SEND_INTERRUPTED);
	3200
	3201	case VM_FAULT_SUCCESS_NO_VM_PAGE:
	3202	/* success but no VM page: fail */
	3203	vm_object_paging_end(src_object);
	3204	vm_object_unlock(src_object);
	3205	/*FALLTHROUGH*/
	3206	case VM_FAULT_MEMORY_ERROR:
	3207	/*
	3208	* A policy choice:
	3209	* (a) ignore pages that we can't
	3210	* copy
	3211	* (b) return the null object if
	3212	* any page fails [chosen]
	3213	*/
	3214
	3215	vm_object_lock(new_object);
	3216	VM_PAGE_FREE(new_page);
	3217	vm_object_unlock(new_object);
	3218
	3219	vm_object_deallocate(new_object);
	3220	vm_object_deallocate(src_object);
	3221	*_result_object = VM_OBJECT_NULL;
	3222	return(error_code ? error_code:
	3223	KERN_MEMORY_ERROR);
	3224
	3225	default:
	3226	panic("vm_object_copy_slowly: unexpected error"
	3227	" 0x%x from vm_fault_page()\n", result);
	3228	}
	3229	} while (result != VM_FAULT_SUCCESS);
	3230	}
	3231
	3232	/*
	3233	* Lose the extra reference, and return our object.
	3234	*/
	3235	vm_object_deallocate(src_object);
	3236	*_result_object = new_object;
	3237	return(KERN_SUCCESS);
	3238	}
	3239
	3240	/*
	3241	* Routine: vm_object_copy_quickly
	3242	*
	3243	* Purpose:
	3244	* Copy the specified range of the source virtual
	3245	* memory object, if it can be done without waiting
	3246	* for user-generated events.
	3247	*
	3248	* Results:
	3249	* If the copy is successful, the copy is returned in
	3250	* the arguments; otherwise, the arguments are not
	3251	* affected.
	3252	*
	3253	* In/out conditions:
	3254	* The object should be unlocked on entry and exit.
	3255	*/
	3256
	3257	/ARGSUSED/
	3258	__private_extern__ boolean_t
	3259	vm_object_copy_quickly(
	3260	vm_object_t _object, / INOUT */
	3261	__unused vm_object_offset_t offset, /* IN */
	3262	__unused vm_object_size_t size, /* IN */
	3263	boolean_t _src_needs_copy, / OUT */
	3264	boolean_t _dst_needs_copy) / OUT */
	3265	{
	3266	vm_object_t object = *_object;
	3267	memory_object_copy_strategy_t copy_strategy;
	3268
	3269	XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
	3270	*_object, offset, size, 0, 0);
	3271	if (object == VM_OBJECT_NULL) {
	3272	*_src_needs_copy = FALSE;
	3273	*_dst_needs_copy = FALSE;
	3274	return(TRUE);
	3275	}
	3276
	3277	vm_object_lock(object);
	3278
	3279	copy_strategy = object->copy_strategy;
	3280
	3281	switch (copy_strategy) {
	3282	case MEMORY_OBJECT_COPY_SYMMETRIC:
	3283
	3284	/*
	3285	* Symmetric copy strategy.
	3286	* Make another reference to the object.
	3287	* Leave object/offset unchanged.
	3288	*/
	3289
	3290	vm_object_reference_locked(object);
	3291	object->shadowed = TRUE;
	3292	vm_object_unlock(object);
	3293
	3294	/*
	3295	* Both source and destination must make
	3296	* shadows, and the source must be made
	3297	* read-only if not already.
	3298	*/
	3299
	3300	*_src_needs_copy = TRUE;
	3301	*_dst_needs_copy = TRUE;
	3302
	3303	break;
	3304
	3305	case MEMORY_OBJECT_COPY_DELAY:
	3306	vm_object_unlock(object);
	3307	return(FALSE);
	3308
	3309	default:
	3310	vm_object_unlock(object);
	3311	return(FALSE);
	3312	}
	3313	return(TRUE);
	3314	}
	3315
	3316	static int copy_call_count = 0;
	3317	static int copy_call_sleep_count = 0;
	3318	static int copy_call_restart_count = 0;
	3319
	3320	/*
	3321	* Routine: vm_object_copy_call [internal]
	3322	*
	3323	* Description:
	3324	* Copy the source object (src_object), using the
	3325	* user-managed copy algorithm.
	3326	*
	3327	* In/out conditions:
	3328	* The source object must be locked on entry. It
	3329	* will be unlocked on exit.
	3330	*
	3331	* Results:
	3332	* If the copy is successful, KERN_SUCCESS is returned.
	3333	* A new object that represents the copied virtual
	3334	* memory is returned in a parameter (*_result_object).
	3335	* If the return value indicates an error, this parameter
	3336	* is not valid.
	3337	*/
	3338	static kern_return_t
	3339	vm_object_copy_call(
	3340	vm_object_t src_object,
	3341	vm_object_offset_t src_offset,
	3342	vm_object_size_t size,
	3343	vm_object_t _result_object) / OUT */
	3344	{
	3345	kern_return_t kr;
	3346	vm_object_t copy;
	3347	boolean_t check_ready = FALSE;
	3348	uint32_t try_failed_count = 0;
	3349
	3350	/*
	3351	* If a copy is already in progress, wait and retry.
	3352	*
	3353	* XXX
	3354	* Consider making this call interruptable, as Mike
	3355	* intended it to be.
	3356	*
	3357	* XXXO
	3358	* Need a counter or version or something to allow
	3359	* us to use the copy that the currently requesting
	3360	* thread is obtaining -- is it worth adding to the
	3361	* vm object structure? Depends how common this case it.
	3362	*/
	3363	copy_call_count++;
	3364	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
	3365	vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
	3366	THREAD_UNINT);
	3367	copy_call_restart_count++;
	3368	}
	3369
	3370	/*
	3371	* Indicate (for the benefit of memory_object_create_copy)
	3372	* that we want a copy for src_object. (Note that we cannot
	3373	* do a real assert_wait before calling memory_object_copy,
	3374	* so we simply set the flag.)
	3375	*/
	3376
	3377	vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
	3378	vm_object_unlock(src_object);
	3379
	3380	/*
	3381	* Ask the memory manager to give us a memory object
	3382	* which represents a copy of the src object.
	3383	* The memory manager may give us a memory object
	3384	* which we already have, or it may give us a
	3385	* new memory object. This memory object will arrive
	3386	* via memory_object_create_copy.
	3387	*/
	3388
	3389	kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
	3390	if (kr != KERN_SUCCESS) {
	3391	return kr;
	3392	}
	3393
	3394	/*
	3395	* Wait for the copy to arrive.
	3396	*/
	3397	vm_object_lock(src_object);
	3398	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
	3399	vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
	3400	THREAD_UNINT);
	3401	copy_call_sleep_count++;
	3402	}
	3403	Retry:
	3404	assert(src_object->copy != VM_OBJECT_NULL);
	3405	copy = src_object->copy;
	3406	if (!vm_object_lock_try(copy)) {
	3407	vm_object_unlock(src_object);
	3408
	3409	try_failed_count++;
	3410	mutex_pause(try_failed_count); /* wait a bit */
	3411
	3412	vm_object_lock(src_object);
	3413	goto Retry;
	3414	}
	3415	if (copy->vo_size < src_offset+size)
	3416	copy->vo_size = src_offset+size;
	3417
	3418	if (!copy->pager_ready)
	3419	check_ready = TRUE;
	3420
	3421	/*
	3422	* Return the copy.
	3423	*/
	3424	*_result_object = copy;
	3425	vm_object_unlock(copy);
	3426	vm_object_unlock(src_object);
	3427
	3428	/* Wait for the copy to be ready. */
	3429	if (check_ready == TRUE) {
	3430	vm_object_lock(copy);
	3431	while (!copy->pager_ready) {
	3432	vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
	3433	}
	3434	vm_object_unlock(copy);
	3435	}
	3436
	3437	return KERN_SUCCESS;
	3438	}
	3439
	3440	static int copy_delayed_lock_collisions = 0;
	3441	static int copy_delayed_max_collisions = 0;
	3442	static int copy_delayed_lock_contention = 0;
	3443	static int copy_delayed_protect_iterate = 0;
	3444
	3445	/*
	3446	* Routine: vm_object_copy_delayed [internal]
	3447	*
	3448	* Description:
	3449	* Copy the specified virtual memory object, using
	3450	* the asymmetric copy-on-write algorithm.
	3451	*
	3452	* In/out conditions:
	3453	* The src_object must be locked on entry. It will be unlocked
	3454	* on exit - so the caller must also hold a reference to it.
	3455	*
	3456	* This routine will not block waiting for user-generated
	3457	* events. It is not interruptible.
	3458	*/
	3459	__private_extern__ vm_object_t
	3460	vm_object_copy_delayed(
	3461	vm_object_t src_object,
	3462	vm_object_offset_t src_offset,
	3463	vm_object_size_t size,
	3464	boolean_t src_object_shared)
	3465	{
	3466	vm_object_t new_copy = VM_OBJECT_NULL;
	3467	vm_object_t old_copy;
	3468	vm_page_t p;
	3469	vm_object_size_t copy_size = src_offset + size;
	3470
	3471
	3472	int collisions = 0;
	3473	/*
	3474	* The user-level memory manager wants to see all of the changes
	3475	* to this object, but it has promised not to make any changes on
	3476	* its own.
	3477	*
	3478	* Perform an asymmetric copy-on-write, as follows:
	3479	* Create a new object, called a "copy object" to hold
	3480	* pages modified by the new mapping (i.e., the copy,
	3481	* not the original mapping).
	3482	* Record the original object as the backing object for
	3483	* the copy object. If the original mapping does not
	3484	* change a page, it may be used read-only by the copy.
	3485	* Record the copy object in the original object.
	3486	* When the original mapping causes a page to be modified,
	3487	* it must be copied to a new page that is "pushed" to
	3488	* the copy object.
	3489	* Mark the new mapping (the copy object) copy-on-write.
	3490	* This makes the copy object itself read-only, allowing
	3491	* it to be reused if the original mapping makes no
	3492	* changes, and simplifying the synchronization required
	3493	* in the "push" operation described above.
	3494	*
	3495	* The copy-on-write is said to be assymetric because the original
	3496	* object is not marked copy-on-write. A copied page is pushed
	3497	* to the copy object, regardless which party attempted to modify
	3498	* the page.
	3499	*
	3500	* Repeated asymmetric copy operations may be done. If the
	3501	* original object has not been changed since the last copy, its
	3502	* copy object can be reused. Otherwise, a new copy object can be
	3503	* inserted between the original object and its previous copy
	3504	* object. Since any copy object is read-only, this cannot affect
	3505	* affect the contents of the previous copy object.
	3506	*
	3507	* Note that a copy object is higher in the object tree than the
	3508	* original object; therefore, use of the copy object recorded in
	3509	* the original object must be done carefully, to avoid deadlock.
	3510	*/
	3511
	3512	Retry:
	3513
	3514	/*
	3515	* Wait for paging in progress.
	3516	*/
	3517	if (!src_object->true_share &&
	3518	(src_object->paging_in_progress != 0 \|\|
	3519	src_object->activity_in_progress != 0)) {
	3520	if (src_object_shared == TRUE) {
	3521	vm_object_unlock(src_object);
	3522	vm_object_lock(src_object);
	3523	src_object_shared = FALSE;
	3524	goto Retry;
	3525	}
	3526	vm_object_paging_wait(src_object, THREAD_UNINT);
	3527	}
	3528	/*
	3529	* See whether we can reuse the result of a previous
	3530	* copy operation.
	3531	*/
	3532
	3533	old_copy = src_object->copy;
	3534	if (old_copy != VM_OBJECT_NULL) {
	3535	int lock_granted;
	3536
	3537	/*
	3538	* Try to get the locks (out of order)
	3539	*/
	3540	if (src_object_shared == TRUE)
	3541	lock_granted = vm_object_lock_try_shared(old_copy);
	3542	else
	3543	lock_granted = vm_object_lock_try(old_copy);
	3544
	3545	if (!lock_granted) {
	3546	vm_object_unlock(src_object);
	3547
	3548	if (collisions++ == 0)
	3549	copy_delayed_lock_contention++;
	3550	mutex_pause(collisions);
	3551
	3552	/* Heisenberg Rules */
	3553	copy_delayed_lock_collisions++;
	3554
	3555	if (collisions > copy_delayed_max_collisions)
	3556	copy_delayed_max_collisions = collisions;
	3557
	3558	if (src_object_shared == TRUE)
	3559	vm_object_lock_shared(src_object);
	3560	else
	3561	vm_object_lock(src_object);
	3562
	3563	goto Retry;
	3564	}
	3565
	3566	/*
	3567	* Determine whether the old copy object has
	3568	* been modified.
	3569	*/
	3570
	3571	if (old_copy->resident_page_count == 0 &&
	3572	!old_copy->pager_created) {
	3573	/*
	3574	* It has not been modified.
	3575	*
	3576	* Return another reference to
	3577	* the existing copy-object if
	3578	* we can safely grow it (if
	3579	* needed).
	3580	*/
	3581
	3582	if (old_copy->vo_size < copy_size) {
	3583	if (src_object_shared == TRUE) {
	3584	vm_object_unlock(old_copy);
	3585	vm_object_unlock(src_object);
	3586
	3587	vm_object_lock(src_object);
	3588	src_object_shared = FALSE;
	3589	goto Retry;
	3590	}
	3591	/*
	3592	* We can't perform a delayed copy if any of the
	3593	* pages in the extended range are wired (because
	3594	* we can't safely take write permission away from
	3595	* wired pages). If the pages aren't wired, then
	3596	* go ahead and protect them.
	3597	*/
	3598	copy_delayed_protect_iterate++;
	3599
	3600	queue_iterate(&src_object->memq, p, vm_page_t, listq) {
	3601	if (!p->fictitious &&
	3602	p->offset >= old_copy->vo_size &&
	3603	p->offset < copy_size) {
	3604	if (VM_PAGE_WIRED(p)) {
	3605	vm_object_unlock(old_copy);
	3606	vm_object_unlock(src_object);
	3607
	3608	if (new_copy != VM_OBJECT_NULL) {
	3609	vm_object_unlock(new_copy);
	3610	vm_object_deallocate(new_copy);
	3611	}
	3612
	3613	return VM_OBJECT_NULL;
	3614	} else {
	3615	pmap_page_protect(p->phys_page,
	3616	(VM_PROT_ALL & ~VM_PROT_WRITE));
	3617	}
	3618	}
	3619	}
	3620	old_copy->vo_size = copy_size;
	3621	}
	3622	if (src_object_shared == TRUE)
	3623	vm_object_reference_shared(old_copy);
	3624	else
	3625	vm_object_reference_locked(old_copy);
	3626	vm_object_unlock(old_copy);
	3627	vm_object_unlock(src_object);
	3628
	3629	if (new_copy != VM_OBJECT_NULL) {
	3630	vm_object_unlock(new_copy);
	3631	vm_object_deallocate(new_copy);
	3632	}
	3633	return(old_copy);
	3634	}
	3635
	3636
	3637
	3638	/*
	3639	* Adjust the size argument so that the newly-created
	3640	* copy object will be large enough to back either the
	3641	* old copy object or the new mapping.
	3642	*/
	3643	if (old_copy->vo_size > copy_size)
	3644	copy_size = old_copy->vo_size;
	3645
	3646	if (new_copy == VM_OBJECT_NULL) {
	3647	vm_object_unlock(old_copy);
	3648	vm_object_unlock(src_object);
	3649	new_copy = vm_object_allocate(copy_size);
	3650	vm_object_lock(src_object);
	3651	vm_object_lock(new_copy);
	3652
	3653	src_object_shared = FALSE;
	3654	goto Retry;
	3655	}
	3656	new_copy->vo_size = copy_size;
	3657
	3658	/*
	3659	* The copy-object is always made large enough to
	3660	* completely shadow the original object, since
	3661	* it may have several users who want to shadow
	3662	* the original object at different points.
	3663	*/
	3664
	3665	assert((old_copy->shadow == src_object) &&
	3666	(old_copy->vo_shadow_offset == (vm_object_offset_t) 0));
	3667
	3668	} else if (new_copy == VM_OBJECT_NULL) {
	3669	vm_object_unlock(src_object);
	3670	new_copy = vm_object_allocate(copy_size);
	3671	vm_object_lock(src_object);
	3672	vm_object_lock(new_copy);
	3673
	3674	src_object_shared = FALSE;
	3675	goto Retry;
	3676	}
	3677
	3678	/*
	3679	* We now have the src object locked, and the new copy object
	3680	* allocated and locked (and potentially the old copy locked).
	3681	* Before we go any further, make sure we can still perform
	3682	* a delayed copy, as the situation may have changed.
	3683	*
	3684	* Specifically, we can't perform a delayed copy if any of the
	3685	* pages in the range are wired (because we can't safely take
	3686	* write permission away from wired pages). If the pages aren't
	3687	* wired, then go ahead and protect them.
	3688	*/
	3689	copy_delayed_protect_iterate++;
	3690
	3691	queue_iterate(&src_object->memq, p, vm_page_t, listq) {
	3692	if (!p->fictitious && p->offset < copy_size) {
	3693	if (VM_PAGE_WIRED(p)) {
	3694	if (old_copy)
	3695	vm_object_unlock(old_copy);
	3696	vm_object_unlock(src_object);
	3697	vm_object_unlock(new_copy);
	3698	vm_object_deallocate(new_copy);
	3699	return VM_OBJECT_NULL;
	3700	} else {
	3701	pmap_page_protect(p->phys_page,
	3702	(VM_PROT_ALL & ~VM_PROT_WRITE));
	3703	}
	3704	}
	3705	}
	3706	if (old_copy != VM_OBJECT_NULL) {
	3707	/*
	3708	* Make the old copy-object shadow the new one.
	3709	* It will receive no more pages from the original
	3710	* object.
	3711	*/
	3712
	3713	/* remove ref. from old_copy */
	3714	vm_object_lock_assert_exclusive(src_object);
	3715	src_object->ref_count--;
	3716	assert(src_object->ref_count > 0);
	3717	vm_object_lock_assert_exclusive(old_copy);
	3718	old_copy->shadow = new_copy;
	3719	vm_object_lock_assert_exclusive(new_copy);
	3720	assert(new_copy->ref_count > 0);
	3721	new_copy->ref_count++; /* for old_copy->shadow ref. */
	3722
	3723	#if TASK_SWAPPER
	3724	if (old_copy->res_count) {
	3725	VM_OBJ_RES_INCR(new_copy);
	3726	VM_OBJ_RES_DECR(src_object);
	3727	}
	3728	#endif
	3729
	3730	vm_object_unlock(old_copy); /* done with old_copy */
	3731	}
	3732
	3733	/*
	3734	* Point the new copy at the existing object.
	3735	*/
	3736	vm_object_lock_assert_exclusive(new_copy);
	3737	new_copy->shadow = src_object;
	3738	new_copy->vo_shadow_offset = 0;
	3739	new_copy->shadowed = TRUE; /* caller must set needs_copy */
	3740
	3741	vm_object_lock_assert_exclusive(src_object);
	3742	vm_object_reference_locked(src_object);
	3743	src_object->copy = new_copy;
	3744	vm_object_unlock(src_object);
	3745	vm_object_unlock(new_copy);
	3746
	3747	XPR(XPR_VM_OBJECT,
	3748	"vm_object_copy_delayed: used copy object %X for source %X\n",
	3749	new_copy, src_object, 0, 0, 0);
	3750
	3751	return new_copy;
	3752	}
	3753
	3754	/*
	3755	* Routine: vm_object_copy_strategically
	3756	*
	3757	* Purpose:
	3758	* Perform a copy according to the source object's
	3759	* declared strategy. This operation may block,
	3760	* and may be interrupted.
	3761	*/
	3762	__private_extern__ kern_return_t
	3763	vm_object_copy_strategically(
	3764	register vm_object_t src_object,
	3765	vm_object_offset_t src_offset,
	3766	vm_object_size_t size,
	3767	vm_object_t dst_object, / OUT */
	3768	vm_object_offset_t dst_offset, / OUT */
	3769	boolean_t dst_needs_copy) / OUT */
	3770	{
	3771	boolean_t result;
	3772	boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
	3773	boolean_t object_lock_shared = FALSE;
	3774	memory_object_copy_strategy_t copy_strategy;
	3775
	3776	assert(src_object != VM_OBJECT_NULL);
	3777
	3778	copy_strategy = src_object->copy_strategy;
	3779
	3780	if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
	3781	vm_object_lock_shared(src_object);
	3782	object_lock_shared = TRUE;
	3783	} else
	3784	vm_object_lock(src_object);
	3785
	3786	/*
	3787	* The copy strategy is only valid if the memory manager
	3788	* is "ready". Internal objects are always ready.
	3789	*/
	3790
	3791	while (!src_object->internal && !src_object->pager_ready) {
	3792	wait_result_t wait_result;
	3793
	3794	if (object_lock_shared == TRUE) {
	3795	vm_object_unlock(src_object);
	3796	vm_object_lock(src_object);
	3797	object_lock_shared = FALSE;
	3798	continue;
	3799	}
	3800	wait_result = vm_object_sleep( src_object,
	3801	VM_OBJECT_EVENT_PAGER_READY,
	3802	interruptible);
	3803	if (wait_result != THREAD_AWAKENED) {
	3804	vm_object_unlock(src_object);
	3805	*dst_object = VM_OBJECT_NULL;
	3806	*dst_offset = 0;
	3807	*dst_needs_copy = FALSE;
	3808	return(MACH_SEND_INTERRUPTED);
	3809	}
	3810	}
	3811
	3812	/*
	3813	* Use the appropriate copy strategy.
	3814	*/
	3815
	3816	switch (copy_strategy) {
	3817	case MEMORY_OBJECT_COPY_DELAY:
	3818	*dst_object = vm_object_copy_delayed(src_object,
	3819	src_offset, size, object_lock_shared);
	3820	if (*dst_object != VM_OBJECT_NULL) {
	3821	*dst_offset = src_offset;
	3822	*dst_needs_copy = TRUE;
	3823	result = KERN_SUCCESS;
	3824	break;
	3825	}
	3826	vm_object_lock(src_object);
	3827	/* fall thru when delayed copy not allowed */
	3828
	3829	case MEMORY_OBJECT_COPY_NONE:
	3830	result = vm_object_copy_slowly(src_object, src_offset, size,
	3831	interruptible, dst_object);
	3832	if (result == KERN_SUCCESS) {
	3833	*dst_offset = 0;
	3834	*dst_needs_copy = FALSE;
	3835	}
	3836	break;
	3837
	3838	case MEMORY_OBJECT_COPY_CALL:
	3839	result = vm_object_copy_call(src_object, src_offset, size,
	3840	dst_object);
	3841	if (result == KERN_SUCCESS) {
	3842	*dst_offset = src_offset;
	3843	*dst_needs_copy = TRUE;
	3844	}
	3845	break;
	3846
	3847	case MEMORY_OBJECT_COPY_SYMMETRIC:
	3848	XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0);
	3849	vm_object_unlock(src_object);
	3850	result = KERN_MEMORY_RESTART_COPY;
	3851	break;
	3852
	3853	default:
	3854	panic("copy_strategically: bad strategy");
	3855	result = KERN_INVALID_ARGUMENT;
	3856	}
	3857	return(result);
	3858	}
	3859
	3860	/*
	3861	* vm_object_shadow:
	3862	*
	3863	* Create a new object which is backed by the
	3864	* specified existing object range. The source
	3865	* object reference is deallocated.
	3866	*
	3867	* The new object and offset into that object
	3868	* are returned in the source parameters.
	3869	*/
	3870	boolean_t vm_object_shadow_check = TRUE;
	3871
	3872	__private_extern__ boolean_t
	3873	vm_object_shadow(
	3874	vm_object_t object, / IN/OUT */
	3875	vm_object_offset_t offset, / IN/OUT */
	3876	vm_object_size_t length)
	3877	{
	3878	register vm_object_t source;
	3879	register vm_object_t result;
	3880
	3881	source = *object;
	3882	assert(source != VM_OBJECT_NULL);
	3883	if (source == VM_OBJECT_NULL)
	3884	return FALSE;
	3885
	3886	#if 0
	3887	/*
	3888	* XXX FBDP
	3889	* This assertion is valid but it gets triggered by Rosetta for example
	3890	* due to a combination of vm_remap() that changes a VM object's
	3891	* copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
	3892	* that then sets "needs_copy" on its map entry. This creates a
	3893	* mapping situation that VM should never see and doesn't know how to
	3894	* handle.
	3895	* It's not clear if this can create any real problem but we should
	3896	* look into fixing this, probably by having vm_protect(VM_PROT_COPY)
	3897	* do more than just set "needs_copy" to handle the copy-on-write...
	3898	* In the meantime, let's disable the assertion.
	3899	*/
	3900	assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
	3901	#endif
	3902
	3903	/*
	3904	* Determine if we really need a shadow.
	3905	*
	3906	* If the source object is larger than what we are trying
	3907	* to create, then force the shadow creation even if the
	3908	* ref count is 1. This will allow us to [potentially]
	3909	* collapse the underlying object away in the future
	3910	* (freeing up the extra data it might contain and that
	3911	* we don't need).
	3912	*/
	3913	if (vm_object_shadow_check &&
	3914	source->vo_size == length &&
	3915	source->ref_count == 1 &&
	3916	(source->shadow == VM_OBJECT_NULL \|\|
	3917	source->shadow->copy == VM_OBJECT_NULL) )
	3918	{
	3919	source->shadowed = FALSE;
	3920	return FALSE;
	3921	}
	3922
	3923	/*
	3924	* Allocate a new object with the given length
	3925	*/
	3926
	3927	if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
	3928	panic("vm_object_shadow: no object for shadowing");
	3929
	3930	/*
	3931	* The new object shadows the source object, adding
	3932	* a reference to it. Our caller changes his reference
	3933	* to point to the new object, removing a reference to
	3934	* the source object. Net result: no change of reference
	3935	* count.
	3936	*/
	3937	result->shadow = source;
	3938
	3939	/*
	3940	* Store the offset into the source object,
	3941	* and fix up the offset into the new object.
	3942	*/
	3943
	3944	result->vo_shadow_offset = *offset;
	3945
	3946	/*
	3947	* Return the new things
	3948	*/
	3949
	3950	*offset = 0;
	3951	*object = result;
	3952	return TRUE;
	3953	}
	3954
	3955	/*
	3956	* The relationship between vm_object structures and
	3957	* the memory_object requires careful synchronization.
	3958	*
	3959	* All associations are created by memory_object_create_named
	3960	* for external pagers and vm_object_pager_create for internal
	3961	* objects as follows:
	3962	*
	3963	* pager: the memory_object itself, supplied by
	3964	* the user requesting a mapping (or the kernel,
	3965	* when initializing internal objects); the
	3966	* kernel simulates holding send rights by keeping
	3967	* a port reference;
	3968	*
	3969	* pager_request:
	3970	* the memory object control port,
	3971	* created by the kernel; the kernel holds
	3972	* receive (and ownership) rights to this
	3973	* port, but no other references.
	3974	*
	3975	* When initialization is complete, the "initialized" field
	3976	* is asserted. Other mappings using a particular memory object,
	3977	* and any references to the vm_object gained through the
	3978	* port association must wait for this initialization to occur.
	3979	*
	3980	* In order to allow the memory manager to set attributes before
	3981	* requests (notably virtual copy operations, but also data or
	3982	* unlock requests) are made, a "ready" attribute is made available.
	3983	* Only the memory manager may affect the value of this attribute.
	3984	* Its value does not affect critical kernel functions, such as
	3985	* internal object initialization or destruction. [Furthermore,
	3986	* memory objects created by the kernel are assumed to be ready
	3987	* immediately; the default memory manager need not explicitly
	3988	* set the "ready" attribute.]
	3989	*
	3990	* [Both the "initialized" and "ready" attribute wait conditions
	3991	* use the "pager" field as the wait event.]
	3992	*
	3993	* The port associations can be broken down by any of the
	3994	* following routines:
	3995	* vm_object_terminate:
	3996	* No references to the vm_object remain, and
	3997	* the object cannot (or will not) be cached.
	3998	* This is the normal case, and is done even
	3999	* though one of the other cases has already been
	4000	* done.
	4001	* memory_object_destroy:
	4002	* The memory manager has requested that the
	4003	* kernel relinquish references to the memory
	4004	* object. [The memory manager may not want to
	4005	* destroy the memory object, but may wish to
	4006	* refuse or tear down existing memory mappings.]
	4007	*
	4008	* Each routine that breaks an association must break all of
	4009	* them at once. At some later time, that routine must clear
	4010	* the pager field and release the memory object references.
	4011	* [Furthermore, each routine must cope with the simultaneous
	4012	* or previous operations of the others.]
	4013	*
	4014	* In addition to the lock on the object, the vm_object_hash_lock
	4015	* governs the associations. References gained through the
	4016	* association require use of the hash lock.
	4017	*
	4018	* Because the pager field may be cleared spontaneously, it
	4019	* cannot be used to determine whether a memory object has
	4020	* ever been associated with a particular vm_object. [This
	4021	* knowledge is important to the shadow object mechanism.]
	4022	* For this reason, an additional "created" attribute is
	4023	* provided.
	4024	*
	4025	* During various paging operations, the pager reference found in the
	4026	* vm_object must be valid. To prevent this from being released,
	4027	* (other than being removed, i.e., made null), routines may use
	4028	* the vm_object_paging_begin/end routines [actually, macros].
	4029	* The implementation uses the "paging_in_progress" and "wanted" fields.
	4030	* [Operations that alter the validity of the pager values include the
	4031	* termination routines and vm_object_collapse.]
	4032	*/
	4033
	4034
	4035	/*
	4036	* Routine: vm_object_enter
	4037	* Purpose:
	4038	* Find a VM object corresponding to the given
	4039	* pager; if no such object exists, create one,
	4040	* and initialize the pager.
	4041	*/
	4042	vm_object_t
	4043	vm_object_enter(
	4044	memory_object_t pager,
	4045	vm_object_size_t size,
	4046	boolean_t internal,
	4047	boolean_t init,
	4048	boolean_t named)
	4049	{
	4050	register vm_object_t object;
	4051	vm_object_t new_object;
	4052	boolean_t must_init;
	4053	vm_object_hash_entry_t entry, new_entry;
	4054	uint32_t try_failed_count = 0;
	4055	lck_mtx_t *lck;
	4056
	4057	if (pager == MEMORY_OBJECT_NULL)
	4058	return(vm_object_allocate(size));
	4059
	4060	new_object = VM_OBJECT_NULL;
	4061	new_entry = VM_OBJECT_HASH_ENTRY_NULL;
	4062	must_init = init;
	4063
	4064	/*
	4065	* Look for an object associated with this port.
	4066	*/
	4067	Retry:
	4068	lck = vm_object_hash_lock_spin(pager);
	4069	do {
	4070	entry = vm_object_hash_lookup(pager, FALSE);
	4071
	4072	if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
	4073	if (new_object == VM_OBJECT_NULL) {
	4074	/*
	4075	* We must unlock to create a new object;
	4076	* if we do so, we must try the lookup again.
	4077	*/
	4078	vm_object_hash_unlock(lck);
	4079	assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
	4080	new_entry = vm_object_hash_entry_alloc(pager);
	4081	new_object = vm_object_allocate(size);
	4082	lck = vm_object_hash_lock_spin(pager);
	4083	} else {
	4084	/*
	4085	* Lookup failed twice, and we have something
	4086	* to insert; set the object.
	4087	*/
	4088	vm_object_hash_insert(new_entry, new_object);
	4089	entry = new_entry;
	4090	new_entry = VM_OBJECT_HASH_ENTRY_NULL;
	4091	new_object = VM_OBJECT_NULL;
	4092	must_init = TRUE;
	4093	}
	4094	} else if (entry->object == VM_OBJECT_NULL) {
	4095	/*
	4096	* If a previous object is being terminated,
	4097	* we must wait for the termination message
	4098	* to be queued (and lookup the entry again).
	4099	*/
	4100	entry->waiting = TRUE;
	4101	entry = VM_OBJECT_HASH_ENTRY_NULL;
	4102	assert_wait((event_t) pager, THREAD_UNINT);
	4103	vm_object_hash_unlock(lck);
	4104
	4105	thread_block(THREAD_CONTINUE_NULL);
	4106	lck = vm_object_hash_lock_spin(pager);
	4107	}
	4108	} while (entry == VM_OBJECT_HASH_ENTRY_NULL);
	4109
	4110	object = entry->object;
	4111	assert(object != VM_OBJECT_NULL);
	4112
	4113	if (!must_init) {
	4114	if ( !vm_object_lock_try(object)) {
	4115
	4116	vm_object_hash_unlock(lck);
	4117
	4118	try_failed_count++;
	4119	mutex_pause(try_failed_count); /* wait a bit */
	4120	goto Retry;
	4121	}
	4122	assert(!internal \|\| object->internal);
	4123	#if VM_OBJECT_CACHE
	4124	if (object->ref_count == 0) {
	4125	if ( !vm_object_cache_lock_try()) {
	4126
	4127	vm_object_hash_unlock(lck);
	4128	vm_object_unlock(object);
	4129
	4130	try_failed_count++;
	4131	mutex_pause(try_failed_count); /* wait a bit */
	4132	goto Retry;
	4133	}
	4134	XPR(XPR_VM_OBJECT_CACHE,
	4135	"vm_object_enter: removing %x from cache, head (%x, %x)\n",
	4136	object,
	4137	vm_object_cached_list.next,
	4138	vm_object_cached_list.prev, 0,0);
	4139	queue_remove(&vm_object_cached_list, object,
	4140	vm_object_t, cached_list);
	4141	vm_object_cached_count--;
	4142
	4143	vm_object_cache_unlock();
	4144	}
	4145	#endif
	4146	if (named) {
	4147	assert(!object->named);
	4148	object->named = TRUE;
	4149	}
	4150	vm_object_lock_assert_exclusive(object);
	4151	object->ref_count++;
	4152	vm_object_res_reference(object);
	4153
	4154	vm_object_hash_unlock(lck);
	4155	vm_object_unlock(object);
	4156
	4157	VM_STAT_INCR(hits);
	4158	} else
	4159	vm_object_hash_unlock(lck);
	4160
	4161	assert(object->ref_count > 0);
	4162
	4163	VM_STAT_INCR(lookups);
	4164
	4165	XPR(XPR_VM_OBJECT,
	4166	"vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
	4167	pager, object, must_init, 0, 0);
	4168
	4169	/*
	4170	* If we raced to create a vm_object but lost, let's
	4171	* throw away ours.
	4172	*/
	4173
	4174	if (new_object != VM_OBJECT_NULL)
	4175	vm_object_deallocate(new_object);
	4176
	4177	if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
	4178	vm_object_hash_entry_free(new_entry);
	4179
	4180	if (must_init) {
	4181	memory_object_control_t control;
	4182
	4183	/*
	4184	* Allocate request port.
	4185	*/
	4186
	4187	control = memory_object_control_allocate(object);
	4188	assert (control != MEMORY_OBJECT_CONTROL_NULL);
	4189
	4190	vm_object_lock(object);
	4191	assert(object != kernel_object);
	4192
	4193	/*
	4194	* Copy the reference we were given.
	4195	*/
	4196
	4197	memory_object_reference(pager);
	4198	object->pager_created = TRUE;
	4199	object->pager = pager;
	4200	object->internal = internal;
	4201	object->pager_trusted = internal;
	4202	if (!internal) {
	4203	/* copy strategy invalid until set by memory manager */
	4204	object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
	4205	}
	4206	object->pager_control = control;
	4207	object->pager_ready = FALSE;
	4208
	4209	vm_object_unlock(object);
	4210
	4211	/*
	4212	* Let the pager know we're using it.
	4213	*/
	4214
	4215	(void) memory_object_init(pager,
	4216	object->pager_control,
	4217	PAGE_SIZE);
	4218
	4219	vm_object_lock(object);
	4220	if (named)
	4221	object->named = TRUE;
	4222	if (internal) {
	4223	object->pager_ready = TRUE;
	4224	vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	4225	}
	4226
	4227	object->pager_initialized = TRUE;
	4228	vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
	4229	} else {
	4230	vm_object_lock(object);
	4231	}
	4232
	4233	/*
	4234	* [At this point, the object must be locked]
	4235	*/
	4236
	4237	/*
	4238	* Wait for the work above to be done by the first
	4239	* thread to map this object.
	4240	*/
	4241
	4242	while (!object->pager_initialized) {
	4243	vm_object_sleep(object,
	4244	VM_OBJECT_EVENT_INITIALIZED,
	4245	THREAD_UNINT);
	4246	}
	4247	vm_object_unlock(object);
	4248
	4249	XPR(XPR_VM_OBJECT,
	4250	"vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
	4251	object, object->pager, internal, 0,0);
	4252	return(object);
	4253	}
	4254
	4255	/*
	4256	* Routine: vm_object_pager_create
	4257	* Purpose:
	4258	* Create a memory object for an internal object.
	4259	* In/out conditions:
	4260	* The object is locked on entry and exit;
	4261	* it may be unlocked within this call.
	4262	* Limitations:
	4263	* Only one thread may be performing a
	4264	* vm_object_pager_create on an object at
	4265	* a time. Presumably, only the pageout
	4266	* daemon will be using this routine.
	4267	*/
	4268
	4269	void
	4270	vm_object_pager_create(
	4271	register vm_object_t object)
	4272	{
	4273	memory_object_t pager;
	4274	vm_object_hash_entry_t entry;
	4275	lck_mtx_t *lck;
	4276	#if MACH_PAGEMAP
	4277	vm_object_size_t size;
	4278	vm_external_map_t map;
	4279	#endif /* MACH_PAGEMAP */
	4280
	4281	XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
	4282	object, 0,0,0,0);
	4283
	4284	assert(object != kernel_object);
	4285
	4286	if (memory_manager_default_check() != KERN_SUCCESS)
	4287	return;
	4288
	4289	/*
	4290	* Prevent collapse or termination by holding a paging reference
	4291	*/
	4292
	4293	vm_object_paging_begin(object);
	4294	if (object->pager_created) {
	4295	/*
	4296	* Someone else got to it first...
	4297	* wait for them to finish initializing the ports
	4298	*/
	4299	while (!object->pager_initialized) {
	4300	vm_object_sleep(object,
	4301	VM_OBJECT_EVENT_INITIALIZED,
	4302	THREAD_UNINT);
	4303	}
	4304	vm_object_paging_end(object);
	4305	return;
	4306	}
	4307
	4308	/*
	4309	* Indicate that a memory object has been assigned
	4310	* before dropping the lock, to prevent a race.
	4311	*/
	4312
	4313	object->pager_created = TRUE;
	4314	object->paging_offset = 0;
	4315
	4316	#if MACH_PAGEMAP
	4317	size = object->vo_size;
	4318	#endif /* MACH_PAGEMAP */
	4319	vm_object_unlock(object);
	4320
	4321	#if MACH_PAGEMAP
	4322	map = vm_external_create(size);
	4323	vm_object_lock(object);
	4324	assert(object->vo_size == size);
	4325	object->existence_map = map;
	4326	vm_object_unlock(object);
	4327	#endif /* MACH_PAGEMAP */
	4328
	4329	if ((uint32_t) object->vo_size != object->vo_size) {
	4330	panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n",
	4331	(uint64_t) object->vo_size);
	4332	}
	4333
	4334	/*
	4335	* Create the [internal] pager, and associate it with this object.
	4336	*
	4337	* We make the association here so that vm_object_enter()
	4338	* can look up the object to complete initializing it. No
	4339	* user will ever map this object.
	4340	*/
	4341	{
	4342	memory_object_default_t dmm;
	4343
	4344	/* acquire a reference for the default memory manager */
	4345	dmm = memory_manager_default_reference();
	4346
	4347	assert(object->temporary);
	4348
	4349	/* create our new memory object */
	4350	assert((vm_size_t) object->vo_size == object->vo_size);
	4351	(void) memory_object_create(dmm, (vm_size_t) object->vo_size,
	4352	&pager);
	4353
	4354	memory_object_default_deallocate(dmm);
	4355	}
	4356
	4357	entry = vm_object_hash_entry_alloc(pager);
	4358
	4359	lck = vm_object_hash_lock_spin(pager);
	4360	vm_object_hash_insert(entry, object);
	4361	vm_object_hash_unlock(lck);
	4362
	4363	/*
	4364	* A reference was returned by
	4365	* memory_object_create(), and it is
	4366	* copied by vm_object_enter().
	4367	*/
	4368
	4369	if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object)
	4370	panic("vm_object_pager_create: mismatch");
	4371
	4372	/*
	4373	* Drop the reference we were passed.
	4374	*/
	4375	memory_object_deallocate(pager);
	4376
	4377	vm_object_lock(object);
	4378
	4379	/*
	4380	* Release the paging reference
	4381	*/
	4382	vm_object_paging_end(object);
	4383	}
	4384
	4385	/*
	4386	* Routine: vm_object_remove
	4387	* Purpose:
	4388	* Eliminate the pager/object association
	4389	* for this pager.
	4390	* Conditions:
	4391	* The object cache must be locked.
	4392	*/
	4393	__private_extern__ void
	4394	vm_object_remove(
	4395	vm_object_t object)
	4396	{
	4397	memory_object_t pager;
	4398
	4399	if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
	4400	vm_object_hash_entry_t entry;
	4401
	4402	entry = vm_object_hash_lookup(pager, FALSE);
	4403	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
	4404	entry->object = VM_OBJECT_NULL;
	4405	}
	4406
	4407	}
	4408
	4409	/*
	4410	* Global variables for vm_object_collapse():
	4411	*
	4412	* Counts for normal collapses and bypasses.
	4413	* Debugging variables, to watch or disable collapse.
	4414	*/
	4415	static long object_collapses = 0;
	4416	static long object_bypasses = 0;
	4417
	4418	static boolean_t vm_object_collapse_allowed = TRUE;
	4419	static boolean_t vm_object_bypass_allowed = TRUE;
	4420
	4421	#if MACH_PAGEMAP
	4422	static int vm_external_discarded;
	4423	static int vm_external_collapsed;
	4424	#endif
	4425
	4426	unsigned long vm_object_collapse_encrypted = 0;
	4427
	4428	/*
	4429	* Routine: vm_object_do_collapse
	4430	* Purpose:
	4431	* Collapse an object with the object backing it.
	4432	* Pages in the backing object are moved into the
	4433	* parent, and the backing object is deallocated.
	4434	* Conditions:
	4435	* Both objects and the cache are locked; the page
	4436	* queues are unlocked.
	4437	*
	4438	*/
	4439	static void
	4440	vm_object_do_collapse(
	4441	vm_object_t object,
	4442	vm_object_t backing_object)
	4443	{
	4444	vm_page_t p, pp;
	4445	vm_object_offset_t new_offset, backing_offset;
	4446	vm_object_size_t size;
	4447
	4448	vm_object_lock_assert_exclusive(object);
	4449	vm_object_lock_assert_exclusive(backing_object);
	4450
	4451	backing_offset = object->vo_shadow_offset;
	4452	size = object->vo_size;
	4453
	4454	/*
	4455	* Move all in-memory pages from backing_object
	4456	* to the parent. Pages that have been paged out
	4457	* will be overwritten by any of the parent's
	4458	* pages that shadow them.
	4459	*/
	4460
	4461	while (!queue_empty(&backing_object->memq)) {
	4462
	4463	p = (vm_page_t) queue_first(&backing_object->memq);
	4464
	4465	new_offset = (p->offset - backing_offset);
	4466
	4467	assert(!p->busy \|\| p->absent);
	4468
	4469	/*
	4470	* If the parent has a page here, or if
	4471	* this page falls outside the parent,
	4472	* dispose of it.
	4473	*
	4474	* Otherwise, move it as planned.
	4475	*/
	4476
	4477	if (p->offset < backing_offset \|\| new_offset >= size) {
	4478	VM_PAGE_FREE(p);
	4479	} else {
	4480	/*
	4481	* ENCRYPTED SWAP:
	4482	* The encryption key includes the "pager" and the
	4483	* "paging_offset". These will not change during the
	4484	* object collapse, so we can just move an encrypted
	4485	* page from one object to the other in this case.
	4486	* We can't decrypt the page here, since we can't drop
	4487	* the object lock.
	4488	*/
	4489	if (p->encrypted) {
	4490	vm_object_collapse_encrypted++;
	4491	}
	4492	pp = vm_page_lookup(object, new_offset);
	4493	if (pp == VM_PAGE_NULL) {
	4494
	4495	/*
	4496	* Parent now has no page.
	4497	* Move the backing object's page up.
	4498	*/
	4499
	4500	vm_page_rename(p, object, new_offset, TRUE);
	4501	#if MACH_PAGEMAP
	4502	} else if (pp->absent) {
	4503
	4504	/*
	4505	* Parent has an absent page...
	4506	* it's not being paged in, so
	4507	* it must really be missing from
	4508	* the parent.
	4509	*
	4510	* Throw out the absent page...
	4511	* any faults looking for that
	4512	* page will restart with the new
	4513	* one.
	4514	*/
	4515
	4516	VM_PAGE_FREE(pp);
	4517	vm_page_rename(p, object, new_offset, TRUE);
	4518	#endif /* MACH_PAGEMAP */
	4519	} else {
	4520	assert(! pp->absent);
	4521
	4522	/*
	4523	* Parent object has a real page.
	4524	* Throw away the backing object's
	4525	* page.
	4526	*/
	4527	VM_PAGE_FREE(p);
	4528	}
	4529	}
	4530	}
	4531
	4532	#if !MACH_PAGEMAP
	4533	assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL))
	4534	\|\| (!backing_object->pager_created
	4535	&& (backing_object->pager == MEMORY_OBJECT_NULL)));
	4536	#else
	4537	assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
	4538	#endif /* !MACH_PAGEMAP */
	4539
	4540	if (backing_object->pager != MEMORY_OBJECT_NULL) {
	4541	vm_object_hash_entry_t entry;
	4542
	4543	/*
	4544	* Move the pager from backing_object to object.
	4545	*
	4546	* XXX We're only using part of the paging space
	4547	* for keeps now... we ought to discard the
	4548	* unused portion.
	4549	*/
	4550
	4551	assert(!object->paging_in_progress);
	4552	assert(!object->activity_in_progress);
	4553	object->pager = backing_object->pager;
	4554
	4555	if (backing_object->hashed) {
	4556	lck_mtx_t *lck;
	4557
	4558	lck = vm_object_hash_lock_spin(backing_object->pager);
	4559	entry = vm_object_hash_lookup(object->pager, FALSE);
	4560	assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
	4561	entry->object = object;
	4562	vm_object_hash_unlock(lck);
	4563
	4564	object->hashed = TRUE;
	4565	}
	4566	object->pager_created = backing_object->pager_created;
	4567	object->pager_control = backing_object->pager_control;
	4568	object->pager_ready = backing_object->pager_ready;
	4569	object->pager_initialized = backing_object->pager_initialized;
	4570	object->paging_offset =
	4571	backing_object->paging_offset + backing_offset;
	4572	if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
	4573	memory_object_control_collapse(object->pager_control,
	4574	object);
	4575	}
	4576	}
	4577
	4578	#if MACH_PAGEMAP
	4579	/*
	4580	* If the shadow offset is 0, the use the existence map from
	4581	* the backing object if there is one. If the shadow offset is
	4582	* not zero, toss it.
	4583	*
	4584	* XXX - If the shadow offset is not 0 then a bit copy is needed
	4585	* if the map is to be salvaged. For now, we just just toss the
	4586	* old map, giving the collapsed object no map. This means that
	4587	* the pager is invoked for zero fill pages. If analysis shows
	4588	* that this happens frequently and is a performance hit, then
	4589	* this code should be fixed to salvage the map.
	4590	*/
	4591	assert(object->existence_map == VM_EXTERNAL_NULL);
	4592	if (backing_offset \|\| (size != backing_object->vo_size)) {
	4593	vm_external_discarded++;
	4594	vm_external_destroy(backing_object->existence_map,
	4595	backing_object->vo_size);
	4596	}
	4597	else {
	4598	vm_external_collapsed++;
	4599	object->existence_map = backing_object->existence_map;
	4600	}
	4601	backing_object->existence_map = VM_EXTERNAL_NULL;
	4602	#endif /* MACH_PAGEMAP */
	4603
	4604	/*
	4605	* Object now shadows whatever backing_object did.
	4606	* Note that the reference to backing_object->shadow
	4607	* moves from within backing_object to within object.
	4608	*/
	4609
	4610	assert(!object->phys_contiguous);
	4611	assert(!backing_object->phys_contiguous);
	4612	object->shadow = backing_object->shadow;
	4613	if (object->shadow) {
	4614	object->vo_shadow_offset += backing_object->vo_shadow_offset;
	4615	} else {
	4616	/* no shadow, therefore no shadow offset... */
	4617	object->vo_shadow_offset = 0;
	4618	}
	4619	assert((object->shadow == VM_OBJECT_NULL) \|\|
	4620	(object->shadow->copy != backing_object));
	4621
	4622	/*
	4623	* Discard backing_object.
	4624	*
	4625	* Since the backing object has no pages, no
	4626	* pager left, and no object references within it,
	4627	* all that is necessary is to dispose of it.
	4628	*/
	4629
	4630	assert((backing_object->ref_count == 1) &&
	4631	(backing_object->resident_page_count == 0) &&
	4632	(backing_object->paging_in_progress == 0) &&
	4633	(backing_object->activity_in_progress == 0));
	4634
	4635	backing_object->alive = FALSE;
	4636	vm_object_unlock(backing_object);
	4637
	4638	XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
	4639	backing_object, 0,0,0,0);
	4640
	4641	vm_object_lock_destroy(backing_object);
	4642
	4643	zfree(vm_object_zone, backing_object);
	4644
	4645	object_collapses++;
	4646	}
	4647
	4648	static void
	4649	vm_object_do_bypass(
	4650	vm_object_t object,
	4651	vm_object_t backing_object)
	4652	{
	4653	/*
	4654	* Make the parent shadow the next object
	4655	* in the chain.
	4656	*/
	4657
	4658	vm_object_lock_assert_exclusive(object);
	4659	vm_object_lock_assert_exclusive(backing_object);
	4660
	4661	#if TASK_SWAPPER
	4662	/*
	4663	* Do object reference in-line to
	4664	* conditionally increment shadow's
	4665	* residence count. If object is not
	4666	* resident, leave residence count
	4667	* on shadow alone.
	4668	*/
	4669	if (backing_object->shadow != VM_OBJECT_NULL) {
	4670	vm_object_lock(backing_object->shadow);
	4671	vm_object_lock_assert_exclusive(backing_object->shadow);
	4672	backing_object->shadow->ref_count++;
	4673	if (object->res_count != 0)
	4674	vm_object_res_reference(backing_object->shadow);
	4675	vm_object_unlock(backing_object->shadow);
	4676	}
	4677	#else /* TASK_SWAPPER */
	4678	vm_object_reference(backing_object->shadow);
	4679	#endif /* TASK_SWAPPER */
	4680
	4681	assert(!object->phys_contiguous);
	4682	assert(!backing_object->phys_contiguous);
	4683	object->shadow = backing_object->shadow;
	4684	if (object->shadow) {
	4685	object->vo_shadow_offset += backing_object->vo_shadow_offset;
	4686	} else {
	4687	/* no shadow, therefore no shadow offset... */
	4688	object->vo_shadow_offset = 0;
	4689	}
	4690
	4691	/*
	4692	* Backing object might have had a copy pointer
	4693	* to us. If it did, clear it.
	4694	*/
	4695	if (backing_object->copy == object) {
	4696	backing_object->copy = VM_OBJECT_NULL;
	4697	}
	4698
	4699	/*
	4700	* Drop the reference count on backing_object.
	4701	#if TASK_SWAPPER
	4702	* Since its ref_count was at least 2, it
	4703	* will not vanish; so we don't need to call
	4704	* vm_object_deallocate.
	4705	* [with a caveat for "named" objects]
	4706	*
	4707	* The res_count on the backing object is
	4708	* conditionally decremented. It's possible
	4709	* (via vm_pageout_scan) to get here with
	4710	* a "swapped" object, which has a 0 res_count,
	4711	* in which case, the backing object res_count
	4712	* is already down by one.
	4713	#else
	4714	* Don't call vm_object_deallocate unless
	4715	* ref_count drops to zero.
	4716	*
	4717	* The ref_count can drop to zero here if the
	4718	* backing object could be bypassed but not
	4719	* collapsed, such as when the backing object
	4720	* is temporary and cachable.
	4721	#endif
	4722	*/
	4723	if (backing_object->ref_count > 2 \|\|
	4724	(!backing_object->named && backing_object->ref_count > 1)) {
	4725	vm_object_lock_assert_exclusive(backing_object);
	4726	backing_object->ref_count--;
	4727	#if TASK_SWAPPER
	4728	if (object->res_count != 0)
	4729	vm_object_res_deallocate(backing_object);
	4730	assert(backing_object->ref_count > 0);
	4731	#endif /* TASK_SWAPPER */
	4732	vm_object_unlock(backing_object);
	4733	} else {
	4734
	4735	/*
	4736	* Drop locks so that we can deallocate
	4737	* the backing object.
	4738	*/
	4739
	4740	#if TASK_SWAPPER
	4741	if (object->res_count == 0) {
	4742	/* XXX get a reference for the deallocate below */
	4743	vm_object_res_reference(backing_object);
	4744	}
	4745	#endif /* TASK_SWAPPER */
	4746	/*
	4747	* vm_object_collapse (the caller of this function) is
	4748	* now called from contexts that may not guarantee that a
	4749	* valid reference is held on the object... w/o a valid
	4750	* reference, it is unsafe and unwise (you will definitely
	4751	* regret it) to unlock the object and then retake the lock
	4752	* since the object may be terminated and recycled in between.
	4753	* The "activity_in_progress" reference will keep the object
	4754	* 'stable'.
	4755	*/
	4756	vm_object_activity_begin(object);
	4757	vm_object_unlock(object);
	4758
	4759	vm_object_unlock(backing_object);
	4760	vm_object_deallocate(backing_object);
	4761
	4762	/*
	4763	* Relock object. We don't have to reverify
	4764	* its state since vm_object_collapse will
	4765	* do that for us as it starts at the
	4766	* top of its loop.
	4767	*/
	4768
	4769	vm_object_lock(object);
	4770	vm_object_activity_end(object);
	4771	}
	4772
	4773	object_bypasses++;
	4774	}
	4775
	4776
	4777	/*
	4778	* vm_object_collapse:
	4779	*
	4780	* Perform an object collapse or an object bypass if appropriate.
	4781	* The real work of collapsing and bypassing is performed in
	4782	* the routines vm_object_do_collapse and vm_object_do_bypass.
	4783	*
	4784	* Requires that the object be locked and the page queues be unlocked.
	4785	*
	4786	*/
	4787	static unsigned long vm_object_collapse_calls = 0;
	4788	static unsigned long vm_object_collapse_objects = 0;
	4789	static unsigned long vm_object_collapse_do_collapse = 0;
	4790	static unsigned long vm_object_collapse_do_bypass = 0;
	4791
	4792	__private_extern__ void
	4793	vm_object_collapse(
	4794	register vm_object_t object,
	4795	register vm_object_offset_t hint_offset,
	4796	boolean_t can_bypass)
	4797	{
	4798	register vm_object_t backing_object;
	4799	register unsigned int rcount;
	4800	register unsigned int size;
	4801	vm_object_t original_object;
	4802	int object_lock_type;
	4803	int backing_object_lock_type;
	4804
	4805	vm_object_collapse_calls++;
	4806
	4807	if (! vm_object_collapse_allowed &&
	4808	! (can_bypass && vm_object_bypass_allowed)) {
	4809	return;
	4810	}
	4811
	4812	XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
	4813	object, 0,0,0,0);
	4814
	4815	if (object == VM_OBJECT_NULL)
	4816	return;
	4817
	4818	original_object = object;
	4819
	4820	/*
	4821	* The top object was locked "exclusive" by the caller.
	4822	* In the first pass, to determine if we can collapse the shadow chain,
	4823	* take a "shared" lock on the shadow objects. If we can collapse,
	4824	* we'll have to go down the chain again with exclusive locks.
	4825	*/
	4826	object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	4827	backing_object_lock_type = OBJECT_LOCK_SHARED;
	4828
	4829	retry:
	4830	object = original_object;
	4831	vm_object_lock_assert_exclusive(object);
	4832
	4833	while (TRUE) {
	4834	vm_object_collapse_objects++;
	4835	/*
	4836	* Verify that the conditions are right for either
	4837	* collapse or bypass:
	4838	*/
	4839
	4840	/*
	4841	* There is a backing object, and
	4842	*/
	4843
	4844	backing_object = object->shadow;
	4845	if (backing_object == VM_OBJECT_NULL) {
	4846	if (object != original_object) {
	4847	vm_object_unlock(object);
	4848	}
	4849	return;
	4850	}
	4851	if (backing_object_lock_type == OBJECT_LOCK_SHARED) {
	4852	vm_object_lock_shared(backing_object);
	4853	} else {
	4854	vm_object_lock(backing_object);
	4855	}
	4856
	4857	/*
	4858	* No pages in the object are currently
	4859	* being paged out, and
	4860	*/
	4861	if (object->paging_in_progress != 0 \|\|
	4862	object->activity_in_progress != 0) {
	4863	/* try and collapse the rest of the shadow chain */
	4864	if (object != original_object) {
	4865	vm_object_unlock(object);
	4866	}
	4867	object = backing_object;
	4868	object_lock_type = backing_object_lock_type;
	4869	continue;
	4870	}
	4871
	4872	/*
	4873	* ...
	4874	* The backing object is not read_only,
	4875	* and no pages in the backing object are
	4876	* currently being paged out.
	4877	* The backing object is internal.
	4878	*
	4879	*/
	4880
	4881	if (!backing_object->internal \|\|
	4882	backing_object->paging_in_progress != 0 \|\|
	4883	backing_object->activity_in_progress != 0) {
	4884	/* try and collapse the rest of the shadow chain */
	4885	if (object != original_object) {
	4886	vm_object_unlock(object);
	4887	}
	4888	object = backing_object;
	4889	object_lock_type = backing_object_lock_type;
	4890	continue;
	4891	}
	4892
	4893	/*
	4894	* The backing object can't be a copy-object:
	4895	* the shadow_offset for the copy-object must stay
	4896	* as 0. Furthermore (for the 'we have all the
	4897	* pages' case), if we bypass backing_object and
	4898	* just shadow the next object in the chain, old
	4899	* pages from that object would then have to be copied
	4900	* BOTH into the (former) backing_object and into the
	4901	* parent object.
	4902	*/
	4903	if (backing_object->shadow != VM_OBJECT_NULL &&
	4904	backing_object->shadow->copy == backing_object) {
	4905	/* try and collapse the rest of the shadow chain */
	4906	if (object != original_object) {
	4907	vm_object_unlock(object);
	4908	}
	4909	object = backing_object;
	4910	object_lock_type = backing_object_lock_type;
	4911	continue;
	4912	}
	4913
	4914	/*
	4915	* We can now try to either collapse the backing
	4916	* object (if the parent is the only reference to
	4917	* it) or (perhaps) remove the parent's reference
	4918	* to it.
	4919	*
	4920	* If there is exactly one reference to the backing
	4921	* object, we may be able to collapse it into the
	4922	* parent.
	4923	*
	4924	* If MACH_PAGEMAP is defined:
	4925	* The parent must not have a pager created for it,
	4926	* since collapsing a backing_object dumps new pages
	4927	* into the parent that its pager doesn't know about
	4928	* (and the collapse code can't merge the existence
	4929	* maps).
	4930	* Otherwise:
	4931	* As long as one of the objects is still not known
	4932	* to the pager, we can collapse them.
	4933	*/
	4934	if (backing_object->ref_count == 1 &&
	4935	(!object->pager_created
	4936	#if !MACH_PAGEMAP
	4937	\|\| !backing_object->pager_created
	4938	#endif /!MACH_PAGEMAP /
	4939	) && vm_object_collapse_allowed) {
	4940
	4941	/*
	4942	* We need the exclusive lock on the VM objects.
	4943	*/
	4944	if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
	4945	/*
	4946	* We have an object and its shadow locked
	4947	* "shared". We can't just upgrade the locks
	4948	* to "exclusive", as some other thread might
	4949	* also have these objects locked "shared" and
	4950	* attempt to upgrade one or the other to
	4951	* "exclusive". The upgrades would block
	4952	* forever waiting for the other "shared" locks
	4953	* to get released.
	4954	* So we have to release the locks and go
	4955	* down the shadow chain again (since it could
	4956	* have changed) with "exclusive" locking.
	4957	*/
	4958	vm_object_unlock(backing_object);
	4959	if (object != original_object)
	4960	vm_object_unlock(object);
	4961	object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	4962	backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	4963	goto retry;
	4964	}
	4965
	4966	XPR(XPR_VM_OBJECT,
	4967	"vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
	4968	backing_object, object,
	4969	backing_object->pager,
	4970	backing_object->pager_control, 0);
	4971
	4972	/*
	4973	* Collapse the object with its backing
	4974	* object, and try again with the object's
	4975	* new backing object.
	4976	*/
	4977
	4978	vm_object_do_collapse(object, backing_object);
	4979	vm_object_collapse_do_collapse++;
	4980	continue;
	4981	}
	4982
	4983	/*
	4984	* Collapsing the backing object was not possible
	4985	* or permitted, so let's try bypassing it.
	4986	*/
	4987
	4988	if (! (can_bypass && vm_object_bypass_allowed)) {
	4989	/* try and collapse the rest of the shadow chain */
	4990	if (object != original_object) {
	4991	vm_object_unlock(object);
	4992	}
	4993	object = backing_object;
	4994	object_lock_type = backing_object_lock_type;
	4995	continue;
	4996	}
	4997
	4998
	4999	/*
	5000	* If the object doesn't have all its pages present,
	5001	* we have to make sure no pages in the backing object
	5002	* "show through" before bypassing it.
	5003	*/
	5004	size = atop(object->vo_size);
	5005	rcount = object->resident_page_count;
	5006
	5007	if (rcount != size) {
	5008	vm_object_offset_t offset;
	5009	vm_object_offset_t backing_offset;
	5010	unsigned int backing_rcount;
	5011
	5012	/*
	5013	* If the backing object has a pager but no pagemap,
	5014	* then we cannot bypass it, because we don't know
	5015	* what pages it has.
	5016	*/
	5017	if (backing_object->pager_created
	5018	#if MACH_PAGEMAP
	5019	&& (backing_object->existence_map == VM_EXTERNAL_NULL)
	5020	#endif /* MACH_PAGEMAP */
	5021	) {
	5022	/* try and collapse the rest of the shadow chain */
	5023	if (object != original_object) {
	5024	vm_object_unlock(object);
	5025	}
	5026	object = backing_object;
	5027	object_lock_type = backing_object_lock_type;
	5028	continue;
	5029	}
	5030
	5031	/*
	5032	* If the object has a pager but no pagemap,
	5033	* then we cannot bypass it, because we don't know
	5034	* what pages it has.
	5035	*/
	5036	if (object->pager_created
	5037	#if MACH_PAGEMAP
	5038	&& (object->existence_map == VM_EXTERNAL_NULL)
	5039	#endif /* MACH_PAGEMAP */
	5040	) {
	5041	/* try and collapse the rest of the shadow chain */
	5042	if (object != original_object) {
	5043	vm_object_unlock(object);
	5044	}
	5045	object = backing_object;
	5046	object_lock_type = backing_object_lock_type;
	5047	continue;
	5048	}
	5049
	5050	backing_offset = object->vo_shadow_offset;
	5051	backing_rcount = backing_object->resident_page_count;
	5052
	5053	if ( (int)backing_rcount - (int)(atop(backing_object->vo_size) - size) > (int)rcount) {
	5054	/*
	5055	* we have enough pages in the backing object to guarantee that
	5056	* at least 1 of them must be 'uncovered' by a resident page
	5057	* in the object we're evaluating, so move on and
	5058	* try to collapse the rest of the shadow chain
	5059	*/
	5060	if (object != original_object) {
	5061	vm_object_unlock(object);
	5062	}
	5063	object = backing_object;
	5064	object_lock_type = backing_object_lock_type;
	5065	continue;
	5066	}
	5067
	5068	/*
	5069	* If all of the pages in the backing object are
	5070	* shadowed by the parent object, the parent
	5071	* object no longer has to shadow the backing
	5072	* object; it can shadow the next one in the
	5073	* chain.
	5074	*
	5075	* If the backing object has existence info,
	5076	* we must check examine its existence info
	5077	* as well.
	5078	*
	5079	*/
	5080
	5081	#if MACH_PAGEMAP
	5082	#define EXISTS_IN_OBJECT(obj, off, rc) \
	5083	(vm_external_state_get((obj)->existence_map, \
	5084	(vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS \|\| \
	5085	((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
	5086	#else
	5087	#define EXISTS_IN_OBJECT(obj, off, rc) \
	5088	(((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
	5089	#endif /* MACH_PAGEMAP */
	5090
	5091	/*
	5092	* Check the hint location first
	5093	* (since it is often the quickest way out of here).
	5094	*/
	5095	if (object->cow_hint != ~(vm_offset_t)0)
	5096	hint_offset = (vm_object_offset_t)object->cow_hint;
	5097	else
	5098	hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
	5099	(hint_offset - 8 * PAGE_SIZE_64) : 0;
	5100
	5101	if (EXISTS_IN_OBJECT(backing_object, hint_offset +
	5102	backing_offset, backing_rcount) &&
	5103	!EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
	5104	/* dependency right at the hint */
	5105	object->cow_hint = (vm_offset_t) hint_offset; /* atomic */
	5106	/* try and collapse the rest of the shadow chain */
	5107	if (object != original_object) {
	5108	vm_object_unlock(object);
	5109	}
	5110	object = backing_object;
	5111	object_lock_type = backing_object_lock_type;
	5112	continue;
	5113	}
	5114
	5115	/*
	5116	* If the object's window onto the backing_object
	5117	* is large compared to the number of resident
	5118	* pages in the backing object, it makes sense to
	5119	* walk the backing_object's resident pages first.
	5120	*
	5121	* NOTE: Pages may be in both the existence map and/or
	5122	* resident, so if we don't find a dependency while
	5123	* walking the backing object's resident page list
	5124	* directly, and there is an existence map, we'll have
	5125	* to run the offset based 2nd pass. Because we may
	5126	* have to run both passes, we need to be careful
	5127	* not to decrement 'rcount' in the 1st pass
	5128	*/
	5129	if (backing_rcount && backing_rcount < (size / 8)) {
	5130	unsigned int rc = rcount;
	5131	vm_page_t p;
	5132
	5133	backing_rcount = backing_object->resident_page_count;
	5134	p = (vm_page_t)queue_first(&backing_object->memq);
	5135	do {
	5136	offset = (p->offset - backing_offset);
	5137
	5138	if (offset < object->vo_size &&
	5139	offset != hint_offset &&
	5140	!EXISTS_IN_OBJECT(object, offset, rc)) {
	5141	/* found a dependency */
	5142	object->cow_hint = (vm_offset_t) offset; /* atomic */
	5143
	5144	break;
	5145	}
	5146	p = (vm_page_t) queue_next(&p->listq);
	5147
	5148	} while (--backing_rcount);
	5149
	5150	if (backing_rcount != 0 ) {
	5151	/* try and collapse the rest of the shadow chain */
	5152	if (object != original_object) {
	5153	vm_object_unlock(object);
	5154	}
	5155	object = backing_object;
	5156	object_lock_type = backing_object_lock_type;
	5157	continue;
	5158	}
	5159	}
	5160
	5161	/*
	5162	* Walk through the offsets looking for pages in the
	5163	* backing object that show through to the object.
	5164	*/
	5165	if (backing_rcount
	5166	#if MACH_PAGEMAP
	5167	\|\| backing_object->existence_map
	5168	#endif /* MACH_PAGEMAP */
	5169	) {
	5170	offset = hint_offset;
	5171
	5172	while((offset =
	5173	(offset + PAGE_SIZE_64 < object->vo_size) ?
	5174	(offset + PAGE_SIZE_64) : 0) != hint_offset) {
	5175
	5176	if (EXISTS_IN_OBJECT(backing_object, offset +
	5177	backing_offset, backing_rcount) &&
	5178	!EXISTS_IN_OBJECT(object, offset, rcount)) {
	5179	/* found a dependency */
	5180	object->cow_hint = (vm_offset_t) offset; /* atomic */
	5181	break;
	5182	}
	5183	}
	5184	if (offset != hint_offset) {
	5185	/* try and collapse the rest of the shadow chain */
	5186	if (object != original_object) {
	5187	vm_object_unlock(object);
	5188	}
	5189	object = backing_object;
	5190	object_lock_type = backing_object_lock_type;
	5191	continue;
	5192	}
	5193	}
	5194	}
	5195
	5196	/*
	5197	* We need "exclusive" locks on the 2 VM objects.
	5198	*/
	5199	if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
	5200	vm_object_unlock(backing_object);
	5201	if (object != original_object)
	5202	vm_object_unlock(object);
	5203	object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	5204	backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	5205	goto retry;
	5206	}
	5207
	5208	/* reset the offset hint for any objects deeper in the chain */
	5209	object->cow_hint = (vm_offset_t)0;
	5210
	5211	/*
	5212	* All interesting pages in the backing object
	5213	* already live in the parent or its pager.
	5214	* Thus we can bypass the backing object.
	5215	*/
	5216
	5217	vm_object_do_bypass(object, backing_object);
	5218	vm_object_collapse_do_bypass++;
	5219
	5220	/*
	5221	* Try again with this object's new backing object.
	5222	*/
	5223
	5224	continue;
	5225	}
	5226
	5227	if (object != original_object) {
	5228	vm_object_unlock(object);
	5229	}
	5230	}
	5231
	5232	/*
	5233	* Routine: vm_object_page_remove: [internal]
	5234	* Purpose:
	5235	* Removes all physical pages in the specified
	5236	* object range from the object's list of pages.
	5237	*
	5238	* In/out conditions:
	5239	* The object must be locked.
	5240	* The object must not have paging_in_progress, usually
	5241	* guaranteed by not having a pager.
	5242	*/
	5243	unsigned int vm_object_page_remove_lookup = 0;
	5244	unsigned int vm_object_page_remove_iterate = 0;
	5245
	5246	__private_extern__ void
	5247	vm_object_page_remove(
	5248	register vm_object_t object,
	5249	register vm_object_offset_t start,
	5250	register vm_object_offset_t end)
	5251	{
	5252	register vm_page_t p, next;
	5253
	5254	/*
	5255	* One and two page removals are most popular.
	5256	* The factor of 16 here is somewhat arbitrary.
	5257	* It balances vm_object_lookup vs iteration.
	5258	*/
	5259
	5260	if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
	5261	vm_object_page_remove_lookup++;
	5262
	5263	for (; start < end; start += PAGE_SIZE_64) {
	5264	p = vm_page_lookup(object, start);
	5265	if (p != VM_PAGE_NULL) {
	5266	assert(!p->cleaning && !p->pageout && !p->laundry);
	5267	if (!p->fictitious && p->pmapped)
	5268	pmap_disconnect(p->phys_page);
	5269	VM_PAGE_FREE(p);
	5270	}
	5271	}
	5272	} else {
	5273	vm_object_page_remove_iterate++;
	5274
	5275	p = (vm_page_t) queue_first(&object->memq);
	5276	while (!queue_end(&object->memq, (queue_entry_t) p)) {
	5277	next = (vm_page_t) queue_next(&p->listq);
	5278	if ((start <= p->offset) && (p->offset < end)) {
	5279	assert(!p->cleaning && !p->pageout && !p->laundry);
	5280	if (!p->fictitious && p->pmapped)
	5281	pmap_disconnect(p->phys_page);
	5282	VM_PAGE_FREE(p);
	5283	}
	5284	p = next;
	5285	}
	5286	}
	5287	}
	5288
	5289
	5290	/*
	5291	* Routine: vm_object_coalesce
	5292	* Function: Coalesces two objects backing up adjoining
	5293	* regions of memory into a single object.
	5294	*
	5295	* returns TRUE if objects were combined.
	5296	*
	5297	* NOTE: Only works at the moment if the second object is NULL -
	5298	* if it's not, which object do we lock first?
	5299	*
	5300	* Parameters:
	5301	* prev_object First object to coalesce
	5302	* prev_offset Offset into prev_object
	5303	* next_object Second object into coalesce
	5304	* next_offset Offset into next_object
	5305	*
	5306	* prev_size Size of reference to prev_object
	5307	* next_size Size of reference to next_object
	5308	*
	5309	* Conditions:
	5310	* The object(s) must not be locked. The map must be locked
	5311	* to preserve the reference to the object(s).
	5312	*/
	5313	static int vm_object_coalesce_count = 0;
	5314
	5315	__private_extern__ boolean_t
	5316	vm_object_coalesce(
	5317	register vm_object_t prev_object,
	5318	vm_object_t next_object,
	5319	vm_object_offset_t prev_offset,
	5320	__unused vm_object_offset_t next_offset,
	5321	vm_object_size_t prev_size,
	5322	vm_object_size_t next_size)
	5323	{
	5324	vm_object_size_t newsize;
	5325
	5326	#ifdef lint
	5327	next_offset++;
	5328	#endif /* lint */
	5329
	5330	if (next_object != VM_OBJECT_NULL) {
	5331	return(FALSE);
	5332	}
	5333
	5334	if (prev_object == VM_OBJECT_NULL) {
	5335	return(TRUE);
	5336	}
	5337
	5338	XPR(XPR_VM_OBJECT,
	5339	"vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
	5340	prev_object, prev_offset, prev_size, next_size, 0);
	5341
	5342	vm_object_lock(prev_object);
	5343
	5344	/*
	5345	* Try to collapse the object first
	5346	*/
	5347	vm_object_collapse(prev_object, prev_offset, TRUE);
	5348
	5349	/*
	5350	* Can't coalesce if pages not mapped to
	5351	* prev_entry may be in use any way:
	5352	* . more than one reference
	5353	* . paged out
	5354	* . shadows another object
	5355	* . has a copy elsewhere
	5356	* . is purgeable
	5357	* . paging references (pages might be in page-list)
	5358	*/
	5359
	5360	if ((prev_object->ref_count > 1) \|\|
	5361	prev_object->pager_created \|\|
	5362	(prev_object->shadow != VM_OBJECT_NULL) \|\|
	5363	(prev_object->copy != VM_OBJECT_NULL) \|\|
	5364	(prev_object->true_share != FALSE) \|\|
	5365	(prev_object->purgable != VM_PURGABLE_DENY) \|\|
	5366	(prev_object->paging_in_progress != 0) \|\|
	5367	(prev_object->activity_in_progress != 0)) {
	5368	vm_object_unlock(prev_object);
	5369	return(FALSE);
	5370	}
	5371
	5372	vm_object_coalesce_count++;
	5373
	5374	/*
	5375	* Remove any pages that may still be in the object from
	5376	* a previous deallocation.
	5377	*/
	5378	vm_object_page_remove(prev_object,
	5379	prev_offset + prev_size,
	5380	prev_offset + prev_size + next_size);
	5381
	5382	/*
	5383	* Extend the object if necessary.
	5384	*/
	5385	newsize = prev_offset + prev_size + next_size;
	5386	if (newsize > prev_object->vo_size) {
	5387	#if MACH_PAGEMAP
	5388	/*
	5389	* We cannot extend an object that has existence info,
	5390	* since the existence info might then fail to cover
	5391	* the entire object.
	5392	*
	5393	* This assertion must be true because the object
	5394	* has no pager, and we only create existence info
	5395	* for objects with pagers.
	5396	*/
	5397	assert(prev_object->existence_map == VM_EXTERNAL_NULL);
	5398	#endif /* MACH_PAGEMAP */
	5399	prev_object->vo_size = newsize;
	5400	}
	5401
	5402	vm_object_unlock(prev_object);
	5403	return(TRUE);
	5404	}
	5405
	5406	/*
	5407	* Attach a set of physical pages to an object, so that they can
	5408	* be mapped by mapping the object. Typically used to map IO memory.
	5409	*
	5410	* The mapping function and its private data are used to obtain the
	5411	* physical addresses for each page to be mapped.
	5412	*/
	5413	void
	5414	vm_object_page_map(
	5415	vm_object_t object,
	5416	vm_object_offset_t offset,
	5417	vm_object_size_t size,
	5418	vm_object_offset_t (map_fn)(void map_fn_data,
	5419	vm_object_offset_t offset),
	5420	void map_fn_data) / private to map_fn */
	5421	{
	5422	int64_t num_pages;
	5423	int i;
	5424	vm_page_t m;
	5425	vm_page_t old_page;
	5426	vm_object_offset_t addr;
	5427
	5428	num_pages = atop_64(size);
	5429
	5430	for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
	5431
	5432	addr = (*map_fn)(map_fn_data, offset);
	5433
	5434	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
	5435	vm_page_more_fictitious();
	5436
	5437	vm_object_lock(object);
	5438	if ((old_page = vm_page_lookup(object, offset))
	5439	!= VM_PAGE_NULL)
	5440	{
	5441	VM_PAGE_FREE(old_page);
	5442	}
	5443
	5444	assert((ppnum_t) addr == addr);
	5445	vm_page_init(m, (ppnum_t) addr, FALSE);
	5446	/*
	5447	* private normally requires lock_queues but since we
	5448	* are initializing the page, its not necessary here
	5449	*/
	5450	m->private = TRUE; /* don`t free page */
	5451	m->wire_count = 1;
	5452	vm_page_insert(m, object, offset);
	5453
	5454	PAGE_WAKEUP_DONE(m);
	5455	vm_object_unlock(object);
	5456	}
	5457	}
	5458
	5459	kern_return_t
	5460	vm_object_populate_with_private(
	5461	vm_object_t object,
	5462	vm_object_offset_t offset,
	5463	ppnum_t phys_page,
	5464	vm_size_t size)
	5465	{
	5466	ppnum_t base_page;
	5467	vm_object_offset_t base_offset;
	5468
	5469
	5470	if (!object->private)
	5471	return KERN_FAILURE;
	5472
	5473	base_page = phys_page;
	5474
	5475	vm_object_lock(object);
	5476
	5477	if (!object->phys_contiguous) {
	5478	vm_page_t m;
	5479
	5480	if ((base_offset = trunc_page_64(offset)) != offset) {
	5481	vm_object_unlock(object);
	5482	return KERN_FAILURE;
	5483	}
	5484	base_offset += object->paging_offset;
	5485
	5486	while (size) {
	5487	m = vm_page_lookup(object, base_offset);
	5488
	5489	if (m != VM_PAGE_NULL) {
	5490	if (m->fictitious) {
	5491	if (m->phys_page != vm_page_guard_addr) {
	5492
	5493	vm_page_lockspin_queues();
	5494	m->private = TRUE;
	5495	vm_page_unlock_queues();
	5496
	5497	m->fictitious = FALSE;
	5498	m->phys_page = base_page;
	5499	}
	5500	} else if (m->phys_page != base_page) {
	5501
	5502	if ( !m->private) {
	5503	/*
	5504	* we'd leak a real page... that can't be right
	5505	*/
	5506	panic("vm_object_populate_with_private - %p not private", m);
	5507	}
	5508	if (m->pmapped) {
	5509	/*
	5510	* pmap call to clear old mapping
	5511	*/
	5512	pmap_disconnect(m->phys_page);
	5513	}
	5514	m->phys_page = base_page;
	5515	}
	5516	if (m->encrypted) {
	5517	/*
	5518	* we should never see this on a ficticious or private page
	5519	*/
	5520	panic("vm_object_populate_with_private - %p encrypted", m);
	5521	}
	5522
	5523	} else {
	5524	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
	5525	vm_page_more_fictitious();
	5526
	5527	/*
	5528	* private normally requires lock_queues but since we
	5529	* are initializing the page, its not necessary here
	5530	*/
	5531	m->private = TRUE;
	5532	m->fictitious = FALSE;
	5533	m->phys_page = base_page;
	5534	m->unusual = TRUE;
	5535	m->busy = FALSE;
	5536
	5537	vm_page_insert(m, object, base_offset);
	5538	}
	5539	base_page++; /* Go to the next physical page */
	5540	base_offset += PAGE_SIZE;
	5541	size -= PAGE_SIZE;
	5542	}
	5543	} else {
	5544	/* NOTE: we should check the original settings here */
	5545	/* if we have a size > zero a pmap call should be made */
	5546	/* to disable the range */
	5547
	5548	/* pmap_? */
	5549
	5550	/* shadows on contiguous memory are not allowed */
	5551	/* we therefore can use the offset field */
	5552	object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
	5553	object->vo_size = size;
	5554	}
	5555	vm_object_unlock(object);
	5556
	5557	return KERN_SUCCESS;
	5558	}
	5559
	5560	/*
	5561	* memory_object_free_from_cache:
	5562	*
	5563	* Walk the vm_object cache list, removing and freeing vm_objects
	5564	* which are backed by the pager identified by the caller, (pager_ops).
	5565	* Remove up to "count" objects, if there are that may available
	5566	* in the cache.
	5567	*
	5568	* Walk the list at most once, return the number of vm_objects
	5569	* actually freed.
	5570	*/
	5571
	5572	__private_extern__ kern_return_t
	5573	memory_object_free_from_cache(
	5574	__unused host_t host,
	5575	__unused memory_object_pager_ops_t pager_ops,
	5576	int *count)
	5577	{
	5578	#if VM_OBJECT_CACHE
	5579	int object_released = 0;
	5580
	5581	register vm_object_t object = VM_OBJECT_NULL;
	5582	vm_object_t shadow;
	5583
	5584	/*
	5585	if(host == HOST_NULL)
	5586	return(KERN_INVALID_ARGUMENT);
	5587	*/
	5588
	5589	try_again:
	5590	vm_object_cache_lock();
	5591
	5592	queue_iterate(&vm_object_cached_list, object,
	5593	vm_object_t, cached_list) {
	5594	if (object->pager &&
	5595	(pager_ops == object->pager->mo_pager_ops)) {
	5596	vm_object_lock(object);
	5597	queue_remove(&vm_object_cached_list, object,
	5598	vm_object_t, cached_list);
	5599	vm_object_cached_count--;
	5600
	5601	vm_object_cache_unlock();
	5602	/*
	5603	* Since this object is in the cache, we know
	5604	* that it is initialized and has only a pager's
	5605	* (implicit) reference. Take a reference to avoid
	5606	* recursive deallocations.
	5607	*/
	5608
	5609	assert(object->pager_initialized);
	5610	assert(object->ref_count == 0);
	5611	vm_object_lock_assert_exclusive(object);
	5612	object->ref_count++;
	5613
	5614	/*
	5615	* Terminate the object.
	5616	* If the object had a shadow, we let
	5617	* vm_object_deallocate deallocate it.
	5618	* "pageout" objects have a shadow, but
	5619	* maintain a "paging reference" rather
	5620	* than a normal reference.
	5621	* (We are careful here to limit recursion.)
	5622	*/
	5623	shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
	5624
	5625	if ((vm_object_terminate(object) == KERN_SUCCESS)
	5626	&& (shadow != VM_OBJECT_NULL)) {
	5627	vm_object_deallocate(shadow);
	5628	}
	5629
	5630	if(object_released++ == *count)
	5631	return KERN_SUCCESS;
	5632	goto try_again;
	5633	}
	5634	}
	5635	vm_object_cache_unlock();
	5636	*count = object_released;
	5637	#else
	5638	*count = 0;
	5639	#endif
	5640	return KERN_SUCCESS;
	5641	}
	5642
	5643
	5644
	5645	kern_return_t
	5646	memory_object_create_named(
	5647	memory_object_t pager,
	5648	memory_object_offset_t size,
	5649	memory_object_control_t *control)
	5650	{
	5651	vm_object_t object;
	5652	vm_object_hash_entry_t entry;
	5653	lck_mtx_t *lck;
	5654
	5655	*control = MEMORY_OBJECT_CONTROL_NULL;
	5656	if (pager == MEMORY_OBJECT_NULL)
	5657	return KERN_INVALID_ARGUMENT;
	5658
	5659	lck = vm_object_hash_lock_spin(pager);
	5660	entry = vm_object_hash_lookup(pager, FALSE);
	5661
	5662	if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
	5663	(entry->object != VM_OBJECT_NULL)) {
	5664	if (entry->object->named == TRUE)
	5665	panic("memory_object_create_named: caller already holds the right"); }
	5666	vm_object_hash_unlock(lck);
	5667
	5668	if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) {
	5669	return(KERN_INVALID_OBJECT);
	5670	}
	5671
	5672	/* wait for object (if any) to be ready */
	5673	if (object != VM_OBJECT_NULL) {
	5674	vm_object_lock(object);
	5675	object->named = TRUE;
	5676	while (!object->pager_ready) {
	5677	vm_object_sleep(object,
	5678	VM_OBJECT_EVENT_PAGER_READY,
	5679	THREAD_UNINT);
	5680	}
	5681	*control = object->pager_control;
	5682	vm_object_unlock(object);
	5683	}
	5684	return (KERN_SUCCESS);
	5685	}
	5686
	5687
	5688	/*
	5689	* Routine: memory_object_recover_named [user interface]
	5690	* Purpose:
	5691	* Attempt to recover a named reference for a VM object.
	5692	* VM will verify that the object has not already started
	5693	* down the termination path, and if it has, will optionally
	5694	* wait for that to finish.
	5695	* Returns:
	5696	* KERN_SUCCESS - we recovered a named reference on the object
	5697	* KERN_FAILURE - we could not recover a reference (object dead)
	5698	* KERN_INVALID_ARGUMENT - bad memory object control
	5699	*/
	5700	kern_return_t
	5701	memory_object_recover_named(
	5702	memory_object_control_t control,
	5703	boolean_t wait_on_terminating)
	5704	{
	5705	vm_object_t object;
	5706
	5707	object = memory_object_control_to_vm_object(control);
	5708	if (object == VM_OBJECT_NULL) {
	5709	return (KERN_INVALID_ARGUMENT);
	5710	}
	5711	restart:
	5712	vm_object_lock(object);
	5713
	5714	if (object->terminating && wait_on_terminating) {
	5715	vm_object_wait(object,
	5716	VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
	5717	THREAD_UNINT);
	5718	goto restart;
	5719	}
	5720
	5721	if (!object->alive) {
	5722	vm_object_unlock(object);
	5723	return KERN_FAILURE;
	5724	}
	5725
	5726	if (object->named == TRUE) {
	5727	vm_object_unlock(object);
	5728	return KERN_SUCCESS;
	5729	}
	5730	#if VM_OBJECT_CACHE
	5731	if ((object->ref_count == 0) && (!object->terminating)) {
	5732	if (!vm_object_cache_lock_try()) {
	5733	vm_object_unlock(object);
	5734	goto restart;
	5735	}
	5736	queue_remove(&vm_object_cached_list, object,
	5737	vm_object_t, cached_list);
	5738	vm_object_cached_count--;
	5739	XPR(XPR_VM_OBJECT_CACHE,
	5740	"memory_object_recover_named: removing %X, head (%X, %X)\n",
	5741	object,
	5742	vm_object_cached_list.next,
	5743	vm_object_cached_list.prev, 0,0);
	5744
	5745	vm_object_cache_unlock();
	5746	}
	5747	#endif
	5748	object->named = TRUE;
	5749	vm_object_lock_assert_exclusive(object);
	5750	object->ref_count++;
	5751	vm_object_res_reference(object);
	5752	while (!object->pager_ready) {
	5753	vm_object_sleep(object,
	5754	VM_OBJECT_EVENT_PAGER_READY,
	5755	THREAD_UNINT);
	5756	}
	5757	vm_object_unlock(object);
	5758	return (KERN_SUCCESS);
	5759	}
	5760
	5761
	5762	/*
	5763	* vm_object_release_name:
	5764	*
	5765	* Enforces name semantic on memory_object reference count decrement
	5766	* This routine should not be called unless the caller holds a name
	5767	* reference gained through the memory_object_create_named.
	5768	*
	5769	* If the TERMINATE_IDLE flag is set, the call will return if the
	5770	* reference count is not 1. i.e. idle with the only remaining reference
	5771	* being the name.
	5772	* If the decision is made to proceed the name field flag is set to
	5773	* false and the reference count is decremented. If the RESPECT_CACHE
	5774	* flag is set and the reference count has gone to zero, the
	5775	* memory_object is checked to see if it is cacheable otherwise when
	5776	* the reference count is zero, it is simply terminated.
	5777	*/
	5778
	5779	__private_extern__ kern_return_t
	5780	vm_object_release_name(
	5781	vm_object_t object,
	5782	int flags)
	5783	{
	5784	vm_object_t shadow;
	5785	boolean_t original_object = TRUE;
	5786
	5787	while (object != VM_OBJECT_NULL) {
	5788
	5789	vm_object_lock(object);
	5790
	5791	assert(object->alive);
	5792	if (original_object)
	5793	assert(object->named);
	5794	assert(object->ref_count > 0);
	5795
	5796	/*
	5797	* We have to wait for initialization before
	5798	* destroying or caching the object.
	5799	*/
	5800
	5801	if (object->pager_created && !object->pager_initialized) {
	5802	assert(!object->can_persist);
	5803	vm_object_assert_wait(object,
	5804	VM_OBJECT_EVENT_INITIALIZED,
	5805	THREAD_UNINT);
	5806	vm_object_unlock(object);
	5807	thread_block(THREAD_CONTINUE_NULL);
	5808	continue;
	5809	}
	5810
	5811	if (((object->ref_count > 1)
	5812	&& (flags & MEMORY_OBJECT_TERMINATE_IDLE))
	5813	\|\| (object->terminating)) {
	5814	vm_object_unlock(object);
	5815	return KERN_FAILURE;
	5816	} else {
	5817	if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
	5818	vm_object_unlock(object);
	5819	return KERN_SUCCESS;
	5820	}
	5821	}
	5822
	5823	if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
	5824	(object->ref_count == 1)) {
	5825	if (original_object)
	5826	object->named = FALSE;
	5827	vm_object_unlock(object);
	5828	/* let vm_object_deallocate push this thing into */
	5829	/* the cache, if that it is where it is bound */
	5830	vm_object_deallocate(object);
	5831	return KERN_SUCCESS;
	5832	}
	5833	VM_OBJ_RES_DECR(object);
	5834	shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
	5835
	5836	if (object->ref_count == 1) {
	5837	if (vm_object_terminate(object) != KERN_SUCCESS) {
	5838	if (original_object) {
	5839	return KERN_FAILURE;
	5840	} else {
	5841	return KERN_SUCCESS;
	5842	}
	5843	}
	5844	if (shadow != VM_OBJECT_NULL) {
	5845	original_object = FALSE;
	5846	object = shadow;
	5847	continue;
	5848	}
	5849	return KERN_SUCCESS;
	5850	} else {
	5851	vm_object_lock_assert_exclusive(object);
	5852	object->ref_count--;
	5853	assert(object->ref_count > 0);
	5854	if(original_object)
	5855	object->named = FALSE;
	5856	vm_object_unlock(object);
	5857	return KERN_SUCCESS;
	5858	}
	5859	}
	5860	/NOTREACHED/
	5861	assert(0);
	5862	return KERN_FAILURE;
	5863	}
	5864
	5865
	5866	__private_extern__ kern_return_t
	5867	vm_object_lock_request(
	5868	vm_object_t object,
	5869	vm_object_offset_t offset,
	5870	vm_object_size_t size,
	5871	memory_object_return_t should_return,
	5872	int flags,
	5873	vm_prot_t prot)
	5874	{
	5875	__unused boolean_t should_flush;
	5876
	5877	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
	5878
	5879	XPR(XPR_MEMORY_OBJECT,
	5880	"vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	5881	object, offset, size,
	5882	(((should_return&1)<<1)\|should_flush), prot);
	5883
	5884	/*
	5885	* Check for bogus arguments.
	5886	*/
	5887	if (object == VM_OBJECT_NULL)
	5888	return (KERN_INVALID_ARGUMENT);
	5889
	5890	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
	5891	return (KERN_INVALID_ARGUMENT);
	5892
	5893	size = round_page_64(size);
	5894
	5895	/*
	5896	* Lock the object, and acquire a paging reference to
	5897	* prevent the memory_object reference from being released.
	5898	*/
	5899	vm_object_lock(object);
	5900	vm_object_paging_begin(object);
	5901
	5902	(void)vm_object_update(object,
	5903	offset, size, NULL, NULL, should_return, flags, prot);
	5904
	5905	vm_object_paging_end(object);
	5906	vm_object_unlock(object);
	5907
	5908	return (KERN_SUCCESS);
	5909	}
	5910
	5911	/*
	5912	* Empty a purgeable object by grabbing the physical pages assigned to it and
	5913	* putting them on the free queue without writing them to backing store, etc.
	5914	* When the pages are next touched they will be demand zero-fill pages. We
	5915	* skip pages which are busy, being paged in/out, wired, etc. We do _not_
	5916	* skip referenced/dirty pages, pages on the active queue, etc. We're more
	5917	* than happy to grab these since this is a purgeable object. We mark the
	5918	* object as "empty" after reaping its pages.
	5919	*
	5920	* On entry the object must be locked and it must be
	5921	* purgeable with no delayed copies pending.
	5922	*/
	5923	void
	5924	vm_object_purge(vm_object_t object)
	5925	{
	5926	vm_object_lock_assert_exclusive(object);
	5927
	5928	if (object->purgable == VM_PURGABLE_DENY)
	5929	return;
	5930
	5931	assert(object->copy == VM_OBJECT_NULL);
	5932	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
	5933
	5934	if(object->purgable == VM_PURGABLE_VOLATILE) {
	5935	unsigned int delta;
	5936	assert(object->resident_page_count >=
	5937	object->wired_page_count);
	5938	delta = (object->resident_page_count -
	5939	object->wired_page_count);
	5940	if (delta != 0) {
	5941	assert(vm_page_purgeable_count >=
	5942	delta);
	5943	OSAddAtomic(-delta,
	5944	(SInt32 *)&vm_page_purgeable_count);
	5945	}
	5946	if (object->wired_page_count != 0) {
	5947	assert(vm_page_purgeable_wired_count >=
	5948	object->wired_page_count);
	5949	OSAddAtomic(-object->wired_page_count,
	5950	(SInt32 *)&vm_page_purgeable_wired_count);
	5951	}
	5952	}
	5953	object->purgable = VM_PURGABLE_EMPTY;
	5954
	5955	vm_object_reap_pages(object, REAP_PURGEABLE);
	5956	}
	5957
	5958
	5959	/*
	5960	* vm_object_purgeable_control() allows the caller to control and investigate the
	5961	* state of a purgeable object. A purgeable object is created via a call to
	5962	* vm_allocate() with VM_FLAGS_PURGABLE specified. A purgeable object will
	5963	* never be coalesced with any other object -- even other purgeable objects --
	5964	* and will thus always remain a distinct object. A purgeable object has
	5965	* special semantics when its reference count is exactly 1. If its reference
	5966	* count is greater than 1, then a purgeable object will behave like a normal
	5967	* object and attempts to use this interface will result in an error return
	5968	* of KERN_INVALID_ARGUMENT.
	5969	*
	5970	* A purgeable object may be put into a "volatile" state which will make the
	5971	* object's pages eligible for being reclaimed without paging to backing
	5972	* store if the system runs low on memory. If the pages in a volatile
	5973	* purgeable object are reclaimed, the purgeable object is said to have been
	5974	* "emptied." When a purgeable object is emptied the system will reclaim as
	5975	* many pages from the object as it can in a convenient manner (pages already
	5976	* en route to backing store or busy for other reasons are left as is). When
	5977	* a purgeable object is made volatile, its pages will generally be reclaimed
	5978	* before other pages in the application's working set. This semantic is
	5979	* generally used by applications which can recreate the data in the object
	5980	* faster than it can be paged in. One such example might be media assets
	5981	* which can be reread from a much faster RAID volume.
	5982	*
	5983	* A purgeable object may be designated as "non-volatile" which means it will
	5984	* behave like all other objects in the system with pages being written to and
	5985	* read from backing store as needed to satisfy system memory needs. If the
	5986	* object was emptied before the object was made non-volatile, that fact will
	5987	* be returned as the old state of the purgeable object (see
	5988	* VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
	5989	* were reclaimed as part of emptying the object will be refaulted in as
	5990	* zero-fill on demand. It is up to the application to note that an object
	5991	* was emptied and recreate the objects contents if necessary. When a
	5992	* purgeable object is made non-volatile, its pages will generally not be paged
	5993	* out to backing store in the immediate future. A purgeable object may also
	5994	* be manually emptied.
	5995	*
	5996	* Finally, the current state (non-volatile, volatile, volatile & empty) of a
	5997	* volatile purgeable object may be queried at any time. This information may
	5998	* be used as a control input to let the application know when the system is
	5999	* experiencing memory pressure and is reclaiming memory.
	6000	*
	6001	* The specified address may be any address within the purgeable object. If
	6002	* the specified address does not represent any object in the target task's
	6003	* virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
	6004	* object containing the specified address is not a purgeable object, then
	6005	* KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
	6006	* returned.
	6007	*
	6008	* The control parameter may be any one of VM_PURGABLE_SET_STATE or
	6009	* VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
	6010	* state is used to set the new state of the purgeable object and return its
	6011	* old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable
	6012	* object is returned in the parameter state.
	6013	*
	6014	* The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
	6015	* VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
	6016	* the non-volatile, volatile and volatile/empty states described above.
	6017	* Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
	6018	* immediately reclaim as many pages in the object as can be conveniently
	6019	* collected (some may have already been written to backing store or be
	6020	* otherwise busy).
	6021	*
	6022	* The process of making a purgeable object non-volatile and determining its
	6023	* previous state is atomic. Thus, if a purgeable object is made
	6024	* VM_PURGABLE_NONVOLATILE and the old state is returned as
	6025	* VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
	6026	* completely intact and will remain so until the object is made volatile
	6027	* again. If the old state is returned as VM_PURGABLE_EMPTY then the object
	6028	* was reclaimed while it was in a volatile state and its previous contents
	6029	* have been lost.
	6030	*/
	6031	/*
	6032	* The object must be locked.
	6033	*/
	6034	kern_return_t
	6035	vm_object_purgable_control(
	6036	vm_object_t object,
	6037	vm_purgable_t control,
	6038	int *state)
	6039	{
	6040	int old_state;
	6041	int new_state;
	6042
	6043	if (object == VM_OBJECT_NULL) {
	6044	/*
	6045	* Object must already be present or it can't be purgeable.
	6046	*/
	6047	return KERN_INVALID_ARGUMENT;
	6048	}
	6049
	6050	/*
	6051	* Get current state of the purgeable object.
	6052	*/
	6053	old_state = object->purgable;
	6054	if (old_state == VM_PURGABLE_DENY)
	6055	return KERN_INVALID_ARGUMENT;
	6056
	6057	/* purgeable cant have delayed copies - now or in the future */
	6058	assert(object->copy == VM_OBJECT_NULL);
	6059	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
	6060
	6061	/*
	6062	* Execute the desired operation.
	6063	*/
	6064	if (control == VM_PURGABLE_GET_STATE) {
	6065	*state = old_state;
	6066	return KERN_SUCCESS;
	6067	}
	6068
	6069	if ((*state) & VM_PURGABLE_DEBUG_EMPTY) {
	6070	object->volatile_empty = TRUE;
	6071	}
	6072	if ((*state) & VM_PURGABLE_DEBUG_FAULT) {
	6073	object->volatile_fault = TRUE;
	6074	}
	6075
	6076	new_state = *state & VM_PURGABLE_STATE_MASK;
	6077	if (new_state == VM_PURGABLE_VOLATILE &&
	6078	object->volatile_empty) {
	6079	new_state = VM_PURGABLE_EMPTY;
	6080	}
	6081
	6082	switch (new_state) {
	6083	case VM_PURGABLE_DENY:
	6084	case VM_PURGABLE_NONVOLATILE:
	6085	object->purgable = new_state;
	6086
	6087	if (old_state == VM_PURGABLE_VOLATILE) {
	6088	unsigned int delta;
	6089
	6090	assert(object->resident_page_count >=
	6091	object->wired_page_count);
	6092	delta = (object->resident_page_count -
	6093	object->wired_page_count);
	6094
	6095	assert(vm_page_purgeable_count >= delta);
	6096
	6097	if (delta != 0) {
	6098	OSAddAtomic(-delta,
	6099	(SInt32 *)&vm_page_purgeable_count);
	6100	}
	6101	if (object->wired_page_count != 0) {
	6102	assert(vm_page_purgeable_wired_count >=
	6103	object->wired_page_count);
	6104	OSAddAtomic(-object->wired_page_count,
	6105	(SInt32 *)&vm_page_purgeable_wired_count);
	6106	}
	6107
	6108	vm_page_lock_queues();
	6109
	6110	assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
	6111	purgeable_q_t queue = vm_purgeable_object_remove(object);
	6112	assert(queue);
	6113
	6114	vm_purgeable_token_delete_last(queue);
	6115	assert(queue->debug_count_objects>=0);
	6116
	6117	vm_page_unlock_queues();
	6118	}
	6119	break;
	6120
	6121	case VM_PURGABLE_VOLATILE:
	6122	if (object->volatile_fault) {
	6123	vm_page_t p;
	6124	int refmod;
	6125
	6126	queue_iterate(&object->memq, p, vm_page_t, listq) {
	6127	if (p->busy \|\|
	6128	VM_PAGE_WIRED(p) \|\|
	6129	p->fictitious) {
	6130	continue;
	6131	}
	6132	refmod = pmap_disconnect(p->phys_page);
	6133	if ((refmod & VM_MEM_MODIFIED) &&
	6134	!p->dirty) {
	6135	SET_PAGE_DIRTY(p, FALSE);
	6136	}
	6137	}
	6138	}
	6139
	6140	if (old_state == VM_PURGABLE_EMPTY &&
	6141	object->resident_page_count == 0)
	6142	break;
	6143
	6144	purgeable_q_t queue;
	6145
	6146	/* find the correct queue */
	6147	if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
	6148	queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
	6149	else {
	6150	if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
	6151	queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
	6152	else
	6153	queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
	6154	}
	6155
	6156	if (old_state == VM_PURGABLE_NONVOLATILE \|\|
	6157	old_state == VM_PURGABLE_EMPTY) {
	6158	unsigned int delta;
	6159
	6160	/* try to add token... this can fail */
	6161	vm_page_lock_queues();
	6162
	6163	kern_return_t result = vm_purgeable_token_add(queue);
	6164	if (result != KERN_SUCCESS) {
	6165	vm_page_unlock_queues();
	6166	return result;
	6167	}
	6168	vm_page_unlock_queues();
	6169
	6170	assert(object->resident_page_count >=
	6171	object->wired_page_count);
	6172	delta = (object->resident_page_count -
	6173	object->wired_page_count);
	6174
	6175	if (delta != 0) {
	6176	OSAddAtomic(delta,
	6177	&vm_page_purgeable_count);
	6178	}
	6179	if (object->wired_page_count != 0) {
	6180	OSAddAtomic(object->wired_page_count,
	6181	&vm_page_purgeable_wired_count);
	6182	}
	6183
	6184	object->purgable = new_state;
	6185
	6186	/* object should not be on a queue */
	6187	assert(object->objq.next == NULL && object->objq.prev == NULL);
	6188	}
	6189	else if (old_state == VM_PURGABLE_VOLATILE) {
	6190	/*
	6191	* if reassigning priorities / purgeable groups, we don't change the
	6192	* token queue. So moving priorities will not make pages stay around longer.
	6193	* Reasoning is that the algorithm gives most priority to the most important
	6194	* object. If a new token is added, the most important object' priority is boosted.
	6195	* This biases the system already for purgeable queues that move a lot.
	6196	* It doesn't seem more biasing is neccessary in this case, where no new object is added.
	6197	*/
	6198	assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
	6199
	6200	purgeable_q_t old_queue=vm_purgeable_object_remove(object);
	6201	assert(old_queue);
	6202
	6203	if (old_queue != queue) {
	6204	kern_return_t result;
	6205
	6206	/* Changing queue. Have to move token. */
	6207	vm_page_lock_queues();
	6208	vm_purgeable_token_delete_last(old_queue);
	6209	result = vm_purgeable_token_add(queue);
	6210	vm_page_unlock_queues();
	6211
	6212	assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */
	6213	}
	6214	};
	6215	vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT );
	6216
	6217	assert(queue->debug_count_objects>=0);
	6218
	6219	break;
	6220
	6221
	6222	case VM_PURGABLE_EMPTY:
	6223	if (object->volatile_fault) {
	6224	vm_page_t p;
	6225	int refmod;
	6226
	6227	queue_iterate(&object->memq, p, vm_page_t, listq) {
	6228	if (p->busy \|\|
	6229	VM_PAGE_WIRED(p) \|\|
	6230	p->fictitious) {
	6231	continue;
	6232	}
	6233	refmod = pmap_disconnect(p->phys_page);
	6234	if ((refmod & VM_MEM_MODIFIED) &&
	6235	!p->dirty) {
	6236	SET_PAGE_DIRTY(p, FALSE);
	6237	}
	6238	}
	6239	}
	6240
	6241	if (old_state != new_state) {
	6242	assert(old_state == VM_PURGABLE_NONVOLATILE \|\|
	6243	old_state == VM_PURGABLE_VOLATILE);
	6244	if (old_state == VM_PURGABLE_VOLATILE) {
	6245	purgeable_q_t old_queue;
	6246
	6247	/* object should be on a queue */
	6248	assert(object->objq.next != NULL &&
	6249	object->objq.prev != NULL);
	6250	old_queue = vm_purgeable_object_remove(object);
	6251	assert(old_queue);
	6252	vm_page_lock_queues();
	6253	vm_purgeable_token_delete_last(old_queue);
	6254	vm_page_unlock_queues();
	6255	}
	6256	(void) vm_object_purge(object);
	6257	}
	6258	break;
	6259
	6260	}
	6261	*state = old_state;
	6262
	6263	return KERN_SUCCESS;
	6264	}
	6265
	6266	#if TASK_SWAPPER
	6267	/*
	6268	* vm_object_res_deallocate
	6269	*
	6270	* (recursively) decrement residence counts on vm objects and their shadows.
	6271	* Called from vm_object_deallocate and when swapping out an object.
	6272	*
	6273	* The object is locked, and remains locked throughout the function,
	6274	* even as we iterate down the shadow chain. Locks on intermediate objects
	6275	* will be dropped, but not the original object.
	6276	*
	6277	* NOTE: this function used to use recursion, rather than iteration.
	6278	*/
	6279
	6280	__private_extern__ void
	6281	vm_object_res_deallocate(
	6282	vm_object_t object)
	6283	{
	6284	vm_object_t orig_object = object;
	6285	/*
	6286	* Object is locked so it can be called directly
	6287	* from vm_object_deallocate. Original object is never
	6288	* unlocked.
	6289	*/
	6290	assert(object->res_count > 0);
	6291	while (--object->res_count == 0) {
	6292	assert(object->ref_count >= object->res_count);
	6293	vm_object_deactivate_all_pages(object);
	6294	/* iterate on shadow, if present */
	6295	if (object->shadow != VM_OBJECT_NULL) {
	6296	vm_object_t tmp_object = object->shadow;
	6297	vm_object_lock(tmp_object);
	6298	if (object != orig_object)
	6299	vm_object_unlock(object);
	6300	object = tmp_object;
	6301	assert(object->res_count > 0);
	6302	} else
	6303	break;
	6304	}
	6305	if (object != orig_object)
	6306	vm_object_unlock(object);
	6307	}
	6308
	6309	/*
	6310	* vm_object_res_reference
	6311	*
	6312	* Internal function to increment residence count on a vm object
	6313	* and its shadows. It is called only from vm_object_reference, and
	6314	* when swapping in a vm object, via vm_map_swap.
	6315	*
	6316	* The object is locked, and remains locked throughout the function,
	6317	* even as we iterate down the shadow chain. Locks on intermediate objects
	6318	* will be dropped, but not the original object.
	6319	*
	6320	* NOTE: this function used to use recursion, rather than iteration.
	6321	*/
	6322
	6323	__private_extern__ void
	6324	vm_object_res_reference(
	6325	vm_object_t object)
	6326	{
	6327	vm_object_t orig_object = object;
	6328	/*
	6329	* Object is locked, so this can be called directly
	6330	* from vm_object_reference. This lock is never released.
	6331	*/
	6332	while ((++object->res_count == 1) &&
	6333	(object->shadow != VM_OBJECT_NULL)) {
	6334	vm_object_t tmp_object = object->shadow;
	6335
	6336	assert(object->ref_count >= object->res_count);
	6337	vm_object_lock(tmp_object);
	6338	if (object != orig_object)
	6339	vm_object_unlock(object);
	6340	object = tmp_object;
	6341	}
	6342	if (object != orig_object)
	6343	vm_object_unlock(object);
	6344	assert(orig_object->ref_count >= orig_object->res_count);
	6345	}
	6346	#endif /* TASK_SWAPPER */
	6347
	6348	/*
	6349	* vm_object_reference:
	6350	*
	6351	* Gets another reference to the given object.
	6352	*/
	6353	#ifdef vm_object_reference
	6354	#undef vm_object_reference
	6355	#endif
	6356	__private_extern__ void
	6357	vm_object_reference(
	6358	register vm_object_t object)
	6359	{
	6360	if (object == VM_OBJECT_NULL)
	6361	return;
	6362
	6363	vm_object_lock(object);
	6364	assert(object->ref_count > 0);
	6365	vm_object_reference_locked(object);
	6366	vm_object_unlock(object);
	6367	}
	6368
	6369	#ifdef MACH_BSD
	6370	/*
	6371	* Scale the vm_object_cache
	6372	* This is required to make sure that the vm_object_cache is big
	6373	* enough to effectively cache the mapped file.
	6374	* This is really important with UBC as all the regular file vnodes
	6375	* have memory object associated with them. Havving this cache too
	6376	* small results in rapid reclaim of vnodes and hurts performance a LOT!
	6377	*
	6378	* This is also needed as number of vnodes can be dynamically scaled.
	6379	*/
	6380	kern_return_t
	6381	adjust_vm_object_cache(
	6382	__unused vm_size_t oval,
	6383	__unused vm_size_t nval)
	6384	{
	6385	#if VM_OBJECT_CACHE
	6386	vm_object_cached_max = nval;
	6387	vm_object_cache_trim(FALSE);
	6388	#endif
	6389	return (KERN_SUCCESS);
	6390	}
	6391	#endif /* MACH_BSD */
	6392
	6393
	6394	/*
	6395	* vm_object_transpose
	6396	*
	6397	* This routine takes two VM objects of the same size and exchanges
	6398	* their backing store.
	6399	* The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
	6400	* and UPL_BLOCK_ACCESS if they are referenced anywhere.
	6401	*
	6402	* The VM objects must not be locked by caller.
	6403	*/
	6404	unsigned int vm_object_transpose_count = 0;
	6405	kern_return_t
	6406	vm_object_transpose(
	6407	vm_object_t object1,
	6408	vm_object_t object2,
	6409	vm_object_size_t transpose_size)
	6410	{
	6411	vm_object_t tmp_object;
	6412	kern_return_t retval;
	6413	boolean_t object1_locked, object2_locked;
	6414	vm_page_t page;
	6415	vm_object_offset_t page_offset;
	6416	lck_mtx_t *hash_lck;
	6417	vm_object_hash_entry_t hash_entry;
	6418
	6419	tmp_object = VM_OBJECT_NULL;
	6420	object1_locked = FALSE; object2_locked = FALSE;
	6421
	6422	if (object1 == object2 \|\|
	6423	object1 == VM_OBJECT_NULL \|\|
	6424	object2 == VM_OBJECT_NULL) {
	6425	/*
	6426	* If the 2 VM objects are the same, there's
	6427	* no point in exchanging their backing store.
	6428	*/
	6429	retval = KERN_INVALID_VALUE;
	6430	goto done;
	6431	}
	6432
	6433	/*
	6434	* Since we need to lock both objects at the same time,
	6435	* make sure we always lock them in the same order to
	6436	* avoid deadlocks.
	6437	*/
	6438	if (object1 > object2) {
	6439	tmp_object = object1;
	6440	object1 = object2;
	6441	object2 = tmp_object;
	6442	}
	6443
	6444	/*
	6445	* Allocate a temporary VM object to hold object1's contents
	6446	* while we copy object2 to object1.
	6447	*/
	6448	tmp_object = vm_object_allocate(transpose_size);
	6449	vm_object_lock(tmp_object);
	6450	tmp_object->can_persist = FALSE;
	6451
	6452
	6453	/*
	6454	* Grab control of the 1st VM object.
	6455	*/
	6456	vm_object_lock(object1);
	6457	object1_locked = TRUE;
	6458	if (!object1->alive \|\| object1->terminating \|\|
	6459	object1->copy \|\| object1->shadow \|\| object1->shadowed \|\|
	6460	object1->purgable != VM_PURGABLE_DENY) {
	6461	/*
	6462	* We don't deal with copy or shadow objects (yet).
	6463	*/
	6464	retval = KERN_INVALID_VALUE;
	6465	goto done;
	6466	}
	6467	/*
	6468	* We're about to mess with the object's backing store and
	6469	* taking a "paging_in_progress" reference wouldn't be enough
	6470	* to prevent any paging activity on this object, so the caller should
	6471	* have "quiesced" the objects beforehand, via a UPL operation with
	6472	* UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
	6473	* and UPL_BLOCK_ACCESS (to mark the pages "busy").
	6474	*
	6475	* Wait for any paging operation to complete (but only paging, not
	6476	* other kind of activities not linked to the pager). After we're
	6477	* statisfied that there's no more paging in progress, we keep the
	6478	* object locked, to guarantee that no one tries to access its pager.
	6479	*/
	6480	vm_object_paging_only_wait(object1, THREAD_UNINT);
	6481
	6482	/*
	6483	* Same as above for the 2nd object...
	6484	*/
	6485	vm_object_lock(object2);
	6486	object2_locked = TRUE;
	6487	if (! object2->alive \|\| object2->terminating \|\|
	6488	object2->copy \|\| object2->shadow \|\| object2->shadowed \|\|
	6489	object2->purgable != VM_PURGABLE_DENY) {
	6490	retval = KERN_INVALID_VALUE;
	6491	goto done;
	6492	}
	6493	vm_object_paging_only_wait(object2, THREAD_UNINT);
	6494
	6495
	6496	if (object1->vo_size != object2->vo_size \|\|
	6497	object1->vo_size != transpose_size) {
	6498	/*
	6499	* If the 2 objects don't have the same size, we can't
	6500	* exchange their backing stores or one would overflow.
	6501	* If their size doesn't match the caller's
	6502	* "transpose_size", we can't do it either because the
	6503	* transpose operation will affect the entire span of
	6504	* the objects.
	6505	*/
	6506	retval = KERN_INVALID_VALUE;
	6507	goto done;
	6508	}
	6509
	6510
	6511	/*
	6512	* Transpose the lists of resident pages.
	6513	* This also updates the resident_page_count and the memq_hint.
	6514	*/
	6515	if (object1->phys_contiguous \|\| queue_empty(&object1->memq)) {
	6516	/*
	6517	* No pages in object1, just transfer pages
	6518	* from object2 to object1. No need to go through
	6519	* an intermediate object.
	6520	*/
	6521	while (!queue_empty(&object2->memq)) {
	6522	page = (vm_page_t) queue_first(&object2->memq);
	6523	vm_page_rename(page, object1, page->offset, FALSE);
	6524	}
	6525	assert(queue_empty(&object2->memq));
	6526	} else if (object2->phys_contiguous \|\| queue_empty(&object2->memq)) {
	6527	/*
	6528	* No pages in object2, just transfer pages
	6529	* from object1 to object2. No need to go through
	6530	* an intermediate object.
	6531	*/
	6532	while (!queue_empty(&object1->memq)) {
	6533	page = (vm_page_t) queue_first(&object1->memq);
	6534	vm_page_rename(page, object2, page->offset, FALSE);
	6535	}
	6536	assert(queue_empty(&object1->memq));
	6537	} else {
	6538	/* transfer object1's pages to tmp_object */
	6539	while (!queue_empty(&object1->memq)) {
	6540	page = (vm_page_t) queue_first(&object1->memq);
	6541	page_offset = page->offset;
	6542	vm_page_remove(page, TRUE);
	6543	page->offset = page_offset;
	6544	queue_enter(&tmp_object->memq, page, vm_page_t, listq);
	6545	}
	6546	assert(queue_empty(&object1->memq));
	6547	/* transfer object2's pages to object1 */
	6548	while (!queue_empty(&object2->memq)) {
	6549	page = (vm_page_t) queue_first(&object2->memq);
	6550	vm_page_rename(page, object1, page->offset, FALSE);
	6551	}
	6552	assert(queue_empty(&object2->memq));
	6553	/* transfer tmp_object's pages to object1 */
	6554	while (!queue_empty(&tmp_object->memq)) {
	6555	page = (vm_page_t) queue_first(&tmp_object->memq);
	6556	queue_remove(&tmp_object->memq, page,
	6557	vm_page_t, listq);
	6558	vm_page_insert(page, object2, page->offset);
	6559	}
	6560	assert(queue_empty(&tmp_object->memq));
	6561	}
	6562
	6563	#define __TRANSPOSE_FIELD(field) \
	6564	MACRO_BEGIN \
	6565	tmp_object->field = object1->field; \
	6566	object1->field = object2->field; \
	6567	object2->field = tmp_object->field; \
	6568	MACRO_END
	6569
	6570	/* "Lock" refers to the object not its contents */
	6571	/* "size" should be identical */
	6572	assert(object1->vo_size == object2->vo_size);
	6573	/* "memq_hint" was updated above when transposing pages */
	6574	/* "ref_count" refers to the object not its contents */
	6575	#if TASK_SWAPPER
	6576	/* "res_count" refers to the object not its contents */
	6577	#endif
	6578	/* "resident_page_count" was updated above when transposing pages */
	6579	/* "wired_page_count" was updated above when transposing pages */
	6580	/* "reusable_page_count" was updated above when transposing pages */
	6581	/* there should be no "copy" */
	6582	assert(!object1->copy);
	6583	assert(!object2->copy);
	6584	/* there should be no "shadow" */
	6585	assert(!object1->shadow);
	6586	assert(!object2->shadow);
	6587	__TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */
	6588	__TRANSPOSE_FIELD(pager);
	6589	__TRANSPOSE_FIELD(paging_offset);
	6590	__TRANSPOSE_FIELD(pager_control);
	6591	/* update the memory_objects' pointers back to the VM objects */
	6592	if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
	6593	memory_object_control_collapse(object1->pager_control,
	6594	object1);
	6595	}
	6596	if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
	6597	memory_object_control_collapse(object2->pager_control,
	6598	object2);
	6599	}
	6600	__TRANSPOSE_FIELD(copy_strategy);
	6601	/* "paging_in_progress" refers to the object not its contents */
	6602	assert(!object1->paging_in_progress);
	6603	assert(!object2->paging_in_progress);
	6604	assert(object1->activity_in_progress);
	6605	assert(object2->activity_in_progress);
	6606	/* "all_wanted" refers to the object not its contents */
	6607	__TRANSPOSE_FIELD(pager_created);
	6608	__TRANSPOSE_FIELD(pager_initialized);
	6609	__TRANSPOSE_FIELD(pager_ready);
	6610	__TRANSPOSE_FIELD(pager_trusted);
	6611	__TRANSPOSE_FIELD(can_persist);
	6612	__TRANSPOSE_FIELD(internal);
	6613	__TRANSPOSE_FIELD(temporary);
	6614	__TRANSPOSE_FIELD(private);
	6615	__TRANSPOSE_FIELD(pageout);
	6616	/* "alive" should be set */
	6617	assert(object1->alive);
	6618	assert(object2->alive);
	6619	/* "purgeable" should be non-purgeable */
	6620	assert(object1->purgable == VM_PURGABLE_DENY);
	6621	assert(object2->purgable == VM_PURGABLE_DENY);
	6622	/* "shadowed" refers to the the object not its contents */
	6623	__TRANSPOSE_FIELD(silent_overwrite);
	6624	__TRANSPOSE_FIELD(advisory_pageout);
	6625	__TRANSPOSE_FIELD(true_share);
	6626	/* "terminating" should not be set */
	6627	assert(!object1->terminating);
	6628	assert(!object2->terminating);
	6629	__TRANSPOSE_FIELD(named);
	6630	/* "shadow_severed" refers to the object not its contents */
	6631	__TRANSPOSE_FIELD(phys_contiguous);
	6632	__TRANSPOSE_FIELD(nophyscache);
	6633	/* "cached_list.next" points to transposed object */
	6634	object1->cached_list.next = (queue_entry_t) object2;
	6635	object2->cached_list.next = (queue_entry_t) object1;
	6636	/* "cached_list.prev" should be NULL */
	6637	assert(object1->cached_list.prev == NULL);
	6638	assert(object2->cached_list.prev == NULL);
	6639	/* "msr_q" is linked to the object not its contents */
	6640	assert(queue_empty(&object1->msr_q));
	6641	assert(queue_empty(&object2->msr_q));
	6642	__TRANSPOSE_FIELD(last_alloc);
	6643	__TRANSPOSE_FIELD(sequential);
	6644	__TRANSPOSE_FIELD(pages_created);
	6645	__TRANSPOSE_FIELD(pages_used);
	6646	__TRANSPOSE_FIELD(scan_collisions);
	6647	#if MACH_PAGEMAP
	6648	__TRANSPOSE_FIELD(existence_map);
	6649	#endif
	6650	__TRANSPOSE_FIELD(cow_hint);
	6651	#if MACH_ASSERT
	6652	__TRANSPOSE_FIELD(paging_object);
	6653	#endif
	6654	__TRANSPOSE_FIELD(wimg_bits);
	6655	__TRANSPOSE_FIELD(set_cache_attr);
	6656	__TRANSPOSE_FIELD(code_signed);
	6657	if (object1->hashed) {
	6658	hash_lck = vm_object_hash_lock_spin(object2->pager);
	6659	hash_entry = vm_object_hash_lookup(object2->pager, FALSE);
	6660	assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
	6661	hash_entry->object = object2;
	6662	vm_object_hash_unlock(hash_lck);
	6663	}
	6664	if (object2->hashed) {
	6665	hash_lck = vm_object_hash_lock_spin(object1->pager);
	6666	hash_entry = vm_object_hash_lookup(object1->pager, FALSE);
	6667	assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL);
	6668	hash_entry->object = object1;
	6669	vm_object_hash_unlock(hash_lck);
	6670	}
	6671	__TRANSPOSE_FIELD(hashed);
	6672	object1->transposed = TRUE;
	6673	object2->transposed = TRUE;
	6674	__TRANSPOSE_FIELD(mapping_in_progress);
	6675	__TRANSPOSE_FIELD(volatile_empty);
	6676	__TRANSPOSE_FIELD(volatile_fault);
	6677	__TRANSPOSE_FIELD(all_reusable);
	6678	assert(object1->blocked_access);
	6679	assert(object2->blocked_access);
	6680	assert(object1->__object2_unused_bits == 0);
	6681	assert(object2->__object2_unused_bits == 0);
	6682	#if UPL_DEBUG
	6683	/* "uplq" refers to the object not its contents (see upl_transpose()) */
	6684	#endif
	6685	assert(object1->objq.next == NULL);
	6686	assert(object1->objq.prev == NULL);
	6687	assert(object2->objq.next == NULL);
	6688	assert(object2->objq.prev == NULL);
	6689
	6690	#undef __TRANSPOSE_FIELD
	6691
	6692	retval = KERN_SUCCESS;
	6693
	6694	done:
	6695	/*
	6696	* Cleanup.
	6697	*/
	6698	if (tmp_object != VM_OBJECT_NULL) {
	6699	vm_object_unlock(tmp_object);
	6700	/*
	6701	* Re-initialize the temporary object to avoid
	6702	* deallocating a real pager.
	6703	*/
	6704	_vm_object_allocate(transpose_size, tmp_object);
	6705	vm_object_deallocate(tmp_object);
	6706	tmp_object = VM_OBJECT_NULL;
	6707	}
	6708
	6709	if (object1_locked) {
	6710	vm_object_unlock(object1);
	6711	object1_locked = FALSE;
	6712	}
	6713	if (object2_locked) {
	6714	vm_object_unlock(object2);
	6715	object2_locked = FALSE;
	6716	}
	6717
	6718	vm_object_transpose_count++;
	6719
	6720	return retval;
	6721	}
	6722
	6723
	6724	/*
	6725	* vm_object_cluster_size
	6726	*
	6727	* Determine how big a cluster we should issue an I/O for...
	6728	*
	6729	* Inputs: *start == offset of page needed
	6730	* *length == maximum cluster pager can handle
	6731	* Outputs: *start == beginning offset of cluster
	6732	* *length == length of cluster to try
	6733	*
	6734	* The original *start will be encompassed by the cluster
	6735	*
	6736	*/
	6737	extern int speculative_reads_disabled;
	6738	extern int ignore_is_ssd;
	6739
	6740	#if CONFIG_EMBEDDED
	6741	unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
	6742	unsigned int preheat_pages_min = 10;
	6743	#else
	6744	unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
	6745	unsigned int preheat_pages_min = 8;
	6746	#endif
	6747
	6748	uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];
	6749	uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1];
	6750
	6751
	6752	__private_extern__ void
	6753	vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
	6754	vm_size_t length, vm_object_fault_info_t fault_info, uint32_t io_streaming)
	6755	{
	6756	vm_size_t pre_heat_size;
	6757	vm_size_t tail_size;
	6758	vm_size_t head_size;
	6759	vm_size_t max_length;
	6760	vm_size_t cluster_size;
	6761	vm_object_offset_t object_size;
	6762	vm_object_offset_t orig_start;
	6763	vm_object_offset_t target_start;
	6764	vm_object_offset_t offset;
	6765	vm_behavior_t behavior;
	6766	boolean_t look_behind = TRUE;
	6767	boolean_t look_ahead = TRUE;
	6768	boolean_t isSSD = FALSE;
	6769	uint32_t throttle_limit;
	6770	int sequential_run;
	6771	int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
	6772	unsigned int max_ph_size;
	6773	unsigned int min_ph_size;
	6774	unsigned int min_ph_size_in_bytes;
	6775
	6776	assert( !(*length & PAGE_MASK));
	6777	assert( !(*start & PAGE_MASK_64));
	6778
	6779	/*
	6780	* remember maxiumum length of run requested
	6781	*/
	6782	max_length = *length;
	6783	/*
	6784	* we'll always return a cluster size of at least
	6785	* 1 page, since the original fault must always
	6786	* be processed
	6787	*/
	6788	*length = PAGE_SIZE;
	6789	*io_streaming = 0;
	6790
	6791	if (speculative_reads_disabled \|\| fault_info == NULL) {
	6792	/*
	6793	* no cluster... just fault the page in
	6794	*/
	6795	return;
	6796	}
	6797	orig_start = *start;
	6798	target_start = orig_start;
	6799	cluster_size = round_page(fault_info->cluster_size);
	6800	behavior = fault_info->behavior;
	6801
	6802	vm_object_lock(object);
	6803
	6804	if (object->pager == MEMORY_OBJECT_NULL)
	6805	goto out; /* pager is gone for this object, nothing more to do */
	6806
	6807	if (!ignore_is_ssd)
	6808	vnode_pager_get_isSSD(object->pager, &isSSD);
	6809
	6810	min_ph_size = preheat_pages_min;
	6811	max_ph_size = preheat_pages_max;
	6812
	6813	if (isSSD) {
	6814	min_ph_size /= 2;
	6815	max_ph_size /= 8;
	6816	}
	6817	if (min_ph_size < 1)
	6818	min_ph_size = 1;
	6819
	6820	if (max_ph_size < 1)
	6821	max_ph_size = 1;
	6822	else if (max_ph_size > MAX_UPL_TRANSFER)
	6823	max_ph_size = MAX_UPL_TRANSFER;
	6824
	6825	if (max_length > (max_ph_size * PAGE_SIZE))
	6826	max_length = max_ph_size * PAGE_SIZE;
	6827
	6828	if (max_length <= PAGE_SIZE)
	6829	goto out;
	6830
	6831	min_ph_size_in_bytes = min_ph_size * PAGE_SIZE;
	6832
	6833	if (object->internal)
	6834	object_size = object->vo_size;
	6835	else
	6836	vnode_pager_get_object_size(object->pager, &object_size);
	6837
	6838	object_size = round_page_64(object_size);
	6839
	6840	if (orig_start >= object_size) {
	6841	/*
	6842	* fault occurred beyond the EOF...
	6843	* we need to punt w/o changing the
	6844	* starting offset
	6845	*/
	6846	goto out;
	6847	}
	6848	if (object->pages_used > object->pages_created) {
	6849	/*
	6850	* must have wrapped our 32 bit counters
	6851	* so reset
	6852	*/
	6853	object->pages_used = object->pages_created = 0;
	6854	}
	6855	if ((sequential_run = object->sequential)) {
	6856	if (sequential_run < 0) {
	6857	sequential_behavior = VM_BEHAVIOR_RSEQNTL;
	6858	sequential_run = 0 - sequential_run;
	6859	} else {
	6860	sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
	6861	}
	6862
	6863	}
	6864	switch (behavior) {
	6865
	6866	default:
	6867	behavior = VM_BEHAVIOR_DEFAULT;
	6868
	6869	case VM_BEHAVIOR_DEFAULT:
	6870	if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
	6871	goto out;
	6872
	6873	if (sequential_run >= (3 * PAGE_SIZE)) {
	6874	pre_heat_size = sequential_run + PAGE_SIZE;
	6875
	6876	if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL)
	6877	look_behind = FALSE;
	6878	else
	6879	look_ahead = FALSE;
	6880
	6881	*io_streaming = 1;
	6882	} else {
	6883
	6884	if (object->pages_created < (20 * min_ph_size)) {
	6885	/*
	6886	* prime the pump
	6887	*/
	6888	pre_heat_size = min_ph_size_in_bytes;
	6889	} else {
	6890	/*
	6891	* Linear growth in PH size: The maximum size is max_length...
	6892	* this cacluation will result in a size that is neither a
	6893	* power of 2 nor a multiple of PAGE_SIZE... so round
	6894	* it up to the nearest PAGE_SIZE boundary
	6895	*/
	6896	pre_heat_size = (max_length * object->pages_used) / object->pages_created;
	6897
	6898	if (pre_heat_size < min_ph_size_in_bytes)
	6899	pre_heat_size = min_ph_size_in_bytes;
	6900	else
	6901	pre_heat_size = round_page(pre_heat_size);
	6902	}
	6903	}
	6904	break;
	6905
	6906	case VM_BEHAVIOR_RANDOM:
	6907	if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
	6908	goto out;
	6909	break;
	6910
	6911	case VM_BEHAVIOR_SEQUENTIAL:
	6912	if ((pre_heat_size = cluster_size) == 0)
	6913	pre_heat_size = sequential_run + PAGE_SIZE;
	6914	look_behind = FALSE;
	6915	*io_streaming = 1;
	6916
	6917	break;
	6918
	6919	case VM_BEHAVIOR_RSEQNTL:
	6920	if ((pre_heat_size = cluster_size) == 0)
	6921	pre_heat_size = sequential_run + PAGE_SIZE;
	6922	look_ahead = FALSE;
	6923	*io_streaming = 1;
	6924
	6925	break;
	6926
	6927	}
	6928	throttle_limit = (uint32_t) max_length;
	6929	assert(throttle_limit == max_length);
	6930
	6931	if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) {
	6932	if (max_length > throttle_limit)
	6933	max_length = throttle_limit;
	6934	}
	6935	if (pre_heat_size > max_length)
	6936	pre_heat_size = max_length;
	6937
	6938	if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) {
	6939
	6940	unsigned int consider_free = vm_page_free_count + vm_page_cleaned_count;
	6941
	6942	if (consider_free < vm_page_throttle_limit) {
	6943	pre_heat_size = trunc_page(pre_heat_size / 16);
	6944	} else if (consider_free < vm_page_free_target) {
	6945	pre_heat_size = trunc_page(pre_heat_size / 4);
	6946	}
	6947
	6948	if (pre_heat_size < min_ph_size_in_bytes)
	6949	pre_heat_size = min_ph_size_in_bytes;
	6950	}
	6951	if (look_ahead == TRUE) {
	6952	if (look_behind == TRUE) {
	6953	/*
	6954	* if we get here its due to a random access...
	6955	* so we want to center the original fault address
	6956	* within the cluster we will issue... make sure
	6957	* to calculate 'head_size' as a multiple of PAGE_SIZE...
	6958	* 'pre_heat_size' is a multiple of PAGE_SIZE but not
	6959	* necessarily an even number of pages so we need to truncate
	6960	* the result to a PAGE_SIZE boundary
	6961	*/
	6962	head_size = trunc_page(pre_heat_size / 2);
	6963
	6964	if (target_start > head_size)
	6965	target_start -= head_size;
	6966	else
	6967	target_start = 0;
	6968
	6969	/*
	6970	* 'target_start' at this point represents the beginning offset
	6971	* of the cluster we are considering... 'orig_start' will be in
	6972	* the center of this cluster if we didn't have to clip the start
	6973	* due to running into the start of the file
	6974	*/
	6975	}
	6976	if ((target_start + pre_heat_size) > object_size)
	6977	pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start));
	6978	/*
	6979	* at this point caclulate the number of pages beyond the original fault
	6980	* address that we want to consider... this is guaranteed not to extend beyond
	6981	* the current EOF...
	6982	*/
	6983	assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start));
	6984	tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE;
	6985	} else {
	6986	if (pre_heat_size > target_start) {
	6987	/*
	6988	* since pre_heat_size is always smaller then 2^32,
	6989	* if it is larger then target_start (a 64 bit value)
	6990	* it is safe to clip target_start to 32 bits
	6991	*/
	6992	pre_heat_size = (vm_size_t) target_start;
	6993	}
	6994	tail_size = 0;
	6995	}
	6996	assert( !(target_start & PAGE_MASK_64));
	6997	assert( !(pre_heat_size & PAGE_MASK));
	6998
	6999	pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;
	7000
	7001	if (pre_heat_size <= PAGE_SIZE)
	7002	goto out;
	7003
	7004	if (look_behind == TRUE) {
	7005	/*
	7006	* take a look at the pages before the original
	7007	* faulting offset... recalculate this in case
	7008	* we had to clip 'pre_heat_size' above to keep
	7009	* from running past the EOF.
	7010	*/
	7011	head_size = pre_heat_size - tail_size - PAGE_SIZE;
	7012
	7013	for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
	7014	/*
	7015	* don't poke below the lowest offset
	7016	*/
	7017	if (offset < fault_info->lo_offset)
	7018	break;
	7019	/*
	7020	* for external objects and internal objects w/o an existence map
	7021	* vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN
	7022	*/
	7023	#if MACH_PAGEMAP
	7024	if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
	7025	/*
	7026	* we know for a fact that the pager can't provide the page
	7027	* so don't include it or any pages beyond it in this cluster
	7028	*/
	7029	break;
	7030	}
	7031	#endif
	7032	if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
	7033	/*
	7034	* don't bridge resident pages
	7035	*/
	7036	break;
	7037	}
	7038	*start = offset;
	7039	*length += PAGE_SIZE;
	7040	}
	7041	}
	7042	if (look_ahead == TRUE) {
	7043	for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
	7044	/*
	7045	* don't poke above the highest offset
	7046	*/
	7047	if (offset >= fault_info->hi_offset)
	7048	break;
	7049	assert(offset < object_size);
	7050
	7051	/*
	7052	* for external objects and internal objects w/o an existence map
	7053	* vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN
	7054	*/
	7055	#if MACH_PAGEMAP
	7056	if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
	7057	/*
	7058	* we know for a fact that the pager can't provide the page
	7059	* so don't include it or any pages beyond it in this cluster
	7060	*/
	7061	break;
	7062	}
	7063	#endif
	7064	if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
	7065	/*
	7066	* don't bridge resident pages
	7067	*/
	7068	break;
	7069	}
	7070	*length += PAGE_SIZE;
	7071	}
	7072	}
	7073	out:
	7074	if (*length > max_length)
	7075	*length = max_length;
	7076
	7077	pre_heat_cluster[*length / PAGE_SIZE]++;
	7078
	7079	vm_object_unlock(object);
	7080
	7081	DTRACE_VM1(clustersize, vm_size_t, *length);
	7082	}
	7083
	7084
	7085	/*
	7086	* Allow manipulation of individual page state. This is actually part of
	7087	* the UPL regimen but takes place on the VM object rather than on a UPL
	7088	*/
	7089
	7090	kern_return_t
	7091	vm_object_page_op(
	7092	vm_object_t object,
	7093	vm_object_offset_t offset,
	7094	int ops,
	7095	ppnum_t *phys_entry,
	7096	int *flags)
	7097	{
	7098	vm_page_t dst_page;
	7099
	7100	vm_object_lock(object);
	7101
	7102	if(ops & UPL_POP_PHYSICAL) {
	7103	if(object->phys_contiguous) {
	7104	if (phys_entry) {
	7105	*phys_entry = (ppnum_t)
	7106	(object->vo_shadow_offset >> PAGE_SHIFT);
	7107	}
	7108	vm_object_unlock(object);
	7109	return KERN_SUCCESS;
	7110	} else {
	7111	vm_object_unlock(object);
	7112	return KERN_INVALID_OBJECT;
	7113	}
	7114	}
	7115	if(object->phys_contiguous) {
	7116	vm_object_unlock(object);
	7117	return KERN_INVALID_OBJECT;
	7118	}
	7119
	7120	while(TRUE) {
	7121	if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
	7122	vm_object_unlock(object);
	7123	return KERN_FAILURE;
	7124	}
	7125
	7126	/* Sync up on getting the busy bit */
	7127	if((dst_page->busy \|\| dst_page->cleaning) &&
	7128	(((ops & UPL_POP_SET) &&
	7129	(ops & UPL_POP_BUSY)) \|\| (ops & UPL_POP_DUMP))) {
	7130	/* someone else is playing with the page, we will */
	7131	/* have to wait */
	7132	PAGE_SLEEP(object, dst_page, THREAD_UNINT);
	7133	continue;
	7134	}
	7135
	7136	if (ops & UPL_POP_DUMP) {
	7137	if (dst_page->pmapped == TRUE)
	7138	pmap_disconnect(dst_page->phys_page);
	7139
	7140	VM_PAGE_FREE(dst_page);
	7141	break;
	7142	}
	7143
	7144	if (flags) {
	7145	*flags = 0;
	7146
	7147	/* Get the condition of flags before requested ops */
	7148	/* are undertaken */
	7149
	7150	if(dst_page->dirty) *flags \|= UPL_POP_DIRTY;
	7151	if(dst_page->pageout) *flags \|= UPL_POP_PAGEOUT;
	7152	if(dst_page->precious) *flags \|= UPL_POP_PRECIOUS;
	7153	if(dst_page->absent) *flags \|= UPL_POP_ABSENT;
	7154	if(dst_page->busy) *flags \|= UPL_POP_BUSY;
	7155	}
	7156
	7157	/* The caller should have made a call either contingent with */
	7158	/* or prior to this call to set UPL_POP_BUSY */
	7159	if(ops & UPL_POP_SET) {
	7160	/* The protection granted with this assert will */
	7161	/* not be complete. If the caller violates the */
	7162	/* convention and attempts to change page state */
	7163	/* without first setting busy we may not see it */
	7164	/* because the page may already be busy. However */
	7165	/* if such violations occur we will assert sooner */
	7166	/* or later. */
	7167	assert(dst_page->busy \|\| (ops & UPL_POP_BUSY));
	7168	if (ops & UPL_POP_DIRTY) {
	7169	SET_PAGE_DIRTY(dst_page, FALSE);
	7170	}
	7171	if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
	7172	if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
	7173	if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
	7174	if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
	7175	}
	7176
	7177	if(ops & UPL_POP_CLR) {
	7178	assert(dst_page->busy);
	7179	if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
	7180	if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
	7181	if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
	7182	if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
	7183	if (ops & UPL_POP_BUSY) {
	7184	dst_page->busy = FALSE;
	7185	PAGE_WAKEUP(dst_page);
	7186	}
	7187	}
	7188
	7189	if (dst_page->encrypted) {
	7190	/*
	7191	* ENCRYPTED SWAP:
	7192	* We need to decrypt this encrypted page before the
	7193	* caller can access its contents.
	7194	* But if the caller really wants to access the page's
	7195	* contents, they have to keep the page "busy".
	7196	* Otherwise, the page could get recycled or re-encrypted
	7197	* at any time.
	7198	*/
	7199	if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
	7200	dst_page->busy) {
	7201	/*
	7202	* The page is stable enough to be accessed by
	7203	* the caller, so make sure its contents are
	7204	* not encrypted.
	7205	*/
	7206	vm_page_decrypt(dst_page, 0);
	7207	} else {
	7208	/*
	7209	* The page is not busy, so don't bother
	7210	* decrypting it, since anything could
	7211	* happen to it between now and when the
	7212	* caller wants to access it.
	7213	* We should not give the caller access
	7214	* to this page.
	7215	*/
	7216	assert(!phys_entry);
	7217	}
	7218	}
	7219
	7220	if (phys_entry) {
	7221	/*
	7222	* The physical page number will remain valid
	7223	* only if the page is kept busy.
	7224	* ENCRYPTED SWAP: make sure we don't let the
	7225	* caller access an encrypted page.
	7226	*/
	7227	assert(dst_page->busy);
	7228	assert(!dst_page->encrypted);
	7229	*phys_entry = dst_page->phys_page;
	7230	}
	7231
	7232	break;
	7233	}
	7234
	7235	vm_object_unlock(object);
	7236	return KERN_SUCCESS;
	7237
	7238	}
	7239
	7240	/*
	7241	* vm_object_range_op offers performance enhancement over
	7242	* vm_object_page_op for page_op functions which do not require page
	7243	* level state to be returned from the call. Page_op was created to provide
	7244	* a low-cost alternative to page manipulation via UPLs when only a single
	7245	* page was involved. The range_op call establishes the ability in the _op
	7246	* family of functions to work on multiple pages where the lack of page level
	7247	* state handling allows the caller to avoid the overhead of the upl structures.
	7248	*/
	7249
	7250	kern_return_t
	7251	vm_object_range_op(
	7252	vm_object_t object,
	7253	vm_object_offset_t offset_beg,
	7254	vm_object_offset_t offset_end,
	7255	int ops,
	7256	uint32_t *range)
	7257	{
	7258	vm_object_offset_t offset;
	7259	vm_page_t dst_page;
	7260
	7261	if (offset_end - offset_beg > (uint32_t) -1) {
	7262	/* range is too big and would overflow "range" /
	7263	return KERN_INVALID_ARGUMENT;
	7264	}
	7265	if (object->resident_page_count == 0) {
	7266	if (range) {
	7267	if (ops & UPL_ROP_PRESENT) {
	7268	*range = 0;
	7269	} else {
	7270	*range = (uint32_t) (offset_end - offset_beg);
	7271	assert(*range == (offset_end - offset_beg));
	7272	}
	7273	}
	7274	return KERN_SUCCESS;
	7275	}
	7276	vm_object_lock(object);
	7277
	7278	if (object->phys_contiguous) {
	7279	vm_object_unlock(object);
	7280	return KERN_INVALID_OBJECT;
	7281	}
	7282
	7283	offset = offset_beg & ~PAGE_MASK_64;
	7284
	7285	while (offset < offset_end) {
	7286	dst_page = vm_page_lookup(object, offset);
	7287	if (dst_page != VM_PAGE_NULL) {
	7288	if (ops & UPL_ROP_DUMP) {
	7289	if (dst_page->busy \|\| dst_page->cleaning) {
	7290	/*
	7291	* someone else is playing with the
	7292	* page, we will have to wait
	7293	*/
	7294	PAGE_SLEEP(object, dst_page, THREAD_UNINT);
	7295	/*
	7296	* need to relook the page up since it's
	7297	* state may have changed while we slept
	7298	* it might even belong to a different object
	7299	* at this point
	7300	*/
	7301	continue;
	7302	}
	7303	if (dst_page->laundry) {
	7304	dst_page->pageout = FALSE;
	7305
	7306	vm_pageout_steal_laundry(dst_page, FALSE);
	7307	}
	7308	if (dst_page->pmapped == TRUE)
	7309	pmap_disconnect(dst_page->phys_page);
	7310
	7311	VM_PAGE_FREE(dst_page);
	7312
	7313	} else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent)
	7314	break;
	7315	} else if (ops & UPL_ROP_PRESENT)
	7316	break;
	7317
	7318	offset += PAGE_SIZE;
	7319	}
	7320	vm_object_unlock(object);
	7321
	7322	if (range) {
	7323	if (offset > offset_end)
	7324	offset = offset_end;
	7325	if(offset > offset_beg) {
	7326	*range = (uint32_t) (offset - offset_beg);
	7327	assert(*range == (offset - offset_beg));
	7328	} else {
	7329	*range = 0;
	7330	}
	7331	}
	7332	return KERN_SUCCESS;
	7333	}
	7334
	7335
	7336	uint32_t scan_object_collision = 0;
	7337
	7338	void
	7339	vm_object_lock(vm_object_t object)
	7340	{
	7341	if (object == vm_pageout_scan_wants_object) {
	7342	scan_object_collision++;
	7343	mutex_pause(2);
	7344	}
	7345	lck_rw_lock_exclusive(&object->Lock);
	7346	}
	7347
	7348	boolean_t
	7349	vm_object_lock_avoid(vm_object_t object)
	7350	{
	7351	if (object == vm_pageout_scan_wants_object) {
	7352	scan_object_collision++;
	7353	return TRUE;
	7354	}
	7355	return FALSE;
	7356	}
	7357
	7358	boolean_t
	7359	_vm_object_lock_try(vm_object_t object)
	7360	{
	7361	return (lck_rw_try_lock_exclusive(&object->Lock));
	7362	}
	7363
	7364	boolean_t
	7365	vm_object_lock_try(vm_object_t object)
	7366	{
	7367	/*
	7368	* Called from hibernate path so check before blocking.
	7369	*/
	7370	if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level()==0) {
	7371	mutex_pause(2);
	7372	}
	7373	return _vm_object_lock_try(object);
	7374	}
	7375
	7376	void
	7377	vm_object_lock_shared(vm_object_t object)
	7378	{
	7379	if (vm_object_lock_avoid(object)) {
	7380	mutex_pause(2);
	7381	}
	7382	lck_rw_lock_shared(&object->Lock);
	7383	}
	7384
	7385	boolean_t
	7386	vm_object_lock_try_shared(vm_object_t object)
	7387	{
	7388	if (vm_object_lock_avoid(object)) {
	7389	mutex_pause(2);
	7390	}
	7391	return (lck_rw_try_lock_shared(&object->Lock));
	7392	}
	7393
	7394
	7395	unsigned int vm_object_change_wimg_mode_count = 0;
	7396
	7397	/*
	7398	* The object must be locked
	7399	*/
	7400	void
	7401	vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
	7402	{
	7403	vm_page_t p;
	7404
	7405	vm_object_lock_assert_exclusive(object);
	7406
	7407	vm_object_paging_wait(object, THREAD_UNINT);
	7408
	7409	queue_iterate(&object->memq, p, vm_page_t, listq) {
	7410
	7411	if (!p->fictitious)
	7412	pmap_set_cache_attributes(p->phys_page, wimg_mode);
	7413	}
	7414	if (wimg_mode == VM_WIMG_USE_DEFAULT)
	7415	object->set_cache_attr = FALSE;
	7416	else
	7417	object->set_cache_attr = TRUE;
	7418
	7419	object->wimg_bits = wimg_mode;
	7420
	7421	vm_object_change_wimg_mode_count++;
	7422	}
	7423
	7424	#if CONFIG_FREEZE
	7425
	7426	kern_return_t vm_object_pack(
	7427	unsigned int *purgeable_count,
	7428	unsigned int *wired_count,
	7429	unsigned int *clean_count,
	7430	unsigned int *dirty_count,
	7431	unsigned int dirty_budget,
	7432	boolean_t *shared,
	7433	vm_object_t src_object,
	7434	struct default_freezer_handle *df_handle)
	7435	{
	7436	kern_return_t kr = KERN_SUCCESS;
	7437
	7438	vm_object_lock(src_object);
	7439
	7440	purgeable_count = wired_count = clean_count = dirty_count = 0;
	7441	*shared = FALSE;
	7442
	7443	if (!src_object->alive \|\| src_object->terminating){
	7444	kr = KERN_FAILURE;
	7445	goto done;
	7446	}
	7447
	7448	if (src_object->purgable == VM_PURGABLE_VOLATILE) {
	7449	*purgeable_count = src_object->resident_page_count;
	7450
	7451	/* If the default freezer handle is null, we're just walking the pages to discover how many can be hibernated */
	7452	if (df_handle != NULL) {
	7453	purgeable_q_t queue;
	7454	/* object should be on a queue */
	7455	assert(src_object->objq.next != NULL &&
	7456	src_object->objq.prev != NULL);
	7457	queue = vm_purgeable_object_remove(src_object);
	7458	assert(queue);
	7459	vm_page_lock_queues();
	7460	vm_purgeable_token_delete_first(queue);
	7461	vm_page_unlock_queues();
	7462	vm_object_purge(src_object);
	7463	}
	7464	goto done;
	7465	}
	7466
	7467	if (src_object->ref_count == 1) {
	7468	vm_object_pack_pages(wired_count, clean_count, dirty_count, dirty_budget, src_object, df_handle);
	7469	} else {
	7470	if (src_object->internal) {
	7471	*shared = TRUE;
	7472	}
	7473	}
	7474	done:
	7475	vm_object_unlock(src_object);
	7476
	7477	return kr;
	7478	}
	7479
	7480
	7481	void
	7482	vm_object_pack_pages(
	7483	unsigned int *wired_count,
	7484	unsigned int *clean_count,
	7485	unsigned int *dirty_count,
	7486	unsigned int dirty_budget,
	7487	vm_object_t src_object,
	7488	struct default_freezer_handle *df_handle)
	7489	{
	7490	vm_page_t p, next;
	7491
	7492	next = (vm_page_t)queue_first(&src_object->memq);
	7493
	7494	while (!queue_end(&src_object->memq, (queue_entry_t)next)) {
	7495	p = next;
	7496	next = (vm_page_t)queue_next(&next->listq);
	7497
	7498	/* Finish up if we've hit our pageout limit */
	7499	if (dirty_budget && (dirty_budget == *dirty_count)) {
	7500	break;
	7501	}
	7502	assert(!p->laundry);
	7503
	7504	if (p->fictitious \|\| p->busy )
	7505	continue;
	7506
	7507	if (p->absent \|\| p->unusual \|\| p->error)
	7508	continue;
	7509
	7510	if (VM_PAGE_WIRED(p)) {
	7511	(*wired_count)++;
	7512	continue;
	7513	}
	7514
	7515	if (df_handle == NULL) {
	7516	if (p->dirty \|\| pmap_is_modified(p->phys_page)) {
	7517	(*dirty_count)++;
	7518	} else {
	7519	(*clean_count)++;
	7520	}
	7521	continue;
	7522	}
	7523
	7524	if (p->cleaning) {
	7525	p->pageout = TRUE;
	7526	continue;
	7527	}
	7528
	7529	if (p->pmapped == TRUE) {
	7530	int refmod_state;
	7531	refmod_state = pmap_disconnect(p->phys_page);
	7532	if (refmod_state & VM_MEM_MODIFIED) {
	7533	SET_PAGE_DIRTY(p, FALSE);
	7534	}
	7535	}
	7536
	7537	if (p->dirty) {
	7538	default_freezer_pack_page(p, df_handle);
	7539	(*dirty_count)++;
	7540	}
	7541	else {
	7542	VM_PAGE_FREE(p);
	7543	(*clean_count)++;
	7544	}
	7545	}
	7546	}
	7547
	7548	void
	7549	vm_object_pageout(
	7550	vm_object_t object)
	7551	{
	7552	vm_page_t p, next;
	7553
	7554	assert(object != VM_OBJECT_NULL );
	7555
	7556	vm_object_lock(object);
	7557
	7558	next = (vm_page_t)queue_first(&object->memq);
	7559
	7560	while (!queue_end(&object->memq, (queue_entry_t)next)) {
	7561	p = next;
	7562	next = (vm_page_t)queue_next(&next->listq);
	7563
	7564	/* Throw to the pageout queue */
	7565	vm_page_lockspin_queues();
	7566
	7567	/*
	7568	* see if page is already in the process of
	7569	* being cleaned... if so, leave it alone
	7570	*/
	7571	if (!p->laundry) {
	7572	VM_PAGE_QUEUES_REMOVE(p);
	7573	vm_pageout_cluster(p, TRUE);
	7574	}
	7575	vm_page_unlock_queues();
	7576	}
	7577
	7578	vm_object_unlock(object);
	7579	}
	7580
	7581	kern_return_t
	7582	vm_object_pagein(
	7583	vm_object_t object)
	7584	{
	7585	memory_object_t pager;
	7586	kern_return_t kr;
	7587
	7588	vm_object_lock(object);
	7589
	7590	pager = object->pager;
	7591
	7592	if (!object->pager_ready \|\| pager == MEMORY_OBJECT_NULL) {
	7593	vm_object_unlock(object);
	7594	return KERN_FAILURE;
	7595	}
	7596
	7597	vm_object_paging_wait(object, THREAD_UNINT);
	7598	vm_object_paging_begin(object);
	7599
	7600	object->blocked_access = TRUE;
	7601	vm_object_unlock(object);
	7602
	7603	kr = memory_object_data_reclaim(pager, TRUE);
	7604
	7605	vm_object_lock(object);
	7606
	7607	object->blocked_access = FALSE;
	7608	vm_object_paging_end(object);
	7609
	7610	vm_object_unlock(object);
	7611
	7612	return kr;
	7613	}
	7614	#endif /* CONFIG_FREEZE */