git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
	6	* The contents of this file constitute Original Code as defined in and
	7	* are subject to the Apple Public Source License Version 1.1 (the
	8	* "License"). You may not use this file except in compliance with the
	9	* License. Please obtain a copy of the License at
	10	* http://www.apple.com/publicsource and read it before using this file.
	11	*
	12	* This Original Code and all software distributed under the License are
	13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
	17	* License for the specific language governing rights and limitations
	18	* under the License.
	19	*
	20	* @APPLE_LICENSE_HEADER_END@
	21	*/
	22	/*
	23	* @OSF_COPYRIGHT@
	24	*/
	25	/*
	26	* Mach Operating System
	27	* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
	28	* All Rights Reserved.
	29	*
	30	* Permission to use, copy, modify and distribute this software and its
	31	* documentation is hereby granted, provided that both the copyright
	32	* notice and this permission notice appear in all copies of the
	33	* software, derivative works or modified versions, and any portions
	34	* thereof, and that both notices appear in supporting documentation.
	35	*
	36	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	37	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
	38	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	39	*
	40	* Carnegie Mellon requests users of this software to return to
	41	*
	42	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	43	* School of Computer Science
	44	* Carnegie Mellon University
	45	* Pittsburgh PA 15213-3890
	46	*
	47	* any improvements or extensions that they make and grant Carnegie Mellon
	48	* the rights to redistribute these changes.
	49	*/
	50	/*
	51	*/
	52	/*
	53	* File: vm_fault.c
	54	* Author: Avadis Tevanian, Jr., Michael Wayne Young
	55	*
	56	* Page fault handling module.
	57	*/
#ifdef MACH_BSD
/*
 * External workaround variables, declared only when MACH_BSD is defined.
 * Remove after component interface available.
 * NOTE(review): judging by the names these relate to the vnode and
 * device pagers -- confirm against their definitions.
 */
extern int vnode_pager_workaround;
extern int device_pager_workaround;
#endif
	63
	64	#include <mach_cluster_stats.h>
	65	#include <mach_pagemap.h>
	66	#include <mach_kdb.h>
	67
	68	#include <vm/vm_fault.h>
	69	#include <mach/kern_return.h>
	70	#include <mach/message.h> /* for error codes */
	71	#include <kern/host_statistics.h>
	72	#include <kern/counters.h>
	73	#include <kern/task.h>
	74	#include <kern/thread.h>
	75	#include <kern/sched_prim.h>
	76	#include <kern/host.h>
	77	#include <kern/xpr.h>
	78	#include <ppc/proc_reg.h>
	79	#include <ppc/pmap_internals.h>
	80	#include <vm/task_working_set.h>
	81	#include <vm/vm_map.h>
	82	#include <vm/vm_object.h>
	83	#include <vm/vm_page.h>
	84	#include <vm/pmap.h>
	85	#include <vm/vm_pageout.h>
	86	#include <mach/vm_param.h>
	87	#include <mach/vm_behavior.h>
	88	#include <mach/memory_object.h>
	89	/* For memory_object_data_{request,unlock} */
	90	#include <kern/mach_param.h>
	91	#include <kern/macro_help.h>
	92	#include <kern/zalloc.h>
	93	#include <kern/misc_protos.h>
	94
	95	#include <sys/kdebug.h>
	96
/*
 * Compile-time switches.
 *
 * VM_FAULT_CLASSIFY: when nonzero, the vm_fault_classify() and
 * vm_fault_classify_init() declarations below are compiled in.
 *
 * VM_FAULT_STATIC_CONFIG: when nonzero, the run-time tunables
 * vm_fault_dirty_handling, vm_fault_interruptible and
 * software_reference_bits are not compiled, and vm_fault_page takes
 * its "STATIC_CONFIG" code paths instead of consulting them.
 */
#define VM_FAULT_CLASSIFY 0
#define VM_FAULT_STATIC_CONFIG 1

/* When nonzero, the dbgTrace() calls in vm_fault_page are compiled in. */
#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */

/*
 * Threshold compared against object->absent_count in vm_fault_page:
 * for an object that is not internal, once the count exceeds this
 * value the fault waits for outstanding page requests to be resolved.
 */
int vm_object_absent_max = 50;

/*
 * When nonzero, vm_fault_page prints a message if
 * memory_object_data_unlock fails.
 */
int vm_fault_debug = 0;
/* NOTE(review): consumer of this flag is not in the code seen here -- confirm its use. */
boolean_t vm_page_deactivate_behind = TRUE;


#if !VM_FAULT_STATIC_CONFIG
/*
 * Run-time tunables, present only without static configuration.
 *
 * vm_fault_dirty_handling: when set, vm_fault_page removes
 * VM_PROT_WRITE from *protection on faults that do not request write
 * access (write faults are then used to set the dirty bit).
 *
 * vm_fault_interruptible: when clear, vm_fault_page forces
 * "interruptible" to THREAD_UNINT.
 *
 * software_reference_bits: when clear, vm_fault_page removes a found
 * page from the paging queues, counting a reactivation if the page
 * was inactive.
 */
boolean_t vm_fault_dirty_handling = FALSE;
boolean_t vm_fault_interruptible = FALSE;
boolean_t software_reference_bits = TRUE;
#endif

#if MACH_KDB
/*
 * Debugger watchpoint list.  While it is non-null, vm_fault_page does
 * not grant write permission on a fault that did not ask for it.
 */
extern struct db_watchpoint *db_watchpoint_list;
#endif /* MACH_KDB */
	117
/*
 * Forward declarations of internal routines.
 * Only the prototypes appear here; the contract of each routine is
 * documented with its definition.
 */
extern kern_return_t vm_fault_wire_fast(
vm_map_t map,
vm_offset_t va,
vm_map_entry_t entry,
pmap_t pmap);

extern void vm_fault_continue(void);

extern void vm_fault_copy_cleanup(
vm_page_t page,
vm_page_t top_page);

extern void vm_fault_copy_dst_cleanup(
vm_page_t page);

/* Fault classification hooks; declared only when VM_FAULT_CLASSIFY is nonzero. */
#if VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t object,
vm_object_offset_t offset,
vm_prot_t fault_type);

extern void vm_fault_classify_init(void);
#endif
	141
	142	/*
	143	* Routine: vm_fault_init
	144	* Purpose:
	145	* Initialize our private data structures.
	146	*/
	147	void
	148	vm_fault_init(void)
	149	{
	150	}
	151
	152	/*
	153	* Routine: vm_fault_cleanup
	154	* Purpose:
	155	* Clean up the result of vm_fault_page.
	156	* Results:
	157	* The paging reference for "object" is released.
	158	* "object" is unlocked.
	159	* If "top_page" is not null, "top_page" is
	160	* freed and the paging reference for the object
	161	* containing it is released.
	162	*
	163	* In/out conditions:
	164	* "object" must be locked.
	165	*/
	166	void
	167	vm_fault_cleanup(
	168	register vm_object_t object,
	169	register vm_page_t top_page)
	170	{
	171	vm_object_paging_end(object);
	172	vm_object_unlock(object);
	173
	174	if (top_page != VM_PAGE_NULL) {
	175	object = top_page->object;
	176	vm_object_lock(object);
	177	VM_PAGE_FREE(top_page);
	178	vm_object_paging_end(object);
	179	vm_object_unlock(object);
	180	}
	181	}
	182
/*
 * Optional pagein cluster statistics.
 *
 * With MACH_CLUSTER_STATS enabled, cluster_stats_in[] holds three
 * counters per slot and CLUSTER_STAT(clause) expands to "clause".
 * Otherwise CLUSTER_STAT(clause) expands to nothing.
 *
 * The CLUSTER_STAT_HIGHER/LOWER/CLUSTER increment macros exist only in
 * the enabled configuration, so code using them must itself be wrapped
 * in CLUSTER_STAT().  They index cluster_stats_in[] with their argument
 * without checking it against MAXCLUSTERPAGES.
 */
#if MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES 16
struct {
unsigned long pages_in_cluster;
unsigned long pages_at_higher_offsets;
unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause) clause
#define CLUSTER_STAT_HIGHER(x) \
((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x) \
((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x) \
((cluster_stats_in[(x)].pages_in_cluster)++)
#else /* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif /* MACH_CLUSTER_STATS */
	200
/* XXX - temporary */
/*
 * vm_allow_clustered_pagein: global switch tested by vm_fault_page;
 * clustered pagein is skipped while it is FALSE.
 * vm_pagein_cluster_used: incremented by vm_fault_page each time it
 * finds a resident page whose "clustered" bit is still set.
 */
boolean_t vm_allow_clustered_pagein = FALSE;
int vm_pagein_cluster_used = 0;

/*
* Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
*/
int vm_default_ahead = 1; /* Number of pages to prepage ahead */
int vm_default_behind = 0; /* Number of pages to prepage behind */

/*
 * True when none of the bits below PAGE_SIZE_64 are set in x, i.e. x is
 * page aligned (assumes PAGE_SIZE_64 is a power of two).
 */
#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
	212
	213	/*
	214	* Routine: vm_fault_page
	215	* Purpose:
	216	* Find the resident page for the virtual memory
	217	* specified by the given virtual memory object
	218	* and offset.
	219	* Additional arguments:
	220	* The required permissions for the page are given
	221	* in "fault_type". Desired permissions are included
	222	* in "protection". The minimum and maximum valid offsets
	223	* within the object for the relevant map entry are
	224	* passed in "lo_offset" and "hi_offset" respectively and
	225	* the expected page reference pattern is passed in "behavior".
	226	* These three parameters are used to determine pagein cluster
	227	* limits.
	228	*
	229	* If the desired page is known to be resident (for
	230	* example, because it was previously wired down), asserting
	231	* the "unwiring" parameter will speed the search.
	232	*
	233	* If the operation can be interrupted (by thread_abort
	234	* or thread_terminate), then the "interruptible"
	235	* parameter should be asserted.
	236	*
	237	* Results:
	238	* The page containing the proper data is returned
	239	* in "result_page".
	240	*
	241	* In/out conditions:
	242	* The source object must be locked and referenced,
	243	* and must donate one paging reference. The reference
	244	* is not affected. The paging reference and lock are
	245	* consumed.
	246	*
	247	* If the call succeeds, the object in which "result_page"
	248	* resides is left locked and holding a paging reference.
	249	* If this is not the original object, a busy page in the
	250	* original object is returned in "top_page", to prevent other
	251	* callers from pursuing this same data, along with a paging
	252	* reference for the original object. The "top_page" should
	253	* be destroyed when this guarantee is no longer required.
	254	* The "result_page" is also left busy. It is not removed
	255	* from the pageout queues.
	256	*/
	257
	258	vm_fault_return_t
	259	vm_fault_page(
	260	/* Arguments: */
	261	vm_object_t first_object, /* Object to begin search */
	262	vm_object_offset_t first_offset, /* Offset into object */
	263	vm_prot_t fault_type, /* What access is requested */
	264	boolean_t must_be_resident,/* Must page be resident? */
	265	int interruptible, /* how may fault be interrupted? */
	266	vm_object_offset_t lo_offset, /* Map entry start */
	267	vm_object_offset_t hi_offset, /* Map entry end */
	268	vm_behavior_t behavior, /* Page reference behavior */
	269	/* Modifies in place: */
	270	vm_prot_t protection, / Protection for mapping */
	271	/* Returns: */
	272	vm_page_t result_page, / Page found, if successful */
	273	vm_page_t top_page, / Page in top object, if
	274	* not result_page. */
	275	int type_of_fault, / if non-null, fill in with type of fault
	276	* COW, zero-fill, etc... returned in trace point */
	277	/* More arguments: */
	278	kern_return_t error_code, / code if page is in error */
	279	boolean_t no_zero_fill, /* don't zero fill absent pages */
	280	boolean_t data_supply, /* treat as data_supply if
	281	* it is a write fault and a full
	282	* page is provided */
	283	vm_map_t map,
	284	vm_offset_t vaddr)
	285	{
	286	register
	287	vm_page_t m;
	288	register
	289	vm_object_t object;
	290	register
	291	vm_object_offset_t offset;
	292	vm_page_t first_m;
	293	vm_object_t next_object;
	294	vm_object_t copy_object;
	295	boolean_t look_for_page;
	296	vm_prot_t access_required = fault_type;
	297	vm_prot_t wants_copy_flag;
	298	vm_size_t cluster_size, length;
	299	vm_object_offset_t cluster_offset;
	300	vm_object_offset_t cluster_start, cluster_end, paging_offset;
	301	vm_object_offset_t align_offset;
	302	CLUSTER_STAT(int pages_at_higher_offsets;)
	303	CLUSTER_STAT(int pages_at_lower_offsets;)
	304	kern_return_t wait_result;
	305	thread_t cur_thread;
	306	boolean_t interruptible_state;
	307	boolean_t bumped_pagein = FALSE;
	308
	309
	310	#if MACH_PAGEMAP
	311	/*
	312	* MACH page map - an optional optimization where a bit map is maintained
	313	* by the VM subsystem for internal objects to indicate which pages of
	314	* the object currently reside on backing store. This existence map
	315	* duplicates information maintained by the vnode pager. It is
	316	* created at the time of the first pageout against the object, i.e.
	317	* at the same time pager for the object is created. The optimization
	318	* is designed to eliminate pager interaction overhead, if it is
	319	* 'known' that the page does not exist on backing store.
	320	*
	321	* LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
	322	* either marked as paged out in the existence map for the object or no
	323	* existence map exists for the object. LOOK_FOR() is one of the
	324	* criteria in the decision to invoke the pager. It is also used as one
	325	* of the criteria to terminate the scan for adjacent pages in a clustered
	326	* pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
	327	* permanent objects. Note also that if the pager for an internal object
	328	* has not been created, the pager is not invoked regardless of the value
	329	* of LOOK_FOR() and that clustered pagein scans are only done on an object
	330	* for which a pager has been created.
	331	*
	332	* PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
	333	* is marked as paged out in the existence map for the object. PAGED_OUT()
	334	* is used to determine if a page has already been pushed
	335	* into a copy object in order to avoid a redundant page out operation.
	336	*/
	337	#define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
	338	!= VM_EXTERNAL_STATE_ABSENT)
	339	#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
	340	== VM_EXTERNAL_STATE_EXISTS)
	341	#else /* MACH_PAGEMAP */
	342	/*
	343	* If the MACH page map optimization is not enabled,
	344	* LOOK_FOR() always evaluates to TRUE. The pager will always be
	345	* invoked to resolve missing pages in an object, assuming the pager
	346	* has been created for the object. In a clustered page operation, the
	347	* absence of a page on backing store cannot be used to terminate
	348	* a scan for adjacent pages since that information is available only in
	349	* the pager. Hence pages that may not be paged out are potentially
	350	* included in a clustered request. The vnode pager is coded to deal
	351	* with any combination of absent/present pages in a clustered
	352	* pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
	353	* will always be invoked to push a dirty page into a copy object assuming
	354	* a pager has been created. If the page has already been pushed, the
	355	* pager will ignore the new request.
	356	*/
	357	#define LOOK_FOR(o, f) TRUE
	358	#define PAGED_OUT(o, f) FALSE
	359	#endif /* MACH_PAGEMAP */
	360
	361	/*
	362	* Recovery actions
	363	*/
	364	#define PREPARE_RELEASE_PAGE(m) \
	365	MACRO_BEGIN \
	366	vm_page_lock_queues(); \
	367	MACRO_END
	368
	369	#define DO_RELEASE_PAGE(m) \
	370	MACRO_BEGIN \
	371	PAGE_WAKEUP_DONE(m); \
	372	if (!m->active && !m->inactive) \
	373	vm_page_activate(m); \
	374	vm_page_unlock_queues(); \
	375	MACRO_END
	376
	377	#define RELEASE_PAGE(m) \
	378	MACRO_BEGIN \
	379	PREPARE_RELEASE_PAGE(m); \
	380	DO_RELEASE_PAGE(m); \
	381	MACRO_END
	382
	383	#if TRACEFAULTPAGE
	384	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
	385	#endif
	386
	387
	388
	389	#if !VM_FAULT_STATIC_CONFIG
	390	if (vm_fault_dirty_handling
	391	#if MACH_KDB
	392	/*
	393	* If there are watchpoints set, then
	394	* we don't want to give away write permission
	395	* on a read fault. Make the task write fault,
	396	* so that the watchpoint code notices the access.
	397	*/
	398	\|\| db_watchpoint_list
	399	#endif /* MACH_KDB */
	400	) {
	401	/*
	402	* If we aren't asking for write permission,
	403	* then don't give it away. We're using write
	404	* faults to set the dirty bit.
	405	*/
	406	if (!(fault_type & VM_PROT_WRITE))
	407	*protection &= ~VM_PROT_WRITE;
	408	}
	409
	410	if (!vm_fault_interruptible)
	411	interruptible = THREAD_UNINT;
	412	#else /* STATIC_CONFIG */
	413	#if MACH_KDB
	414	/*
	415	* If there are watchpoints set, then
	416	* we don't want to give away write permission
	417	* on a read fault. Make the task write fault,
	418	* so that the watchpoint code notices the access.
	419	*/
	420	if (db_watchpoint_list) {
	421	/*
	422	* If we aren't asking for write permission,
	423	* then don't give it away. We're using write
	424	* faults to set the dirty bit.
	425	*/
	426	if (!(fault_type & VM_PROT_WRITE))
	427	*protection &= ~VM_PROT_WRITE;
	428	}
	429
	430	#endif /* MACH_KDB */
	431	#endif /* STATIC_CONFIG */
	432
	433	cur_thread = current_thread();
	434
	435	interruptible_state = cur_thread->interruptible;
	436	if (interruptible == THREAD_UNINT)
	437	cur_thread->interruptible = FALSE;
	438
	439	/*
	440	* INVARIANTS (through entire routine):
	441	*
	442	* 1) At all times, we must either have the object
	443	* lock or a busy page in some object to prevent
	444	* some other thread from trying to bring in
	445	* the same page.
	446	*
	447	* Note that we cannot hold any locks during the
	448	* pager access or when waiting for memory, so
	449	* we use a busy page then.
	450	*
	451	* Note also that we aren't as concerned about more than
	452	* one thread attempting to memory_object_data_unlock
	453	* the same page at once, so we don't hold the page
	454	* as busy then, but do record the highest unlock
	455	* value so far. [Unlock requests may also be delivered
	456	* out of order.]
	457	*
	458	* 2) To prevent another thread from racing us down the
	459	* shadow chain and entering a new page in the top
	460	* object before we do, we must keep a busy page in
	461	* the top object while following the shadow chain.
	462	*
	463	* 3) We must increment paging_in_progress on any object
	464	* for which we have a busy page
	465	*
	466	* 4) We leave busy pages on the pageout queues.
	467	* If the pageout daemon comes across a busy page,
	468	* it will remove the page from the pageout queues.
	469	*/
	470
	471	/*
	472	* Search for the page at object/offset.
	473	*/
	474
	475	object = first_object;
	476	offset = first_offset;
	477	first_m = VM_PAGE_NULL;
	478	access_required = fault_type;
	479
	480	XPR(XPR_VM_FAULT,
	481	"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
	482	(integer_t)object, offset, fault_type, *protection, 0);
	483
	484	/*
	485	* See whether this page is resident
	486	*/
	487
	488	while (TRUE) {
	489	#if TRACEFAULTPAGE
	490	dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
	491	#endif
	492	if (!object->alive) {
	493	vm_fault_cleanup(object, first_m);
	494	cur_thread->interruptible = interruptible_state;
	495	return(VM_FAULT_MEMORY_ERROR);
	496	}
	497	m = vm_page_lookup(object, offset);
	498	#if TRACEFAULTPAGE
	499	dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
	500	#endif
	501	if (m != VM_PAGE_NULL) {
	502	/*
	503	* If the page was pre-paged as part of a
	504	* cluster, record the fact.
	505	*/
	506	if (m->clustered) {
	507	vm_pagein_cluster_used++;
	508	m->clustered = FALSE;
	509	}
	510
	511	/*
	512	* If the page is being brought in,
	513	* wait for it and then retry.
	514	*
	515	* A possible optimization: if the page
	516	* is known to be resident, we can ignore
	517	* pages that are absent (regardless of
	518	* whether they're busy).
	519	*/
	520
	521	if (m->busy) {
	522	#if TRACEFAULTPAGE
	523	dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
	524	#endif
	525	PAGE_ASSERT_WAIT(m, interruptible);
	526	vm_object_unlock(object);
	527	XPR(XPR_VM_FAULT,
	528	"vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
	529	(integer_t)object, offset,
	530	(integer_t)m, 0, 0);
	531	counter(c_vm_fault_page_block_busy_kernel++);
	532	wait_result = thread_block((void (*)(void))0);
	533
	534	vm_object_lock(object);
	535	if (wait_result != THREAD_AWAKENED) {
	536	vm_fault_cleanup(object, first_m);
	537	cur_thread->interruptible = interruptible_state;
	538	if (wait_result == THREAD_RESTART)
	539	{
	540	return(VM_FAULT_RETRY);
	541	}
	542	else
	543	{
	544	return(VM_FAULT_INTERRUPTED);
	545	}
	546	}
	547	continue;
	548	}
	549
	550	/*
	551	* If the page is in error, give up now.
	552	*/
	553
	554	if (m->error) {
	555	#if TRACEFAULTPAGE
	556	dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
	557	#endif
	558	if (error_code)
	559	*error_code = m->page_error;
	560	VM_PAGE_FREE(m);
	561	vm_fault_cleanup(object, first_m);
	562	cur_thread->interruptible = interruptible_state;
	563	return(VM_FAULT_MEMORY_ERROR);
	564	}
	565
	566	/*
	567	* If the pager wants us to restart
	568	* at the top of the chain,
	569	* typically because it has moved the
	570	* page to another pager, then do so.
	571	*/
	572
	573	if (m->restart) {
	574	#if TRACEFAULTPAGE
	575	dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
	576	#endif
	577	VM_PAGE_FREE(m);
	578	vm_fault_cleanup(object, first_m);
	579	cur_thread->interruptible = interruptible_state;
	580	return(VM_FAULT_RETRY);
	581	}
	582
	583	/*
	584	* If the page isn't busy, but is absent,
	585	* then it was deemed "unavailable".
	586	*/
	587
	588	if (m->absent) {
	589	/*
	590	* Remove the non-existent page (unless it's
	591	* in the top object) and move on down to the
	592	* next object (if there is one).
	593	*/
	594	#if TRACEFAULTPAGE
	595	dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
	596	#endif
	597
	598	next_object = object->shadow;
	599	if (next_object == VM_OBJECT_NULL) {
	600	vm_page_t real_m;
	601
	602	assert(!must_be_resident);
	603
	604	if (object->shadow_severed) {
	605	vm_fault_cleanup(
	606	object, first_m);
	607	cur_thread->interruptible = interruptible_state;
	608	return VM_FAULT_MEMORY_ERROR;
	609	}
	610
	611	/*
	612	* Absent page at bottom of shadow
	613	* chain; zero fill the page we left
	614	* busy in the first object, and flush
	615	* the absent page. But first we
	616	* need to allocate a real page.
	617	*/
	618	if (VM_PAGE_THROTTLED() \|\|
	619	(real_m = vm_page_grab()) == VM_PAGE_NULL) {
	620	vm_fault_cleanup(object, first_m);
	621	cur_thread->interruptible = interruptible_state;
	622	return(VM_FAULT_MEMORY_SHORTAGE);
	623	}
	624
	625	XPR(XPR_VM_FAULT,
	626	"vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
	627	(integer_t)object, offset,
	628	(integer_t)m,
	629	(integer_t)first_object, 0);
	630	if (object != first_object) {
	631	VM_PAGE_FREE(m);
	632	vm_object_paging_end(object);
	633	vm_object_unlock(object);
	634	object = first_object;
	635	offset = first_offset;
	636	m = first_m;
	637	first_m = VM_PAGE_NULL;
	638	vm_object_lock(object);
	639	}
	640
	641	VM_PAGE_FREE(m);
	642	assert(real_m->busy);
	643	vm_page_insert(real_m, object, offset);
	644	m = real_m;
	645
	646	/*
	647	* Drop the lock while zero filling
	648	* page. Then break because this
	649	* is the page we wanted. Checking
	650	* the page lock is a waste of time;
	651	* this page was either absent or
	652	* newly allocated -- in both cases
	653	* it can't be page locked by a pager.
	654	*/
	655	m->no_isync = FALSE;
	656
	657	if (!no_zero_fill) {
	658	vm_object_unlock(object);
	659	vm_page_zero_fill(m);
	660	if (type_of_fault)
	661	*type_of_fault = DBG_ZERO_FILL_FAULT;
	662	VM_STAT(zero_fill_count++);
	663
	664	if (bumped_pagein == TRUE) {
	665	VM_STAT(pageins--);
	666	current_task()->pageins--;
	667	}
	668	vm_object_lock(object);
	669	}
	670	pmap_clear_modify(m->phys_addr);
	671	vm_page_lock_queues();
	672	VM_PAGE_QUEUES_REMOVE(m);
	673	m->page_ticket = vm_page_ticket;
	674	vm_page_ticket_roll++;
	675	if(vm_page_ticket_roll ==
	676	VM_PAGE_TICKETS_IN_ROLL) {
	677	vm_page_ticket_roll = 0;
	678	if(vm_page_ticket ==
	679	VM_PAGE_TICKET_ROLL_IDS)
	680	vm_page_ticket= 0;
	681	else
	682	vm_page_ticket++;
	683	}
	684	queue_enter(&vm_page_queue_inactive,
	685	m, vm_page_t, pageq);
	686	m->inactive = TRUE;
	687	vm_page_inactive_count++;
	688	vm_page_unlock_queues();
	689	break;
	690	} else {
	691	if (must_be_resident) {
	692	vm_object_paging_end(object);
	693	} else if (object != first_object) {
	694	vm_object_paging_end(object);
	695	VM_PAGE_FREE(m);
	696	} else {
	697	first_m = m;
	698	m->absent = FALSE;
	699	m->unusual = FALSE;
	700	vm_object_absent_release(object);
	701	m->busy = TRUE;
	702
	703	vm_page_lock_queues();
	704	VM_PAGE_QUEUES_REMOVE(m);
	705	vm_page_unlock_queues();
	706	}
	707	XPR(XPR_VM_FAULT,
	708	"vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
	709	(integer_t)object, offset,
	710	(integer_t)next_object,
	711	offset+object->shadow_offset,0);
	712	offset += object->shadow_offset;
	713	hi_offset += object->shadow_offset;
	714	lo_offset += object->shadow_offset;
	715	access_required = VM_PROT_READ;
	716	vm_object_lock(next_object);
	717	vm_object_unlock(object);
	718	object = next_object;
	719	vm_object_paging_begin(object);
	720	continue;
	721	}
	722	}
	723
	724	if ((m->cleaning)
	725	&& ((object != first_object) \|\|
	726	(object->copy != VM_OBJECT_NULL))
	727	&& (fault_type & VM_PROT_WRITE)) {
	728	/*
	729	* This is a copy-on-write fault that will
	730	* cause us to revoke access to this page, but
	731	* this page is in the process of being cleaned
	732	* in a clustered pageout. We must wait until
	733	* the cleaning operation completes before
	734	* revoking access to the original page,
	735	* otherwise we might attempt to remove a
	736	* wired mapping.
	737	*/
	738	#if TRACEFAULTPAGE
	739	dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
	740	#endif
	741	XPR(XPR_VM_FAULT,
	742	"vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
	743	(integer_t)object, offset,
	744	(integer_t)m, 0, 0);
	745	/* take an extra ref so that object won't die */
	746	assert(object->ref_count > 0);
	747	object->ref_count++;
	748	vm_object_res_reference(object);
	749	vm_fault_cleanup(object, first_m);
	750	counter(c_vm_fault_page_block_backoff_kernel++);
	751	vm_object_lock(object);
	752	assert(object->ref_count > 0);
	753	m = vm_page_lookup(object, offset);
	754	if (m != VM_PAGE_NULL && m->cleaning) {
	755	PAGE_ASSERT_WAIT(m, interruptible);
	756	vm_object_unlock(object);
	757	wait_result = thread_block((void (*)(void)) 0);
	758	vm_object_deallocate(object);
	759	goto backoff;
	760	} else {
	761	vm_object_unlock(object);
	762	vm_object_deallocate(object);
	763	cur_thread->interruptible = interruptible_state;
	764	return VM_FAULT_RETRY;
	765	}
	766	}
	767
	768	/*
	769	* If the desired access to this page has
	770	* been locked out, request that it be unlocked.
	771	*/
	772
	773	if (access_required & m->page_lock) {
	774	if ((access_required & m->unlock_request) != access_required) {
	775	vm_prot_t new_unlock_request;
	776	kern_return_t rc;
	777
	778	#if TRACEFAULTPAGE
	779	dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
	780	#endif
	781	if (!object->pager_ready) {
	782	XPR(XPR_VM_FAULT,
	783	"vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
	784	access_required,
	785	(integer_t)object, offset,
	786	(integer_t)m, 0);
	787	/* take an extra ref */
	788	assert(object->ref_count > 0);
	789	object->ref_count++;
	790	vm_object_res_reference(object);
	791	vm_fault_cleanup(object,
	792	first_m);
	793	counter(c_vm_fault_page_block_backoff_kernel++);
	794	vm_object_lock(object);
	795	assert(object->ref_count > 0);
	796	if (!object->pager_ready) {
	797	vm_object_assert_wait(
	798	object,
	799	VM_OBJECT_EVENT_PAGER_READY,
	800	interruptible);
	801	vm_object_unlock(object);
	802	wait_result = thread_block((void (*)(void))0);
	803	vm_object_deallocate(object);
	804	goto backoff;
	805	} else {
	806	vm_object_unlock(object);
	807	vm_object_deallocate(object);
	808	cur_thread->interruptible = interruptible_state;
	809	return VM_FAULT_RETRY;
	810	}
	811	}
	812
	813	new_unlock_request = m->unlock_request =
	814	(access_required \| m->unlock_request);
	815	vm_object_unlock(object);
	816	XPR(XPR_VM_FAULT,
	817	"vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
	818	(integer_t)object, offset,
	819	(integer_t)m, new_unlock_request, 0);
	820	if ((rc = memory_object_data_unlock(
	821	object->pager,
	822	offset + object->paging_offset,
	823	PAGE_SIZE,
	824	new_unlock_request))
	825	!= KERN_SUCCESS) {
	826	if (vm_fault_debug)
	827	printf("vm_fault: memory_object_data_unlock failed\n");
	828	vm_object_lock(object);
	829	vm_fault_cleanup(object, first_m);
	830	cur_thread->interruptible = interruptible_state;
	831	return((rc == MACH_SEND_INTERRUPTED) ?
	832	VM_FAULT_INTERRUPTED :
	833	VM_FAULT_MEMORY_ERROR);
	834	}
	835	vm_object_lock(object);
	836	continue;
	837	}
	838
	839	XPR(XPR_VM_FAULT,
	840	"vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
	841	access_required, (integer_t)object,
	842	offset, (integer_t)m, 0);
	843	/* take an extra ref so object won't die */
	844	assert(object->ref_count > 0);
	845	object->ref_count++;
	846	vm_object_res_reference(object);
	847	vm_fault_cleanup(object, first_m);
	848	counter(c_vm_fault_page_block_backoff_kernel++);
	849	vm_object_lock(object);
	850	assert(object->ref_count > 0);
	851	m = vm_page_lookup(object, offset);
	852	if (m != VM_PAGE_NULL &&
	853	(access_required & m->page_lock) &&
	854	!((access_required & m->unlock_request) != access_required)) {
	855	PAGE_ASSERT_WAIT(m, interruptible);
	856	vm_object_unlock(object);
	857	wait_result = thread_block((void (*)(void)) 0);
	858	vm_object_deallocate(object);
	859	goto backoff;
	860	} else {
	861	vm_object_unlock(object);
	862	vm_object_deallocate(object);
	863	cur_thread->interruptible = interruptible_state;
	864	return VM_FAULT_RETRY;
	865	}
	866	}
	867	/*
	868	* We mark the page busy and leave it on
	869	* the pageout queues. If the pageout
	870	* daemon comes across it, then it will
	871	* remove the page.
	872	*/
	873
	874	#if TRACEFAULTPAGE
	875	dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
	876	#endif
	877
	878	#if !VM_FAULT_STATIC_CONFIG
	879	if (!software_reference_bits) {
	880	vm_page_lock_queues();
	881	if (m->inactive)
	882	vm_stat.reactivations++;
	883
	884	VM_PAGE_QUEUES_REMOVE(m);
	885	vm_page_unlock_queues();
	886	}
	887	#endif
	888	XPR(XPR_VM_FAULT,
	889	"vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
	890	(integer_t)object, offset, (integer_t)m, 0, 0);
	891	assert(!m->busy);
	892	m->busy = TRUE;
	893	assert(!m->absent);
	894	break;
	895	}
	896
	897	look_for_page =
	898	(object->pager_created) &&
	899	LOOK_FOR(object, offset) &&
	900	(!data_supply);
	901
	902	#if TRACEFAULTPAGE
	903	dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
	904	#endif
	905	if ((look_for_page \|\| (object == first_object))
	906	&& !must_be_resident
	907	&& !(object->phys_contiguous)) {
	908	/*
	909	* Allocate a new page for this object/offset
	910	* pair.
	911	*/
	912
	913	m = vm_page_grab_fictitious();
	914	#if TRACEFAULTPAGE
	915	dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
	916	#endif
	917	if (m == VM_PAGE_NULL) {
	918	vm_fault_cleanup(object, first_m);
	919	cur_thread->interruptible = interruptible_state;
	920	return(VM_FAULT_FICTITIOUS_SHORTAGE);
	921	}
	922	vm_page_insert(m, object, offset);
	923	}
	924
	925	if ((look_for_page && !must_be_resident)) {
	926	kern_return_t rc;
	927
	928	/*
	929	* If the memory manager is not ready, we
	930	* cannot make requests.
	931	*/
	932	if (!object->pager_ready) {
	933	#if TRACEFAULTPAGE
	934	dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
	935	#endif
	936	if(m != VM_PAGE_NULL)
	937	VM_PAGE_FREE(m);
	938	XPR(XPR_VM_FAULT,
	939	"vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
	940	(integer_t)object, offset, 0, 0, 0);
	941	/* take an extra ref so object won't die */
	942	assert(object->ref_count > 0);
	943	object->ref_count++;
	944	vm_object_res_reference(object);
	945	vm_fault_cleanup(object, first_m);
	946	counter(c_vm_fault_page_block_backoff_kernel++);
	947	vm_object_lock(object);
	948	assert(object->ref_count > 0);
	949	if (!object->pager_ready) {
	950	vm_object_assert_wait(object,
	951	VM_OBJECT_EVENT_PAGER_READY,
	952	interruptible);
	953	vm_object_unlock(object);
	954	wait_result = thread_block((void (*)(void))0);
	955	vm_object_deallocate(object);
	956	goto backoff;
	957	} else {
	958	vm_object_unlock(object);
	959	vm_object_deallocate(object);
	960	cur_thread->interruptible = interruptible_state;
	961	return VM_FAULT_RETRY;
	962	}
	963	}
	964
	965	if(object->phys_contiguous) {
	966	if(m != VM_PAGE_NULL) {
	967	VM_PAGE_FREE(m);
	968	m = VM_PAGE_NULL;
	969	}
	970	goto no_clustering;
	971	}
	972	if (object->internal) {
	973	/*
	974	* Requests to the default pager
	975	* must reserve a real page in advance,
	976	* because the pager's data-provided
	977	* won't block for pages. IMPORTANT:
	978	* this acts as a throttling mechanism
	979	* for data_requests to the default
	980	* pager.
	981	*/
	982
	983	#if TRACEFAULTPAGE
	984	dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
	985	#endif
	986	if (m->fictitious && !vm_page_convert(m)) {
	987	VM_PAGE_FREE(m);
	988	vm_fault_cleanup(object, first_m);
	989	cur_thread->interruptible = interruptible_state;
	990	return(VM_FAULT_MEMORY_SHORTAGE);
	991	}
	992	} else if (object->absent_count >
	993	vm_object_absent_max) {
	994	/*
	995	* If there are too many outstanding page
	996	* requests pending on this object, we
	997	* wait for them to be resolved now.
	998	*/
	999
	1000	#if TRACEFAULTPAGE
	1001	dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
	1002	#endif
	1003	if(m != VM_PAGE_NULL)
	1004	VM_PAGE_FREE(m);
	1005	/* take an extra ref so object won't die */
	1006	assert(object->ref_count > 0);
	1007	object->ref_count++;
	1008	vm_object_res_reference(object);
	1009	vm_fault_cleanup(object, first_m);
	1010	counter(c_vm_fault_page_block_backoff_kernel++);
	1011	vm_object_lock(object);
	1012	assert(object->ref_count > 0);
	1013	if (object->absent_count > vm_object_absent_max) {
	1014	vm_object_absent_assert_wait(object,
	1015	interruptible);
	1016	vm_object_unlock(object);
	1017	wait_result = thread_block((void (*)(void))0);
	1018	vm_object_deallocate(object);
	1019	goto backoff;
	1020	} else {
	1021	vm_object_unlock(object);
	1022	vm_object_deallocate(object);
	1023	cur_thread->interruptible = interruptible_state;
	1024	return VM_FAULT_RETRY;
	1025	}
	1026	}
	1027
	1028	/*
	1029	* Indicate that the page is waiting for data
	1030	* from the memory manager.
	1031	*/
	1032
	1033	if(m != VM_PAGE_NULL) {
	1034
	1035	m->list_req_pending = TRUE;
	1036	m->absent = TRUE;
	1037	m->unusual = TRUE;
	1038	object->absent_count++;
	1039
	1040	}
	1041
	1042	cluster_start = offset;
	1043	length = PAGE_SIZE;
	1044	cluster_size = object->cluster_size;
	1045
	1046	/*
	1047	* Skip clustered pagein if it is globally disabled
	1048	* or random page reference behavior is expected
	1049	* for the address range containing the faulting
	1050	* address or the object paging block size is
	1051	* equal to the page size.
	1052	*/
	1053	if (!vm_allow_clustered_pagein \|\|
	1054	behavior == VM_BEHAVIOR_RANDOM \|\|
	1055	m == VM_PAGE_NULL \|\|
	1056	cluster_size == PAGE_SIZE) {
	1057	cluster_start = trunc_page_64(cluster_start);
	1058	goto no_clustering;
	1059	}
	1060
	1061	assert(offset >= lo_offset);
	1062	assert(offset < hi_offset);
	1063	assert(ALIGNED(object->paging_offset));
	1064	assert(cluster_size >= PAGE_SIZE);
	1065
	1066	#if TRACEFAULTPAGE
	1067	dbgTrace(0xBEEF0011, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
	1068	#endif
	1069	/*
	1070	* Decide whether to scan ahead or behind for
	1071	* additional pages contiguous to the faulted
	1072	* page in the same paging block. The decision
	1073	* is based on system wide globals and the
	1074	* expected page reference behavior of the
	1075	* address range containing the faulting address.
	1076	* First calculate some constants.
	1077	*/
	1078	paging_offset = offset + object->paging_offset;
	1079	cluster_offset = paging_offset & (cluster_size - 1);
	1080	align_offset = paging_offset&(PAGE_SIZE_64-1);
	1081	if (align_offset != 0) {
	1082	cluster_offset = trunc_page_64(cluster_offset);
	1083	}
	1084
	1085	#define SPANS_CLUSTER(x) ((((x) - align_offset) & (vm_object_offset_t)(cluster_size - 1)) == 0)
	1086
	1087	/*
	1088	* Backward scan only if reverse sequential
	1089	* behavior has been specified
	1090	*/
	1091	CLUSTER_STAT(pages_at_lower_offsets = 0;)
	1092	if (((vm_default_behind != 0 &&
	1093	behavior == VM_BEHAVIOR_DEFAULT) \|\|
	1094	behavior == VM_BEHAVIOR_RSEQNTL) && offset) {
	1095	vm_object_offset_t cluster_bot;
	1096
	1097	/*
	1098	* Calculate lower search boundary.
	1099	* Exclude pages that span a cluster boundary.
	1100	* Clip to start of map entry.
	1101	* For default page reference behavior, scan
	1102	* default pages behind.
	1103	*/
	1104	cluster_bot = (offset > cluster_offset) ?
	1105	offset - cluster_offset : offset;
	1106	if (align_offset != 0) {
	1107	if ((cluster_bot < offset) &&
	1108	SPANS_CLUSTER(cluster_bot)) {
	1109	cluster_bot += PAGE_SIZE_64;
	1110	}
	1111	}
	1112	if (behavior == VM_BEHAVIOR_DEFAULT) {
	1113	vm_object_offset_t
	1114	bot = (vm_object_offset_t)
	1115	(vm_default_behind * PAGE_SIZE);
	1116
	1117	if (cluster_bot < (offset - bot))
	1118	cluster_bot = offset - bot;
	1119	}
	1120	if (lo_offset > cluster_bot)
	1121	cluster_bot = lo_offset;
	1122
	1123	for ( cluster_start = offset - PAGE_SIZE_64;
	1124	(cluster_start >= cluster_bot) &&
	1125	(cluster_start !=
	1126	(align_offset - PAGE_SIZE_64));
	1127	cluster_start -= PAGE_SIZE_64) {
	1128	assert(cluster_size > PAGE_SIZE_64);
	1129	retry_cluster_backw:
	1130	if (!LOOK_FOR(object, cluster_start) \|\|
	1131	vm_page_lookup(object, cluster_start)
	1132	!= VM_PAGE_NULL) {
	1133	break;
	1134	}
	1135	if (object->internal) {
	1136	/*
	1137	* need to acquire a real page in
	1138	* advance because this acts as
	1139	* a throttling mechanism for
	1140	* data_requests to the default
	1141	* pager. If this fails, give up
	1142	* trying to find any more pages
	1143	* in the cluster and send off the
	1144	* request for what we already have.
	1145	*/
	1146	if ((m = vm_page_grab())
	1147	== VM_PAGE_NULL) {
	1148	cluster_start += PAGE_SIZE_64;
	1149	cluster_end = offset + PAGE_SIZE_64;
	1150	goto give_up;
	1151	}
	1152	} else if ((m = vm_page_grab_fictitious())
	1153	== VM_PAGE_NULL) {
	1154	vm_object_unlock(object);
	1155	vm_page_more_fictitious();
	1156	vm_object_lock(object);
	1157	goto retry_cluster_backw;
	1158	}
	1159	m->absent = TRUE;
	1160	m->unusual = TRUE;
	1161	m->clustered = TRUE;
	1162	m->list_req_pending = TRUE;
	1163
	1164	vm_page_insert(m, object, cluster_start);
	1165	CLUSTER_STAT(pages_at_lower_offsets++;)
	1166	object->absent_count++;
	1167	}
	1168	cluster_start += PAGE_SIZE_64;
	1169	assert(cluster_start >= cluster_bot);
	1170	}
	1171	assert(cluster_start <= offset);
	1172
	1173	/*
	1174	* Forward scan if default or sequential behavior
	1175	* specified
	1176	*/
	1177	CLUSTER_STAT(pages_at_higher_offsets = 0;)
	1178	if ((behavior == VM_BEHAVIOR_DEFAULT &&
	1179	vm_default_ahead != 0) \|\|
	1180	behavior == VM_BEHAVIOR_SEQUENTIAL) {
	1181	vm_object_offset_t cluster_top;
	1182
	1183	/*
	1184	* Calculate upper search boundary.
	1185	* Exclude pages that span a cluster boundary.
	1186	* Clip to end of map entry.
	1187	* For default page reference behavior, scan
	1188	* default pages ahead.
	1189	*/
	1190	cluster_top = (offset + cluster_size) -
	1191	cluster_offset;
	1192	if (align_offset != 0) {
	1193	if ((cluster_top > (offset + PAGE_SIZE_64)) &&
	1194	SPANS_CLUSTER(cluster_top)) {
	1195	cluster_top -= PAGE_SIZE_64;
	1196	}
	1197	}
	1198	if (behavior == VM_BEHAVIOR_DEFAULT) {
	1199	vm_object_offset_t top = (vm_object_offset_t)
	1200	((vm_default_ahead*PAGE_SIZE)+PAGE_SIZE);
	1201
	1202	if (cluster_top > (offset + top))
	1203	cluster_top = offset + top;
	1204	}
	1205	if (cluster_top > hi_offset)
	1206	cluster_top = hi_offset;
	1207
	1208	for (cluster_end = offset + PAGE_SIZE_64;
	1209	cluster_end < cluster_top;
	1210	cluster_end += PAGE_SIZE_64) {
	1211	assert(cluster_size > PAGE_SIZE);
	1212	retry_cluster_forw:
	1213	if (!LOOK_FOR(object, cluster_end) \|\|
	1214	vm_page_lookup(object, cluster_end)
	1215	!= VM_PAGE_NULL) {
	1216	break;
	1217	}
	1218	if (object->internal) {
	1219	/*
	1220	* need to acquire a real page in
	1221	* advance because this acts as
	1222	* a throttling mechanism for
	1223	* data_requests to the default
	1224	* pager. If this fails, give up
	1225	* trying to find any more pages
	1226	* in the cluster and send off the
	1227	* request for what we already have.
	1228	*/
	1229	if ((m = vm_page_grab())
	1230	== VM_PAGE_NULL) {
	1231	break;
	1232	}
	1233	} else if ((m = vm_page_grab_fictitious())
	1234	== VM_PAGE_NULL) {
	1235	vm_object_unlock(object);
	1236	vm_page_more_fictitious();
	1237	vm_object_lock(object);
	1238	goto retry_cluster_forw;
	1239	}
	1240	m->absent = TRUE;
	1241	m->unusual = TRUE;
	1242	m->clustered = TRUE;
	1243	m->list_req_pending = TRUE;
	1244
	1245	vm_page_insert(m, object, cluster_end);
	1246	CLUSTER_STAT(pages_at_higher_offsets++;)
	1247	object->absent_count++;
	1248	}
	1249	assert(cluster_end <= cluster_top);
	1250	}
	1251	else {
	1252	cluster_end = offset + PAGE_SIZE_64;
	1253	}
	1254	give_up:
	1255	assert(cluster_end >= offset + PAGE_SIZE_64);
	1256	length = cluster_end - cluster_start;
	1257
	1258	#if MACH_CLUSTER_STATS
	1259	CLUSTER_STAT_HIGHER(pages_at_higher_offsets);
	1260	CLUSTER_STAT_LOWER(pages_at_lower_offsets);
	1261	CLUSTER_STAT_CLUSTER(length/PAGE_SIZE);
	1262	#endif /* MACH_CLUSTER_STATS */
	1263
	1264	no_clustering:
	1265	/*
	1266	* lengthen the cluster by the pages in the working set
	1267	*/
	1268	if((map != NULL) &&
	1269	(current_task()->dynamic_working_set != 0)) {
	1270	cluster_end = cluster_start + length;
	1271	/* tws values for start and end are just
	1272	* suggestions. Therefore, as long as
	1273	* build_cluster does not use pointers or
	1274	* take action based on values that
	1275	* could be affected by re-entrance we
	1276	* do not need to take the map lock.
	1277	*/
	1278	tws_build_cluster((tws_hash_t)
	1279	current_task()->dynamic_working_set,
	1280	object, &cluster_start,
	1281	&cluster_end, 0x16000);
	1282	length = cluster_end - cluster_start;
	1283	}
	1284	#if TRACEFAULTPAGE
	1285	dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
	1286	#endif
	1287	/*
	1288	* We have a busy page, so we can
	1289	* release the object lock.
	1290	*/
	1291	vm_object_unlock(object);
	1292
	1293	/*
	1294	* Call the memory manager to retrieve the data.
	1295	*/
	1296
	1297	if (type_of_fault)
	1298	*type_of_fault = DBG_PAGEIN_FAULT;
	1299	VM_STAT(pageins++);
	1300	current_task()->pageins++;
	1301	bumped_pagein = TRUE;
	1302
	1303	/*
	1304	* If this object uses a copy_call strategy,
	1305	* and we are interested in a copy of this object
	1306	* (having gotten here only by following a
	1307	* shadow chain), then tell the memory manager
	1308	* via a flag added to the desired_access
	1309	* parameter, so that it can detect a race
	1310	* between our walking down the shadow chain
	1311	* and its pushing pages up into a copy of
	1312	* the object that it manages.
	1313	*/
	1314
	1315	if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
	1316	object != first_object) {
	1317	wants_copy_flag = VM_PROT_WANTS_COPY;
	1318	} else {
	1319	wants_copy_flag = VM_PROT_NONE;
	1320	}
	1321
	1322	XPR(XPR_VM_FAULT,
	1323	"vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
	1324	(integer_t)object, offset, (integer_t)m,
	1325	access_required \| wants_copy_flag, 0);
	1326
	1327	rc = memory_object_data_request(object->pager,
	1328	cluster_start + object->paging_offset,
	1329	length,
	1330	access_required \| wants_copy_flag);
	1331
	1332
	1333	#if TRACEFAULTPAGE
	1334	dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
	1335	#endif
	1336	if (rc != KERN_SUCCESS) {
	1337	if (rc != MACH_SEND_INTERRUPTED
	1338	&& vm_fault_debug)
	1339	printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n",
	1340	"memory_object_data_request",
	1341	object->pager,
	1342	cluster_start + object->paging_offset,
	1343	length, access_required, rc);
	1344	/*
	1345	* Don't want to leave a busy page around,
	1346	* but the data request may have blocked,
	1347	* so check if it's still there and busy.
	1348	*/
	1349	if(!object->phys_contiguous) {
	1350	vm_object_lock(object);
	1351	for (; length; length -= PAGE_SIZE,
	1352	cluster_start += PAGE_SIZE_64) {
	1353	vm_page_t p;
	1354	if ((p = vm_page_lookup(object,
	1355	cluster_start))
	1356	&& p->absent && p->busy
	1357	&& p != first_m) {
	1358	VM_PAGE_FREE(p);
	1359	}
	1360	}
	1361	}
	1362	vm_fault_cleanup(object, first_m);
	1363	cur_thread->interruptible = interruptible_state;
	1364	return((rc == MACH_SEND_INTERRUPTED) ?
	1365	VM_FAULT_INTERRUPTED :
	1366	VM_FAULT_MEMORY_ERROR);
	1367	} else {
	1368	#ifdef notdefcdy
	1369	tws_hash_line_t line;
	1370	task_t task;
	1371
	1372	task = current_task();
	1373
	1374	if((map != NULL) &&
	1375	(task->dynamic_working_set != 0)) {
	1376	if(tws_lookup
	1377	((tws_hash_t)
	1378	task->dynamic_working_set,
	1379	offset, object,
	1380	&line) == KERN_SUCCESS) {
	1381	tws_line_signal((tws_hash_t)
	1382	task->dynamic_working_set,
	1383	map, line, vaddr);
	1384	}
	1385	}
	1386	#endif
	1387	}
	1388
	1389	/*
	1390	* Retry with same object/offset, since new data may
	1391	* be in a different page (i.e., m is meaningless at
	1392	* this point).
	1393	*/
	1394	vm_object_lock(object);
	1395	if ((interruptible != THREAD_UNINT) &&
	1396	(current_thread()->state & TH_ABORT)) {
	1397	vm_fault_cleanup(object, first_m);
	1398	cur_thread->interruptible = interruptible_state;
	1399	return(VM_FAULT_INTERRUPTED);
	1400	}
	1401	if(m == VM_PAGE_NULL)
	1402	break;
	1403	continue;
	1404	}
	1405
	1406	/*
	1407	* The only case in which we get here is if
	1408	* object has no pager (or unwiring). If the pager doesn't
	1409	* have the page this is handled in the m->absent case above
	1410	* (and if you change things here you should look above).
	1411	*/
	1412	#if TRACEFAULTPAGE
	1413	dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
	1414	#endif
	1415	if (object == first_object)
	1416	first_m = m;
	1417	else
	1418	assert(m == VM_PAGE_NULL);
	1419
	1420	XPR(XPR_VM_FAULT,
	1421	"vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
	1422	(integer_t)object, offset, (integer_t)m,
	1423	(integer_t)object->shadow, 0);
	1424	/*
	1425	* Move on to the next object. Lock the next
	1426	* object before unlocking the current one.
	1427	*/
	1428	next_object = object->shadow;
	1429	if (next_object == VM_OBJECT_NULL) {
	1430	assert(!must_be_resident);
	1431	/*
	1432	* If there's no object left, fill the page
	1433	* in the top object with zeros. But first we
	1434	* need to allocate a real page.
	1435	*/
	1436
	1437	if (object != first_object) {
	1438	vm_object_paging_end(object);
	1439	vm_object_unlock(object);
	1440
	1441	object = first_object;
	1442	offset = first_offset;
	1443	vm_object_lock(object);
	1444	}
	1445
	1446	m = first_m;
	1447	assert(m->object == object);
	1448	first_m = VM_PAGE_NULL;
	1449
	1450	if (object->shadow_severed) {
	1451	VM_PAGE_FREE(m);
	1452	vm_fault_cleanup(object, VM_PAGE_NULL);
	1453	cur_thread->interruptible = interruptible_state;
	1454	return VM_FAULT_MEMORY_ERROR;
	1455	}
	1456
	1457	if (VM_PAGE_THROTTLED() \|\|
	1458	(m->fictitious && !vm_page_convert(m))) {
	1459	VM_PAGE_FREE(m);
	1460	vm_fault_cleanup(object, VM_PAGE_NULL);
	1461	cur_thread->interruptible = interruptible_state;
	1462	return(VM_FAULT_MEMORY_SHORTAGE);
	1463	}
	1464	m->no_isync = FALSE;
	1465
	1466	if (!no_zero_fill) {
	1467	vm_object_unlock(object);
	1468	vm_page_zero_fill(m);
	1469	if (type_of_fault)
	1470	*type_of_fault = DBG_ZERO_FILL_FAULT;
	1471	VM_STAT(zero_fill_count++);
	1472
	1473	if (bumped_pagein == TRUE) {
	1474	VM_STAT(pageins--);
	1475	current_task()->pageins--;
	1476	}
	1477	vm_object_lock(object);
	1478	}
	1479	vm_page_lock_queues();
	1480	VM_PAGE_QUEUES_REMOVE(m);
	1481	m->page_ticket = vm_page_ticket;
	1482	vm_page_ticket_roll++;
	1483	if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
	1484	vm_page_ticket_roll = 0;
	1485	if(vm_page_ticket ==
	1486	VM_PAGE_TICKET_ROLL_IDS)
	1487	vm_page_ticket= 0;
	1488	else
	1489	vm_page_ticket++;
	1490	}
	1491	queue_enter(&vm_page_queue_inactive,
	1492	m, vm_page_t, pageq);
	1493	m->inactive = TRUE;
	1494	vm_page_inactive_count++;
	1495	vm_page_unlock_queues();
	1496	pmap_clear_modify(m->phys_addr);
	1497	break;
	1498	}
	1499	else {
	1500	if ((object != first_object) \|\| must_be_resident)
	1501	vm_object_paging_end(object);
	1502	offset += object->shadow_offset;
	1503	hi_offset += object->shadow_offset;
	1504	lo_offset += object->shadow_offset;
	1505	access_required = VM_PROT_READ;
	1506	vm_object_lock(next_object);
	1507	vm_object_unlock(object);
	1508	object = next_object;
	1509	vm_object_paging_begin(object);
	1510	}
	1511	}
	1512
	1513	/*
	1514	* PAGE HAS BEEN FOUND.
	1515	*
	1516	* This page (m) is:
	1517	* busy, so that we can play with it;
	1518	* not absent, so that nobody else will fill it;
	1519	* possibly eligible for pageout;
	1520	*
	1521	* The top-level page (first_m) is:
	1522	* VM_PAGE_NULL if the page was found in the
	1523	* top-level object;
	1524	* busy, not absent, and ineligible for pageout.
	1525	*
	1526	* The current object (object) is locked. A paging
	1527	* reference is held for the current and top-level
	1528	* objects.
	1529	*/
	1530
	1531	#if TRACEFAULTPAGE
	1532	dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
	1533	#endif
	1534	#if EXTRA_ASSERTIONS
	1535	if(m != VM_PAGE_NULL) {
	1536	assert(m->busy && !m->absent);
	1537	assert((first_m == VM_PAGE_NULL) \|\|
	1538	(first_m->busy && !first_m->absent &&
	1539	!first_m->active && !first_m->inactive));
	1540	}
	1541	#endif /* EXTRA_ASSERTIONS */
	1542
	1543	XPR(XPR_VM_FAULT,
	1544	"vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
	1545	(integer_t)object, offset, (integer_t)m,
	1546	(integer_t)first_object, (integer_t)first_m);
	1547	/*
	1548	* If the page is being written, but isn't
	1549	* already owned by the top-level object,
	1550	* we have to copy it into a new page owned
	1551	* by the top-level object.
	1552	*/
	1553
	1554	if ((object != first_object) && (m != VM_PAGE_NULL)) {
	1555	/*
	1556	* We only really need to copy if we
	1557	* want to write it.
	1558	*/
	1559
	1560	#if TRACEFAULTPAGE
	1561	dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
	1562	#endif
	1563	if (fault_type & VM_PROT_WRITE) {
	1564	vm_page_t copy_m;
	1565
	1566	assert(!must_be_resident);
	1567
	1568	/*
	1569	* If we try to collapse first_object at this
	1570	* point, we may deadlock when we try to get
	1571	* the lock on an intermediate object (since we
	1572	* have the bottom object locked). We can't
	1573	* unlock the bottom object, because the page
	1574	* we found may move (by collapse) if we do.
	1575	*
	1576	* Instead, we first copy the page. Then, when
	1577	* we have no more use for the bottom object,
	1578	* we unlock it and try to collapse.
	1579	*
	1580	* Note that we copy the page even if we didn't
	1581	* need to... that's the breaks.
	1582	*/
	1583
	1584	/*
	1585	* Allocate a page for the copy
	1586	*/
	1587	copy_m = vm_page_grab();
	1588	if (copy_m == VM_PAGE_NULL) {
	1589	RELEASE_PAGE(m);
	1590	vm_fault_cleanup(object, first_m);
	1591	cur_thread->interruptible = interruptible_state;
	1592	return(VM_FAULT_MEMORY_SHORTAGE);
	1593	}
	1594
	1595
	1596	XPR(XPR_VM_FAULT,
	1597	"vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
	1598	(integer_t)object, offset,
	1599	(integer_t)m, (integer_t)copy_m, 0);
	1600	vm_page_copy(m, copy_m);
	1601
	1602	/*
	1603	* If another map is truly sharing this
	1604	* page with us, we have to flush all
	1605	* uses of the original page, since we
	1606	* can't distinguish those which want the
	1607	* original from those which need the
	1608	* new copy.
	1609	*
	1610	* XXXO If we know that only one map has
	1611	* access to this page, then we could
	1612	* avoid the pmap_page_protect() call.
	1613	*/
	1614
	1615	vm_page_lock_queues();
	1616	assert(!m->cleaning);
	1617	pmap_page_protect(m->phys_addr, VM_PROT_NONE);
	1618	vm_page_deactivate(m);
	1619	copy_m->dirty = TRUE;
	1620	/*
	1621	* Setting reference here prevents this fault from
	1622	* being counted as a (per-thread) reactivate as well
	1623	* as a copy-on-write.
	1624	*/
	1625	first_m->reference = TRUE;
	1626	vm_page_unlock_queues();
	1627
	1628	/*
	1629	* We no longer need the old page or object.
	1630	*/
	1631
	1632	PAGE_WAKEUP_DONE(m);
	1633	vm_object_paging_end(object);
	1634	vm_object_unlock(object);
	1635
	1636	if (type_of_fault)
	1637	*type_of_fault = DBG_COW_FAULT;
	1638	VM_STAT(cow_faults++);
	1639	current_task()->cow_faults++;
	1640	object = first_object;
	1641	offset = first_offset;
	1642
	1643	vm_object_lock(object);
	1644	VM_PAGE_FREE(first_m);
	1645	first_m = VM_PAGE_NULL;
	1646	assert(copy_m->busy);
	1647	vm_page_insert(copy_m, object, offset);
	1648	m = copy_m;
	1649
	1650	/*
	1651	* Now that we've gotten the copy out of the
	1652	* way, let's try to collapse the top object.
	1653	* But we have to play ugly games with
	1654	* paging_in_progress to do that...
	1655	*/
	1656
	1657	vm_object_paging_end(object);
	1658	vm_object_collapse(object);
	1659	vm_object_paging_begin(object);
	1660
	1661	}
	1662	else {
	1663	*protection &= (~VM_PROT_WRITE);
	1664	}
	1665	}
	1666
	1667	/*
	1668	* Now check whether the page needs to be pushed into the
	1669	* copy object. The use of asymmetric copy on write for
	1670	* shared temporary objects means that we may do two copies to
	1671	* satisfy the fault; one above to get the page from a
	1672	* shadowed object, and one here to push it into the copy.
	1673	*/
	1674
	1675	while (first_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY &&
	1676	(copy_object = first_object->copy) != VM_OBJECT_NULL &&
	1677	(m!= VM_PAGE_NULL)) {
	1678	vm_object_offset_t copy_offset;
	1679	vm_page_t copy_m;
	1680
	1681	#if TRACEFAULTPAGE
	1682	dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
	1683	#endif
	1684	/*
	1685	* If the page is being written, but hasn't been
	1686	* copied to the copy-object, we have to copy it there.
	1687	*/
	1688
	1689	if ((fault_type & VM_PROT_WRITE) == 0) {
	1690	*protection &= ~VM_PROT_WRITE;
	1691	break;
	1692	}
	1693
	1694	/*
	1695	* If the page was guaranteed to be resident,
	1696	* we must have already performed the copy.
	1697	*/
	1698
	1699	if (must_be_resident)
	1700	break;
	1701
	1702	/*
	1703	* Try to get the lock on the copy_object.
	1704	*/
	1705	if (!vm_object_lock_try(copy_object)) {
	1706	vm_object_unlock(object);
	1707
	1708	mutex_pause(); /* wait a bit */
	1709
	1710	vm_object_lock(object);
	1711	continue;
	1712	}
	1713
	1714	/*
	1715	* Make another reference to the copy-object,
	1716	* to keep it from disappearing during the
	1717	* copy.
	1718	*/
	1719	assert(copy_object->ref_count > 0);
	1720	copy_object->ref_count++;
	1721	VM_OBJ_RES_INCR(copy_object);
	1722
	1723	/*
	1724	* Does the page exist in the copy?
	1725	*/
	1726	copy_offset = first_offset - copy_object->shadow_offset;
	1727	if (copy_object->size <= copy_offset)
	1728	/*
	1729	* Copy object doesn't cover this page -- do nothing.
	1730	*/
	1731	;
	1732	else if ((copy_m =
	1733	vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
	1734	/* Page currently exists in the copy object */
	1735	if (copy_m->busy) {
	1736	/*
	1737	* If the page is being brought
	1738	* in, wait for it and then retry.
	1739	*/
	1740	RELEASE_PAGE(m);
	1741	/* take an extra ref so object won't die */
	1742	assert(copy_object->ref_count > 0);
	1743	copy_object->ref_count++;
	1744	vm_object_res_reference(copy_object);
	1745	vm_object_unlock(copy_object);
	1746	vm_fault_cleanup(object, first_m);
	1747	counter(c_vm_fault_page_block_backoff_kernel++);
	1748	vm_object_lock(copy_object);
	1749	assert(copy_object->ref_count > 0);
	1750	VM_OBJ_RES_DECR(copy_object);
	1751	copy_object->ref_count--;
	1752	assert(copy_object->ref_count > 0);
	1753	copy_m = vm_page_lookup(copy_object, copy_offset);
	1754	if (copy_m != VM_PAGE_NULL && copy_m->busy) {
	1755	PAGE_ASSERT_WAIT(copy_m, interruptible);
	1756	vm_object_unlock(copy_object);
	1757	wait_result = thread_block((void (*)(void))0);
	1758	vm_object_deallocate(copy_object);
	1759	goto backoff;
	1760	} else {
	1761	vm_object_unlock(copy_object);
	1762	vm_object_deallocate(copy_object);
	1763	cur_thread->interruptible = interruptible_state;
	1764	return VM_FAULT_RETRY;
	1765	}
	1766	}
	1767	}
	1768	else if (!PAGED_OUT(copy_object, copy_offset)) {
	1769	/*
	1770	* If PAGED_OUT is TRUE, then the page used to exist
	1771	* in the copy-object, and has already been paged out.
	1772	* We don't need to repeat this. If PAGED_OUT is
	1773	* FALSE, then either we don't know (!pager_created,
	1774	* for example) or it hasn't been paged out.
	1775	* (VM_EXTERNAL_STATE_UNKNOWN\|\|VM_EXTERNAL_STATE_ABSENT)
	1776	* We must copy the page to the copy object.
	1777	*/
	1778
	1779	/*
	1780	* Allocate a page for the copy
	1781	*/
	1782	copy_m = vm_page_alloc(copy_object, copy_offset);
	1783	if (copy_m == VM_PAGE_NULL) {
	1784	RELEASE_PAGE(m);
	1785	VM_OBJ_RES_DECR(copy_object);
	1786	copy_object->ref_count--;
	1787	assert(copy_object->ref_count > 0);
	1788	vm_object_unlock(copy_object);
	1789	vm_fault_cleanup(object, first_m);
	1790	cur_thread->interruptible = interruptible_state;
	1791	return(VM_FAULT_MEMORY_SHORTAGE);
	1792	}
	1793
	1794	/*
	1795	* Must copy page into copy-object.
	1796	*/
	1797
	1798	vm_page_copy(m, copy_m);
	1799
	1800	/*
	1801	* If the old page was in use by any users
	1802	* of the copy-object, it must be removed
	1803	* from all pmaps. (We can't know which
	1804	* pmaps use it.)
	1805	*/
	1806
	1807	vm_page_lock_queues();
	1808	assert(!m->cleaning);
	1809	pmap_page_protect(m->phys_addr, VM_PROT_NONE);
	1810	copy_m->dirty = TRUE;
	1811	vm_page_unlock_queues();
	1812
	1813	/*
	1814	* If there's a pager, then immediately
	1815	* page out this page, using the "initialize"
	1816	* option. Else, we use the copy.
	1817	*/
	1818
	1819	if
	1820	#if MACH_PAGEMAP
	1821	((!copy_object->pager_created) \|\|
	1822	vm_external_state_get(
	1823	copy_object->existence_map, copy_offset)
	1824	== VM_EXTERNAL_STATE_ABSENT)
	1825	#else
	1826	(!copy_object->pager_created)
	1827	#endif
	1828	{
	1829	vm_page_lock_queues();
	1830	vm_page_activate(copy_m);
	1831	vm_page_unlock_queues();
	1832	PAGE_WAKEUP_DONE(copy_m);
	1833	}
	1834	else {
	1835	assert(copy_m->busy == TRUE);
	1836
	1837	/*
	1838	* The page is already ready for pageout:
	1839	* not on pageout queues and busy.
	1840	* Unlock everything except the
	1841	* copy_object itself.
	1842	*/
	1843
	1844	vm_object_unlock(object);
	1845
	1846	/*
	1847	* Write the page to the copy-object,
	1848	* flushing it from the kernel.
	1849	*/
	1850
	1851	vm_pageout_initialize_page(copy_m);
	1852
	1853	/*
	1854	* Since the pageout may have
	1855	* temporarily dropped the
	1856	* copy_object's lock, we
	1857	* check whether we'll have
	1858	* to deallocate the hard way.
	1859	*/
	1860
	1861	if ((copy_object->shadow != object) \|\|
	1862	(copy_object->ref_count == 1)) {
	1863	vm_object_unlock(copy_object);
	1864	vm_object_deallocate(copy_object);
	1865	vm_object_lock(object);
	1866	continue;
	1867	}
	1868
	1869	/*
	1870	* Pick back up the old object's
	1871	* lock. [It is safe to do so,
	1872	* since it must be deeper in the
	1873	* object tree.]
	1874	*/
	1875
	1876	vm_object_lock(object);
	1877	}
	1878
	1879	/*
	1880	* Because we're pushing a page upward
	1881	* in the object tree, we must restart
	1882	* any faults that are waiting here.
	1883	* [Note that this is an expansion of
	1884	* PAGE_WAKEUP that uses the THREAD_RESTART
	1885	* wait result]. Can't turn off the page's
	1886	* busy bit because we're not done with it.
	1887	*/
	1888
	1889	if (m->wanted) {
	1890	m->wanted = FALSE;
	1891	thread_wakeup_with_result((event_t) m,
	1892	THREAD_RESTART);
	1893	}
	1894	}
	1895
	1896	/*
	1897	* The reference count on copy_object must be
	1898	* at least 2: one for our extra reference,
	1899	* and at least one from the outside world
	1900	* (we checked that when we last locked
	1901	* copy_object).
	1902	*/
	1903	copy_object->ref_count--;
	1904	assert(copy_object->ref_count > 0);
	1905	VM_OBJ_RES_DECR(copy_object);
	1906	vm_object_unlock(copy_object);
	1907
	1908	break;
	1909	}
	1910
	1911	*result_page = m;
	1912	*top_page = first_m;
	1913
	1914	XPR(XPR_VM_FAULT,
	1915	"vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
	1916	(integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
	1917	/*
	1918	* If the page can be written, assume that it will be.
	1919	* [Earlier, we restrict the permission to allow write
	1920	* access only if the fault so required, so we don't
	1921	* mark read-only data as dirty.]
	1922	*/
	1923
	1924	#if !VM_FAULT_STATIC_CONFIG
	1925	if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE) &&
	1926	(m != VM_PAGE_NULL)) {
	1927	m->dirty = TRUE;
	1928	}
	1929	#endif
	1930	#if TRACEFAULTPAGE
	1931	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind); /* (TEST/DEBUG) */
	1932	#endif
	1933	if (vm_page_deactivate_behind) {
	1934	if (offset && /* don't underflow */
	1935	(object->last_alloc == (offset - PAGE_SIZE_64))) {
	1936	m = vm_page_lookup(object, object->last_alloc);
	1937	if ((m != VM_PAGE_NULL) && !m->busy) {
	1938	vm_page_lock_queues();
	1939	vm_page_deactivate(m);
	1940	vm_page_unlock_queues();
	1941	}
	1942	#if TRACEFAULTPAGE
	1943	dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
	1944	#endif
	1945	}
	1946	object->last_alloc = offset;
	1947	}
	1948	#if TRACEFAULTPAGE
	1949	dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
	1950	#endif
	1951	cur_thread->interruptible = interruptible_state;
	1952	if(*result_page == VM_PAGE_NULL) {
	1953	vm_object_unlock(object);
	1954	}
	1955	return(VM_FAULT_SUCCESS);
	1956
	1957	#if 0
	1958	block_and_backoff:
	1959	vm_fault_cleanup(object, first_m);
	1960
	1961	counter(c_vm_fault_page_block_backoff_kernel++);
	1962	thread_block((void (*)(void))0);
	1963	#endif
	1964
	1965	backoff:
	1966	cur_thread->interruptible = interruptible_state;
	1967	if (wait_result == THREAD_INTERRUPTED)
	1968	return VM_FAULT_INTERRUPTED;
	1969	return VM_FAULT_RETRY;
	1970
	1971	#undef RELEASE_PAGE
	1972	}
	1973
	1974	/*
	1975	* Routine: vm_fault
	1976	* Purpose:
	1977	* Handle page faults, including pseudo-faults
	1978	* used to change the wiring status of pages.
	1979	* Returns:
	1980	* Explicit continuations have been removed.
	1981	* Implementation:
	1982	* vm_fault and vm_fault_page save mucho state
	1983	* in the moral equivalent of a closure. The state
	1984	* structure is allocated when first entering vm_fault
	1985	* and deallocated when leaving vm_fault.
	1986	*/
	1987
	1988	kern_return_t
	1989	vm_fault(
	1990	vm_map_t map,
	1991	vm_offset_t vaddr,
	1992	vm_prot_t fault_type,
	1993	boolean_t change_wiring,
	1994	int interruptible)
	1995	{
	1996	vm_map_version_t version; /* Map version for verification */
	1997	boolean_t wired; /* Should mapping be wired down? */
	1998	vm_object_t object; /* Top-level object */
	1999	vm_object_offset_t offset; /* Top-level offset */
	2000	vm_prot_t prot; /* Protection for mapping */
	2001	vm_behavior_t behavior; /* Expected paging behavior */
	2002	vm_object_offset_t lo_offset, hi_offset;
	2003	vm_object_t old_copy_object; /* Saved copy object */
	2004	vm_page_t result_page; /* Result of vm_fault_page */
	2005	vm_page_t top_page; /* Placeholder page */
	2006	kern_return_t kr;
	2007
	2008	register
	2009	vm_page_t m; /* Fast access to result_page */
	2010	kern_return_t error_code; /* page error reasons */
	2011	register
	2012	vm_object_t cur_object;
	2013	register
	2014	vm_object_offset_t cur_offset;
	2015	vm_page_t cur_m;
	2016	vm_object_t new_object;
	2017	int type_of_fault;
	2018	vm_map_t pmap_map = map;
	2019	vm_map_t original_map = map;
	2020	pmap_t pmap = NULL;
	2021	boolean_t funnel_set = FALSE;
	2022	funnel_t *curflock;
	2023	thread_t cur_thread;
	2024	boolean_t interruptible_state;
	2025
	2026
	2027	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) \| DBG_FUNC_START,
	2028	vaddr,
	2029	0,
	2030	0,
	2031	0,
	2032	0);
	2033
	2034	cur_thread = current_thread();
	2035
	2036	interruptible_state = cur_thread->interruptible;
	2037	if (interruptible == THREAD_UNINT)
	2038	cur_thread->interruptible = FALSE;
	2039
	2040	/*
	2041	* assume we will hit a page in the cache
	2042	* otherwise, explicitly override with
	2043	* the real fault type once we determine it
	2044	*/
	2045	type_of_fault = DBG_CACHE_HIT_FAULT;
	2046
	2047	VM_STAT(faults++);
	2048	current_task()->faults++;
	2049
	2050	/*
	2051	* drop funnel if it is already held. Then restore while returning
	2052	*/
	2053	if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) {
	2054	funnel_set = TRUE;
	2055	curflock = cur_thread->funnel_lock;
	2056	thread_funnel_set( curflock , FALSE);
	2057	}
	2058
	2059	RetryFault: ;
	2060
	2061	/*
	2062	* Find the backing store object and offset into
	2063	* it to begin the search.
	2064	*/
	2065	map = original_map;
	2066	vm_map_lock_read(map);
	2067	kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
	2068	&object, &offset,
	2069	&prot, &wired,
	2070	&behavior, &lo_offset, &hi_offset, &pmap_map);
	2071
	2072	pmap = pmap_map->pmap;
	2073
	2074	if (kr != KERN_SUCCESS) {
	2075	vm_map_unlock_read(map);
	2076	goto done;
	2077	}
	2078
	2079	/*
	2080	* If the page is wired, we must fault for the current protection
	2081	* value, to avoid further faults.
	2082	*/
	2083
	2084	if (wired)
	2085	fault_type = prot \| VM_PROT_WRITE;
	2086
	2087	#if VM_FAULT_CLASSIFY
	2088	/*
	2089	* Temporary data gathering code
	2090	*/
	2091	vm_fault_classify(object, offset, fault_type);
	2092	#endif
	2093	/*
	2094	* Fast fault code. The basic idea is to do as much as
	2095	* possible while holding the map lock and object locks.
	2096	* Busy pages are not used until the object lock has to
	2097	* be dropped to do something (copy, zero fill, pmap enter).
	2098	* Similarly, paging references aren't acquired until that
	2099	* point, and object references aren't used.
	2100	*
	2101	* If we can figure out what to do
	2102	* (zero fill, copy on write, pmap enter) while holding
	2103	* the locks, then it gets done. Otherwise, we give up,
	2104	* and use the original fault path (which doesn't hold
	2105	* the map lock, and relies on busy pages).
	2106	* The give up cases include:
	2107	* - Have to talk to pager.
	2108	* - Page is busy, absent or in error.
	2109	* - Pager has locked out desired access.
	2110	* - Fault needs to be restarted.
	2111	* - Have to push page into copy object.
	2112	*
	2113	* The code is an infinite loop that moves one level down
	2114	* the shadow chain each time. cur_object and cur_offset
	2115	* refer to the current object being examined. object and offset
	2116	* are the original object from the map. The loop is at the
	2117	* top level if and only if object and cur_object are the same.
	2118	*
	2119	* Invariants: Map lock is held throughout. Lock is held on
	2120	* original object and cur_object (if different) when
	2121	* continuing or exiting loop.
	2122	*
	2123	*/
	2124
	2125
	2126	/*
	2127	* If this page is to be inserted in a copy delay object
	2128	* for writing, and if the object has a copy, then the
	2129	* copy delay strategy is implemented in the slow fault page.
	2130	*/
	2131	if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY \|\|
	2132	object->copy == VM_OBJECT_NULL \|\|
	2133	(fault_type & VM_PROT_WRITE) == 0) {
	2134	cur_object = object;
	2135	cur_offset = offset;
	2136
	2137	while (TRUE) {
	2138	m = vm_page_lookup(cur_object, cur_offset);
	2139	if (m != VM_PAGE_NULL) {
	2140	if (m->busy)
	2141	break;
	2142
	2143	if (m->unusual && (m->error \|\| m->restart \|\| m->private
	2144	\|\| m->absent \|\| (fault_type & m->page_lock))) {
	2145
	2146	/*
	2147	* Unusual case. Give up.
	2148	*/
	2149	break;
	2150	}
	2151
	2152	/*
	2153	* Two cases of map in faults:
	2154	* - At top level w/o copy object.
	2155	* - Read fault anywhere.
	2156	* --> must disallow write.
	2157	*/
	2158
	2159	if (object == cur_object &&
	2160	object->copy == VM_OBJECT_NULL)
	2161	goto FastMapInFault;
	2162
	2163	if ((fault_type & VM_PROT_WRITE) == 0) {
	2164
	2165	prot &= ~VM_PROT_WRITE;
	2166
	2167	/*
	2168	* Set up to map the page ...
	2169	* mark the page busy, drop
	2170	* locks and take a paging reference
	2171	* on the object with the page.
	2172	*/
	2173
	2174	if (object != cur_object) {
	2175	vm_object_unlock(object);
	2176	object = cur_object;
	2177	}
	2178	FastMapInFault:
	2179	m->busy = TRUE;
	2180
	2181	vm_object_paging_begin(object);
	2182	vm_object_unlock(object);
	2183
	2184	FastPmapEnter:
	2185	/*
	2186	* Check a couple of global reasons to
	2187	* be conservative about write access.
	2188	* Then do the pmap_enter.
	2189	*/
	2190	#if !VM_FAULT_STATIC_CONFIG
	2191	if (vm_fault_dirty_handling
	2192	#if MACH_KDB
	2193	\|\| db_watchpoint_list
	2194	#endif
	2195	&& (fault_type & VM_PROT_WRITE) == 0)
	2196	prot &= ~VM_PROT_WRITE;
	2197	#else /* STATIC_CONFIG */
	2198	#if MACH_KDB
	2199	if (db_watchpoint_list
	2200	&& (fault_type & VM_PROT_WRITE) == 0)
	2201	prot &= ~VM_PROT_WRITE;
	2202	#endif /* MACH_KDB */
	2203	#endif /* STATIC_CONFIG */
	2204	if (m->no_isync == TRUE)
	2205	pmap_sync_caches_phys(m->phys_addr);
	2206
	2207	PMAP_ENTER(pmap, vaddr, m, prot, wired);
	2208	{
	2209	tws_hash_line_t line;
	2210	task_t task;
	2211
	2212	task = current_task();
	2213	if((map != NULL) &&
	2214	(task->dynamic_working_set != 0)) {
	2215	if(tws_lookup
	2216	((tws_hash_t)
	2217	task->dynamic_working_set,
	2218	cur_offset, object,
	2219	&line) != KERN_SUCCESS) {
	2220	if(tws_insert((tws_hash_t)
	2221	task->dynamic_working_set,
	2222	m->offset, m->object,
	2223	vaddr, pmap_map)
	2224	== KERN_NO_SPACE) {
	2225	tws_expand_working_set(
	2226	task->dynamic_working_set,
	2227	TWS_HASH_LINE_COUNT);
	2228	}
	2229	}
	2230	}
	2231	}
	2232	/*
	2233	* Grab the object lock to manipulate
	2234	* the page queues. Change wiring
	2235	* case is obvious. In soft ref bits
	2236	* case activate page only if it fell
	2237	* off paging queues, otherwise just
	2238	* activate it if it's inactive.
	2239	*
	2240	* NOTE: original vm_fault code will
	2241	* move active page to back of active
	2242	* queue. This code doesn't.
	2243	*/
	2244	vm_object_lock(object);
	2245	vm_page_lock_queues();
	2246
	2247	if (m->clustered) {
	2248	vm_pagein_cluster_used++;
	2249	m->clustered = FALSE;
	2250	}
	2251	/*
	2252	* we did the isync above (if needed)... we're clearing
	2253	* the flag here to avoid holding a lock
	2254	* while calling pmap functions, however
	2255	* we need hold the object lock before
	2256	* we can modify the flag
	2257	*/
	2258	m->no_isync = FALSE;
	2259	m->reference = TRUE;
	2260
	2261	if (change_wiring) {
	2262	if (wired)
	2263	vm_page_wire(m);
	2264	else
	2265	vm_page_unwire(m);
	2266	}
	2267	#if VM_FAULT_STATIC_CONFIG
	2268	else {
	2269	if (!m->active && !m->inactive)
	2270	vm_page_activate(m);
	2271	}
	2272	#else
	2273	else if (software_reference_bits) {
	2274	if (!m->active && !m->inactive)
	2275	vm_page_activate(m);
	2276	}
	2277	else if (!m->active) {
	2278	vm_page_activate(m);
	2279	}
	2280	#endif
	2281	vm_page_unlock_queues();
	2282
	2283	/*
	2284	* That's it, clean up and return.
	2285	*/
	2286	PAGE_WAKEUP_DONE(m);
	2287	vm_object_paging_end(object);
	2288	vm_object_unlock(object);
	2289	vm_map_unlock_read(map);
	2290	if(pmap_map != map)
	2291	vm_map_unlock(pmap_map);
	2292
	2293	if (funnel_set) {
	2294	thread_funnel_set( curflock, TRUE);
	2295	funnel_set = FALSE;
	2296	}
	2297	cur_thread->interruptible = interruptible_state;
	2298
	2299	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) \| DBG_FUNC_END,
	2300	vaddr,
	2301	type_of_fault,
	2302	KERN_SUCCESS,
	2303	0,
	2304	0);
	2305	return KERN_SUCCESS;
	2306	}
	2307
	2308	/*
	2309	* Copy on write fault. If objects match, then
	2310	* object->copy must not be NULL (else control
	2311	* would be in previous code block), and we
	2312	* have a potential push into the copy object
	2313	* with which we won't cope here.
	2314	*/
	2315
	2316	if (cur_object == object)
	2317	break;
	2318
	2319	/*
	2320	* This is now a shadow based copy on write
	2321	* fault -- it requires a copy up the shadow
	2322	* chain.
	2323	*
	2324	* Allocate a page in the original top level
	2325	* object. Give up if allocate fails. Also
	2326	* need to remember current page, as it's the
	2327	* source of the copy.
	2328	*/
	2329	cur_m = m;
	2330	m = vm_page_grab();
	2331	if (m == VM_PAGE_NULL) {
	2332	break;
	2333	}
	2334
	2335	/*
	2336	* Now do the copy. Mark the source busy
	2337	* and take out paging references on both
	2338	* objects.
	2339	*
	2340	* NOTE: This code holds the map lock across
	2341	* the page copy.
	2342	*/
	2343
	2344	cur_m->busy = TRUE;
	2345	vm_page_copy(cur_m, m);
	2346	vm_page_insert(m, object, offset);
	2347
	2348	vm_object_paging_begin(cur_object);
	2349	vm_object_paging_begin(object);
	2350
	2351	type_of_fault = DBG_COW_FAULT;
	2352	VM_STAT(cow_faults++);
	2353	current_task()->cow_faults++;
	2354
	2355	/*
	2356	* Now cope with the source page and object
	2357	* If the top object has a ref count of 1
	2358	* then no other map can access it, and hence
	2359	* it's not necessary to do the pmap_page_protect.
	2360	*/
	2361
	2362
	2363	vm_page_lock_queues();
	2364	vm_page_deactivate(cur_m);
	2365	m->dirty = TRUE;
	2366	pmap_page_protect(cur_m->phys_addr,
	2367	VM_PROT_NONE);
	2368	vm_page_unlock_queues();
	2369
	2370	PAGE_WAKEUP_DONE(cur_m);
	2371	vm_object_paging_end(cur_object);
	2372	vm_object_unlock(cur_object);
	2373
	2374	/*
	2375	* Slight hack to call vm_object collapse
	2376	* and then reuse common map in code.
	2377	* note that the object lock was taken above.
	2378	*/
	2379
	2380	vm_object_paging_end(object);
	2381	vm_object_collapse(object);
	2382	vm_object_paging_begin(object);
	2383	vm_object_unlock(object);
	2384
	2385	goto FastPmapEnter;
	2386	}
	2387	else {
	2388
	2389	/*
	2390	* No page at cur_object, cur_offset
	2391	*/
	2392
	2393	if (cur_object->pager_created) {
	2394
	2395	/*
	2396	* Have to talk to the pager. Give up.
	2397	*/
	2398
	2399	break;
	2400	}
	2401
	2402
	2403	if (cur_object->shadow == VM_OBJECT_NULL) {
	2404
	2405	if (cur_object->shadow_severed) {
	2406	vm_object_paging_end(object);
	2407	vm_object_unlock(object);
	2408	vm_map_unlock_read(map);
	2409	if(pmap_map != map)
	2410	vm_map_unlock(pmap_map);
	2411
	2412	if (funnel_set) {
	2413	thread_funnel_set( curflock, TRUE);
	2414	funnel_set = FALSE;
	2415	}
	2416	cur_thread->interruptible = interruptible_state;
	2417
	2418	return VM_FAULT_MEMORY_ERROR;
	2419	}
	2420
	2421	/*
	2422	* Zero fill fault. Page gets
	2423	* filled in top object. Insert
	2424	* page, then drop any lower lock.
	2425	* Give up if no page.
	2426	*/
	2427	if ((vm_page_free_target -
	2428	((vm_page_free_target-vm_page_free_min)>>2))
	2429	> vm_page_free_count) {
	2430	break;
	2431	}
	2432	m = vm_page_alloc(object, offset);
	2433	if (m == VM_PAGE_NULL) {
	2434	break;
	2435	}
	2436	/*
	2437	* This is a zero-fill or initial fill
	2438	* page fault. As such, we consider it
	2439	* undefined with respect to instruction
	2440	* execution. i.e. it is the responsibility
	2441	* of higher layers to call for an instruction
	2442	* sync after changing the contents and before
	2443	* sending a program into this area. We
	2444	* choose this approach for performance
	2445	*/
	2446
	2447	m->no_isync = FALSE;
	2448
	2449	if (cur_object != object)
	2450	vm_object_unlock(cur_object);
	2451
	2452	vm_object_paging_begin(object);
	2453	vm_object_unlock(object);
	2454
	2455	/*
	2456	* Now zero fill page and map it.
	2457	* the page is probably going to
	2458	* be written soon, so don't bother
	2459	* to clear the modified bit
	2460	*
	2461	* NOTE: This code holds the map
	2462	* lock across the zero fill.
	2463	*/
	2464
	2465	if (!map->no_zero_fill) {
	2466	vm_page_zero_fill(m);
	2467	type_of_fault = DBG_ZERO_FILL_FAULT;
	2468	VM_STAT(zero_fill_count++);
	2469	}
	2470	vm_page_lock_queues();
	2471	VM_PAGE_QUEUES_REMOVE(m);
	2472
	2473	m->page_ticket = vm_page_ticket;
	2474	vm_page_ticket_roll++;
	2475	if(vm_page_ticket_roll ==
	2476	VM_PAGE_TICKETS_IN_ROLL) {
	2477	vm_page_ticket_roll = 0;
	2478	if(vm_page_ticket ==
	2479	VM_PAGE_TICKET_ROLL_IDS)
	2480	vm_page_ticket= 0;
	2481	else
	2482	vm_page_ticket++;
	2483	}
	2484
	2485	queue_enter(&vm_page_queue_inactive,
	2486	m, vm_page_t, pageq);
	2487	m->inactive = TRUE;
	2488	vm_page_inactive_count++;
	2489	vm_page_unlock_queues();
	2490	goto FastPmapEnter;
	2491	}
	2492
	2493	/*
	2494	* On to the next level
	2495	*/
	2496
	2497	cur_offset += cur_object->shadow_offset;
	2498	new_object = cur_object->shadow;
	2499	vm_object_lock(new_object);
	2500	if (cur_object != object)
	2501	vm_object_unlock(cur_object);
	2502	cur_object = new_object;
	2503
	2504	continue;
	2505	}
	2506	}
	2507
	2508	/*
	2509	* Cleanup from fast fault failure. Drop any object
	2510	* lock other than original and drop map lock.
	2511	*/
	2512
	2513	if (object != cur_object)
	2514	vm_object_unlock(cur_object);
	2515	}
	2516	vm_map_unlock_read(map);
	2517	if(pmap_map != map)
	2518	vm_map_unlock(pmap_map);
	2519
	2520	/*
	2521	* Make a reference to this object to
	2522	* prevent its disposal while we are messing with
	2523	* it. Once we have the reference, the map is free
	2524	* to be diddled. Since objects reference their
	2525	* shadows (and copies), they will stay around as well.
	2526	*/
	2527
	2528	assert(object->ref_count > 0);
	2529	object->ref_count++;
	2530	vm_object_res_reference(object);
	2531	vm_object_paging_begin(object);
	2532
	2533	XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
	2534	kr = vm_fault_page(object, offset, fault_type,
	2535	(change_wiring && !wired),
	2536	interruptible,
	2537	lo_offset, hi_offset, behavior,
	2538	&prot, &result_page, &top_page,
	2539	&type_of_fault,
	2540	&error_code, map->no_zero_fill, FALSE, map, vaddr);
	2541
	2542	/*
	2543	* If we didn't succeed, lose the object reference immediately.
	2544	*/
	2545
	2546	if (kr != VM_FAULT_SUCCESS)
	2547	vm_object_deallocate(object);
	2548
	2549	/*
	2550	* See why we failed, and take corrective action.
	2551	*/
	2552
	2553	switch (kr) {
	2554	case VM_FAULT_SUCCESS:
	2555	break;
	2556	case VM_FAULT_MEMORY_SHORTAGE:
	2557	if (vm_page_wait((change_wiring) ?
	2558	THREAD_UNINT :
	2559	THREAD_ABORTSAFE))
	2560	goto RetryFault;
	2561	/* fall thru */
	2562	case VM_FAULT_INTERRUPTED:
	2563	kr = KERN_ABORTED;
	2564	goto done;
	2565	case VM_FAULT_RETRY:
	2566	goto RetryFault;
	2567	case VM_FAULT_FICTITIOUS_SHORTAGE:
	2568	vm_page_more_fictitious();
	2569	goto RetryFault;
	2570	case VM_FAULT_MEMORY_ERROR:
	2571	if (error_code)
	2572	kr = error_code;
	2573	else
	2574	kr = KERN_MEMORY_ERROR;
	2575	goto done;
	2576	}
	2577
	2578	m = result_page;
	2579
	2580	if(m != VM_PAGE_NULL) {
	2581	assert((change_wiring && !wired) ?
	2582	(top_page == VM_PAGE_NULL) :
	2583	((top_page == VM_PAGE_NULL) == (m->object == object)));
	2584	}
	2585
	2586	/*
	2587	* How to clean up the result of vm_fault_page. This
	2588	* happens whether the mapping is entered or not.
	2589	*/
	2590
	2591	#define UNLOCK_AND_DEALLOCATE \
	2592	MACRO_BEGIN \
	2593	vm_fault_cleanup(m->object, top_page); \
	2594	vm_object_deallocate(object); \
	2595	MACRO_END
	2596
	2597	/*
	2598	* What to do with the resulting page from vm_fault_page
	2599	* if it doesn't get entered into the physical map:
	2600	*/
	2601
	2602	#define RELEASE_PAGE(m) \
	2603	MACRO_BEGIN \
	2604	PAGE_WAKEUP_DONE(m); \
	2605	vm_page_lock_queues(); \
	2606	if (!m->active && !m->inactive) \
	2607	vm_page_activate(m); \
	2608	vm_page_unlock_queues(); \
	2609	MACRO_END
	2610
	2611	/*
	2612	* We must verify that the maps have not changed
	2613	* since our last lookup.
	2614	*/
	2615
	2616	if(m != VM_PAGE_NULL) {
	2617	old_copy_object = m->object->copy;
	2618
	2619	vm_object_unlock(m->object);
	2620	} else {
	2621	old_copy_object = VM_OBJECT_NULL;
	2622	}
	2623	if ((map != original_map) \|\| !vm_map_verify(map, &version)) {
	2624	vm_object_t retry_object;
	2625	vm_object_offset_t retry_offset;
	2626	vm_prot_t retry_prot;
	2627
	2628	/*
	2629	* To avoid trying to write_lock the map while another
	2630	* thread has it read_locked (in vm_map_pageable), we
	2631	* do not try for write permission. If the page is
	2632	* still writable, we will get write permission. If it
	2633	* is not, or has been marked needs_copy, we enter the
	2634	* mapping without write permission, and will merely
	2635	* take another fault.
	2636	*/
	2637	map = original_map;
	2638	vm_map_lock_read(map);
	2639	kr = vm_map_lookup_locked(&map, vaddr,
	2640	fault_type & ~VM_PROT_WRITE, &version,
	2641	&retry_object, &retry_offset, &retry_prot,
	2642	&wired, &behavior, &lo_offset, &hi_offset,
	2643	&pmap_map);
	2644	pmap = pmap_map->pmap;
	2645
	2646	if (kr != KERN_SUCCESS) {
	2647	vm_map_unlock_read(map);
	2648	if(m != VM_PAGE_NULL) {
	2649	vm_object_lock(m->object);
	2650	RELEASE_PAGE(m);
	2651	UNLOCK_AND_DEALLOCATE;
	2652	} else {
	2653	vm_object_deallocate(object);
	2654	}
	2655	goto done;
	2656	}
	2657
	2658	vm_object_unlock(retry_object);
	2659	if(m != VM_PAGE_NULL) {
	2660	vm_object_lock(m->object);
	2661	} else {
	2662	vm_object_lock(object);
	2663	}
	2664
	2665	if ((retry_object != object) \|\|
	2666	(retry_offset != offset)) {
	2667	vm_map_unlock_read(map);
	2668	if(pmap_map != map)
	2669	vm_map_unlock(pmap_map);
	2670	if(m != VM_PAGE_NULL) {
	2671	RELEASE_PAGE(m);
	2672	UNLOCK_AND_DEALLOCATE;
	2673	} else {
	2674	vm_object_deallocate(object);
	2675	}
	2676	goto RetryFault;
	2677	}
	2678
	2679	/*
	2680	* Check whether the protection has changed or the object
	2681	* has been copied while we left the map unlocked.
	2682	*/
	2683	prot &= retry_prot;
	2684	if(m != VM_PAGE_NULL) {
	2685	vm_object_unlock(m->object);
	2686	} else {
	2687	vm_object_unlock(object);
	2688	}
	2689	}
	2690	if(m != VM_PAGE_NULL) {
	2691	vm_object_lock(m->object);
	2692	} else {
	2693	vm_object_lock(object);
	2694	}
	2695
	2696	/*
	2697	* If the copy object changed while the top-level object
	2698	* was unlocked, then we must take away write permission.
	2699	*/
	2700
	2701	if(m != VM_PAGE_NULL) {
	2702	if (m->object->copy != old_copy_object)
	2703	prot &= ~VM_PROT_WRITE;
	2704	}
	2705
	2706	/*
	2707	* If we want to wire down this page, but no longer have
	2708	* adequate permissions, we must start all over.
	2709	*/
	2710
	2711	if (wired && (fault_type != (prot\|VM_PROT_WRITE))) {
	2712	vm_map_verify_done(map, &version);
	2713	if(pmap_map != map)
	2714	vm_map_unlock(pmap_map);
	2715	if(m != VM_PAGE_NULL) {
	2716	RELEASE_PAGE(m);
	2717	UNLOCK_AND_DEALLOCATE;
	2718	} else {
	2719	vm_object_deallocate(object);
	2720	}
	2721	goto RetryFault;
	2722	}
	2723
	2724	/*
	2725	* Put this page into the physical map.
	2726	* We had to do the unlock above because pmap_enter
	2727	* may cause other faults. The page may be on
	2728	* the pageout queues. If the pageout daemon comes
	2729	* across the page, it will remove it from the queues.
	2730	*/
	2731	if (m != VM_PAGE_NULL) {
	2732	if (m->no_isync == TRUE) {
	2733	pmap_sync_caches_phys(m->phys_addr);
	2734
	2735	m->no_isync = FALSE;
	2736	}
	2737	vm_object_unlock(m->object);
	2738
	2739	PMAP_ENTER(pmap, vaddr, m, prot, wired);
	2740	{
	2741	tws_hash_line_t line;
	2742	task_t task;
	2743
	2744	task = current_task();
	2745	if((map != NULL) &&
	2746	(task->dynamic_working_set != 0)) {
	2747	if(tws_lookup
	2748	((tws_hash_t)
	2749	task->dynamic_working_set,
	2750	m->offset, m->object,
	2751	&line) != KERN_SUCCESS) {
	2752	tws_insert((tws_hash_t)
	2753	task->dynamic_working_set,
	2754	m->offset, m->object,
	2755	vaddr, pmap_map);
	2756	if(tws_insert((tws_hash_t)
	2757	task->dynamic_working_set,
	2758	m->offset, m->object,
	2759	vaddr, pmap_map)
	2760	== KERN_NO_SPACE) {
	2761	tws_expand_working_set(
	2762	task->dynamic_working_set,
	2763	TWS_HASH_LINE_COUNT);
	2764	}
	2765	}
	2766	}
	2767	}
	2768	} else {
	2769
	2770	/* if __ppc__ not working until figure out phys copy on block maps */
	2771	#ifdef notdefcdy
	2772	int memattr;
	2773	struct phys_entry *pp;
	2774	/*
	2775	* do a pmap block mapping from the physical address
	2776	* in the object
	2777	*/
	2778	if(pp = pmap_find_physentry(
	2779	(vm_offset_t)object->shadow_offset)) {
	2780	memattr = ((pp->pte1 & 0x00000078) >> 3);
	2781	} else {
	2782	memattr = PTE_WIMG_UNCACHED_COHERENT_GUARDED;
	2783	}
	2784
	2785	pmap_map_block(pmap, vaddr,
	2786	(vm_offset_t)object->shadow_offset,
	2787	object->size, prot,
	2788	memattr, 0); /* Set up a block mapped area */
	2789	//#else
	2790	vm_offset_t off;
	2791	for (off = 0; off < object->size; off += page_size) {
	2792	pmap_enter(pmap, vaddr + off,
	2793	object->shadow_offset + off, prot, TRUE);
	2794	/* Map it in */
	2795	}
	2796	#endif
	2797
	2798	}
	2799
	2800	/*
	2801	* If the page is not wired down and isn't already
	2802	* on a pageout queue, then put it where the
	2803	* pageout daemon can find it.
	2804	*/
	2805	if(m != VM_PAGE_NULL) {
	2806	vm_object_lock(m->object);
	2807	vm_page_lock_queues();
	2808
	2809	if (change_wiring) {
	2810	if (wired)
	2811	vm_page_wire(m);
	2812	else
	2813	vm_page_unwire(m);
	2814	}
	2815	#if VM_FAULT_STATIC_CONFIG
	2816	else {
	2817	if (!m->active && !m->inactive)
	2818	vm_page_activate(m);
	2819	m->reference = TRUE;
	2820	}
	2821	#else
	2822	else if (software_reference_bits) {
	2823	if (!m->active && !m->inactive)
	2824	vm_page_activate(m);
	2825	m->reference = TRUE;
	2826	} else {
	2827	vm_page_activate(m);
	2828	}
	2829	#endif
	2830	vm_page_unlock_queues();
	2831	}
	2832
	2833	/*
	2834	* Unlock everything, and return
	2835	*/
	2836
	2837	vm_map_verify_done(map, &version);
	2838	if(pmap_map != map)
	2839	vm_map_unlock(pmap_map);
	2840	if(m != VM_PAGE_NULL) {
	2841	PAGE_WAKEUP_DONE(m);
	2842	UNLOCK_AND_DEALLOCATE;
	2843	} else {
	2844	vm_fault_cleanup(object, top_page);
	2845	vm_object_deallocate(object);
	2846	}
	2847	kr = KERN_SUCCESS;
	2848
	2849	#undef UNLOCK_AND_DEALLOCATE
	2850	#undef RELEASE_PAGE
	2851
	2852	done:
	2853	if (funnel_set) {
	2854	thread_funnel_set( curflock, TRUE);
	2855	funnel_set = FALSE;
	2856	}
	2857	cur_thread->interruptible = interruptible_state;
	2858
	2859	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) \| DBG_FUNC_END,
	2860	vaddr,
	2861	type_of_fault,
	2862	kr,
	2863	0,
	2864	0);
	2865	return(kr);
	2866	}
	2867
	2868	/*
	2869	* vm_fault_wire:
	2870	*
	2871	* Wire down a range of virtual addresses in a map.
	2872	*/
	2873	kern_return_t
	2874	vm_fault_wire(
	2875	vm_map_t map,
	2876	vm_map_entry_t entry,
	2877	pmap_t pmap)
	2878	{
	2879
	2880	register vm_offset_t va;
	2881	register vm_offset_t end_addr = entry->vme_end;
	2882	register kern_return_t rc;
	2883
	2884	assert(entry->in_transition);
	2885
	2886	/*
	2887	* Inform the physical mapping system that the
	2888	* range of addresses may not fault, so that
	2889	* page tables and such can be locked down as well.
	2890	*/
	2891
	2892	pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
	2893
	2894	/*
	2895	* We simulate a fault to get the page and enter it
	2896	* in the physical map.
	2897	*/
	2898
	2899	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
	2900	if ((rc = vm_fault_wire_fast(
	2901	map, va, entry, pmap)) != KERN_SUCCESS) {
	2902	rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
	2903	(pmap == kernel_pmap) ? THREAD_UNINT : THREAD_ABORTSAFE);
	2904	}
	2905
	2906	if (rc != KERN_SUCCESS) {
	2907	struct vm_map_entry tmp_entry = *entry;
	2908
	2909	/* unwire wired pages */
	2910	tmp_entry.vme_end = va;
	2911	vm_fault_unwire(map, &tmp_entry, FALSE, pmap);
	2912
	2913	return rc;
	2914	}
	2915	}
	2916	return KERN_SUCCESS;
	2917	}
	2918
	2919	/*
	2920	* vm_fault_unwire:
	2921	*
	2922	* Unwire a range of virtual addresses in a map.
	2923	*/
	2924	void
	2925	vm_fault_unwire(
	2926	vm_map_t map,
	2927	vm_map_entry_t entry,
	2928	boolean_t deallocate,
	2929	pmap_t pmap)
	2930	{
	2931	register vm_offset_t va;
	2932	register vm_offset_t end_addr = entry->vme_end;
	2933	vm_object_t object;
	2934
	2935	object = (entry->is_sub_map)
	2936	? VM_OBJECT_NULL : entry->object.vm_object;
	2937
	2938	/*
	2939	* Since the pages are wired down, we must be able to
	2940	* get their mappings from the physical map system.
	2941	*/
	2942
	2943	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
	2944	pmap_change_wiring(pmap, va, FALSE);
	2945
	2946	if (object == VM_OBJECT_NULL) {
	2947	(void) vm_fault(map, va, VM_PROT_NONE, TRUE, THREAD_UNINT);
	2948	} else {
	2949	vm_prot_t prot;
	2950	vm_page_t result_page;
	2951	vm_page_t top_page;
	2952	vm_object_t result_object;
	2953	vm_fault_return_t result;
	2954
	2955	do {
	2956	prot = VM_PROT_NONE;
	2957
	2958	vm_object_lock(object);
	2959	vm_object_paging_begin(object);
	2960	XPR(XPR_VM_FAULT,
	2961	"vm_fault_unwire -> vm_fault_page\n",
	2962	0,0,0,0,0);
	2963	result = vm_fault_page(object,
	2964	entry->offset +
	2965	(va - entry->vme_start),
	2966	VM_PROT_NONE, TRUE,
	2967	THREAD_UNINT,
	2968	entry->offset,
	2969	entry->offset +
	2970	(entry->vme_end
	2971	- entry->vme_start),
	2972	entry->behavior,
	2973	&prot,
	2974	&result_page,
	2975	&top_page,
	2976	(int *)0,
	2977	0, map->no_zero_fill,
	2978	FALSE, NULL, 0);
	2979	} while (result == VM_FAULT_RETRY);
	2980
	2981	if (result != VM_FAULT_SUCCESS)
	2982	panic("vm_fault_unwire: failure");
	2983
	2984	result_object = result_page->object;
	2985	if (deallocate) {
	2986	assert(!result_page->fictitious);
	2987	pmap_page_protect(result_page->phys_addr,
	2988	VM_PROT_NONE);
	2989	VM_PAGE_FREE(result_page);
	2990	} else {
	2991	vm_page_lock_queues();
	2992	vm_page_unwire(result_page);
	2993	vm_page_unlock_queues();
	2994	PAGE_WAKEUP_DONE(result_page);
	2995	}
	2996
	2997	vm_fault_cleanup(result_object, top_page);
	2998	}
	2999	}
	3000
	3001	/*
	3002	* Inform the physical mapping system that the range
	3003	* of addresses may fault, so that page tables and
	3004	* such may be unwired themselves.
	3005	*/
	3006
	3007	pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
	3008
	3009	}
	3010
	3011	/*
	3012	* vm_fault_wire_fast:
	3013	*
	3014	* Handle common case of a wire down page fault at the given address.
	3015	* If successful, the page is inserted into the associated physical map.
	3016	* The map entry is passed in to avoid the overhead of a map lookup.
	3017	*
	3018	* NOTE: the given address should be truncated to the
	3019	* proper page address.
	3020	*
	3021	* KERN_SUCCESS is returned if the page fault is handled; otherwise,
	3022	* a standard error specifying why the fault is fatal is returned.
	3023	*
	3024	* The map in question must be referenced, and remains so.
	3025	* Caller has a read lock on the map.
	3026	*
	3027	* This is a stripped version of vm_fault() for wiring pages. Anything
	3028	* other than the common case will return KERN_FAILURE, and the caller
	3029	* is expected to call vm_fault().
	3030	*/
	3031	kern_return_t
	3032	vm_fault_wire_fast(
	3033	vm_map_t map,
	3034	vm_offset_t va,
	3035	vm_map_entry_t entry,
	3036	pmap_t pmap)
	3037	{
	3038	vm_object_t object;
	3039	vm_object_offset_t offset;
	3040	register vm_page_t m;
	3041	vm_prot_t prot;
	3042	thread_act_t thr_act;
	3043
	3044	VM_STAT(faults++);
	3045
	3046	if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
	3047	thr_act->task->faults++;
	3048
	3049	/*
	3050	* Recovery actions
	3051	*/
	3052
	3053	#undef RELEASE_PAGE
	3054	#define RELEASE_PAGE(m) { \
	3055	PAGE_WAKEUP_DONE(m); \
	3056	vm_page_lock_queues(); \
	3057	vm_page_unwire(m); \
	3058	vm_page_unlock_queues(); \
	3059	}
	3060
	3061
	3062	#undef UNLOCK_THINGS
	3063	#define UNLOCK_THINGS { \
	3064	object->paging_in_progress--; \
	3065	vm_object_unlock(object); \
	3066	}
	3067
	3068	#undef UNLOCK_AND_DEALLOCATE
	3069	#define UNLOCK_AND_DEALLOCATE { \
	3070	UNLOCK_THINGS; \
	3071	vm_object_deallocate(object); \
	3072	}
	3073	/*
	3074	* Give up and have caller do things the hard way.
	3075	*/
	3076
	3077	#define GIVE_UP { \
	3078	UNLOCK_AND_DEALLOCATE; \
	3079	return(KERN_FAILURE); \
	3080	}
	3081
	3082
	3083	/*
	3084	* If this entry is not directly to a vm_object, bail out.
	3085	*/
	3086	if (entry->is_sub_map)
	3087	return(KERN_FAILURE);
	3088
	3089	/*
	3090	* Find the backing store object and offset into it.
	3091	*/
	3092
	3093	object = entry->object.vm_object;
	3094	offset = (va - entry->vme_start) + entry->offset;
	3095	prot = entry->protection;
	3096
	3097	/*
	3098	* Make a reference to this object to prevent its
	3099	* disposal while we are messing with it.
	3100	*/
	3101
	3102	vm_object_lock(object);
	3103	assert(object->ref_count > 0);
	3104	object->ref_count++;
	3105	vm_object_res_reference(object);
	3106	object->paging_in_progress++;
	3107
	3108	/*
	3109	* INVARIANTS (through entire routine):
	3110	*
	3111	* 1) At all times, we must either have the object
	3112	* lock or a busy page in some object to prevent
	3113	* some other thread from trying to bring in
	3114	* the same page.
	3115	*
	3116	* 2) Once we have a busy page, we must remove it from
	3117	* the pageout queues, so that the pageout daemon
	3118	* will not grab it away.
	3119	*
	3120	*/
	3121
	3122	/*
	3123	* Look for page in top-level object. If it's not there or
	3124	* there's something going on, give up.
	3125	*/
	3126	m = vm_page_lookup(object, offset);
	3127	if ((m == VM_PAGE_NULL) \|\| (m->busy) \|\|
	3128	(m->unusual && ( m->error \|\| m->restart \|\| m->absent \|\|
	3129	prot & m->page_lock))) {
	3130
	3131	GIVE_UP;
	3132	}
	3133
	3134	/*
	3135	* Wire the page down now. All bail outs beyond this
	3136	* point must unwire the page.
	3137	*/
	3138
	3139	vm_page_lock_queues();
	3140	vm_page_wire(m);
	3141	vm_page_unlock_queues();
	3142
	3143	/*
	3144	* Mark page busy for other threads.
	3145	*/
	3146	assert(!m->busy);
	3147	m->busy = TRUE;
	3148	assert(!m->absent);
	3149
	3150	/*
	3151	* Give up if the page is being written and there's a copy object
	3152	*/
	3153	if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
	3154	RELEASE_PAGE(m);
	3155	GIVE_UP;
	3156	}
	3157
	3158	/*
	3159	* Put this page into the physical map.
	3160	* We have to unlock the object because pmap_enter
	3161	* may cause other faults.
	3162	*/
	3163	if (m->no_isync == TRUE) {
	3164	pmap_sync_caches_phys(m->phys_addr);
	3165
	3166	m->no_isync = FALSE;
	3167	}
	3168	vm_object_unlock(object);
	3169
	3170	PMAP_ENTER(pmap, va, m, prot, TRUE);
	3171
	3172	/*
	3173	* Must relock object so that paging_in_progress can be cleared.
	3174	*/
	3175	vm_object_lock(object);
	3176
	3177	/*
	3178	* Unlock everything, and return
	3179	*/
	3180
	3181	PAGE_WAKEUP_DONE(m);
	3182	UNLOCK_AND_DEALLOCATE;
	3183
	3184	return(KERN_SUCCESS);
	3185
	3186	}
	3187
	3188	/*
	3189	* Routine: vm_fault_copy_cleanup
	3190	* Purpose:
	3191	* Release a page used by vm_fault_copy.
	3192	*/
	3193
	3194	void
	3195	vm_fault_copy_cleanup(
	3196	vm_page_t page,
	3197	vm_page_t top_page)
	3198	{
	3199	vm_object_t object = page->object;
	3200
	3201	vm_object_lock(object);
	3202	PAGE_WAKEUP_DONE(page);
	3203	vm_page_lock_queues();
	3204	if (!page->active && !page->inactive)
	3205	vm_page_activate(page);
	3206	vm_page_unlock_queues();
	3207	vm_fault_cleanup(object, top_page);
	3208	}
	3209
	3210	void
	3211	vm_fault_copy_dst_cleanup(
	3212	vm_page_t page)
	3213	{
	3214	vm_object_t object;
	3215
	3216	if (page != VM_PAGE_NULL) {
	3217	object = page->object;
	3218	vm_object_lock(object);
	3219	vm_page_lock_queues();
	3220	vm_page_unwire(page);
	3221	vm_page_unlock_queues();
	3222	vm_object_paging_end(object);
	3223	vm_object_unlock(object);
	3224	}
	3225	}
	3226
	3227	/*
	3228	* Routine: vm_fault_copy
	3229	*
	3230	* Purpose:
	3231	* Copy pages from one virtual memory object to another --
	3232	* neither the source nor destination pages need be resident.
	3233	*
	3234	* Before actually copying a page, the version associated with
	3235	* the destination address map will be verified.
	3236	*
	3237	* In/out conditions:
	3238	* The caller must hold a reference, but not a lock, to
	3239	* each of the source and destination objects and to the
	3240	* destination map.
	3241	*
	3242	* Results:
	3243	* Returns KERN_SUCCESS if no errors were encountered in
	3244	* reading or writing the data. Returns KERN_INTERRUPTED if
	3245	* the operation was interrupted (only possible if the
	3246	* "interruptible" argument is asserted). Other return values
	3247	* indicate a permanent error in copying the data.
	3248	*
	3249	* The actual amount of data copied will be returned in the
	3250	* "copy_size" argument. In the event that the destination map
	3251	* verification failed, this amount may be less than the amount
	3252	* requested.
	3253	*/
	3254	kern_return_t
	3255	vm_fault_copy(
	3256	vm_object_t src_object,
	3257	vm_object_offset_t src_offset,
	3258	vm_size_t src_size, / INOUT */
	3259	vm_object_t dst_object,
	3260	vm_object_offset_t dst_offset,
	3261	vm_map_t dst_map,
	3262	vm_map_version_t *dst_version,
	3263	int interruptible)
	3264	{
	3265	vm_page_t result_page;
	3266
	3267	vm_page_t src_page;
	3268	vm_page_t src_top_page;
	3269	vm_prot_t src_prot;
	3270
	3271	vm_page_t dst_page;
	3272	vm_page_t dst_top_page;
	3273	vm_prot_t dst_prot;
	3274
	3275	vm_size_t amount_left;
	3276	vm_object_t old_copy_object;
	3277	kern_return_t error = 0;
	3278
	3279	vm_size_t part_size;
	3280
	3281	/*
	3282	* In order not to confuse the clustered pageins, align
	3283	* the different offsets on a page boundary.
	3284	*/
	3285	vm_object_offset_t src_lo_offset = trunc_page_64(src_offset);
	3286	vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset);
	3287	vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size);
	3288	vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size);
	3289
	3290	#define RETURN(x) \
	3291	MACRO_BEGIN \
	3292	*src_size -= amount_left; \
	3293	MACRO_RETURN(x); \
	3294	MACRO_END
	3295
	3296	amount_left = *src_size;
	3297	do { /* while (amount_left > 0) */
	3298	/*
	3299	* There may be a deadlock if both source and destination
	3300	* pages are the same. To avoid this deadlock, the copy must
	3301	* start by getting the destination page in order to apply
	3302	* COW semantics if any.
	3303	*/
	3304
	3305	RetryDestinationFault: ;
	3306
	3307	dst_prot = VM_PROT_WRITE\|VM_PROT_READ;
	3308
	3309	vm_object_lock(dst_object);
	3310	vm_object_paging_begin(dst_object);
	3311
	3312	XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
	3313	switch (vm_fault_page(dst_object,
	3314	trunc_page_64(dst_offset),
	3315	VM_PROT_WRITE\|VM_PROT_READ,
	3316	FALSE,
	3317	interruptible,
	3318	dst_lo_offset,
	3319	dst_hi_offset,
	3320	VM_BEHAVIOR_SEQUENTIAL,
	3321	&dst_prot,
	3322	&dst_page,
	3323	&dst_top_page,
	3324	(int *)0,
	3325	&error,
	3326	dst_map->no_zero_fill,
	3327	FALSE, NULL, 0)) {
	3328	case VM_FAULT_SUCCESS:
	3329	break;
	3330	case VM_FAULT_RETRY:
	3331	goto RetryDestinationFault;
	3332	case VM_FAULT_MEMORY_SHORTAGE:
	3333	if (vm_page_wait(interruptible))
	3334	goto RetryDestinationFault;
	3335	/* fall thru */
	3336	case VM_FAULT_INTERRUPTED:
	3337	RETURN(MACH_SEND_INTERRUPTED);
	3338	case VM_FAULT_FICTITIOUS_SHORTAGE:
	3339	vm_page_more_fictitious();
	3340	goto RetryDestinationFault;
	3341	case VM_FAULT_MEMORY_ERROR:
	3342	if (error)
	3343	return (error);
	3344	else
	3345	return(KERN_MEMORY_ERROR);
	3346	}
	3347	assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
	3348
	3349	old_copy_object = dst_page->object->copy;
	3350
	3351	/*
	3352	* There exists the possiblity that the source and
	3353	* destination page are the same. But we can't
	3354	* easily determine that now. If they are the
	3355	* same, the call to vm_fault_page() for the
	3356	* destination page will deadlock. To prevent this we
	3357	* wire the page so we can drop busy without having
	3358	* the page daemon steal the page. We clean up the
	3359	* top page but keep the paging reference on the object
	3360	* holding the dest page so it doesn't go away.
	3361	*/
	3362
	3363	vm_page_lock_queues();
	3364	vm_page_wire(dst_page);
	3365	vm_page_unlock_queues();
	3366	PAGE_WAKEUP_DONE(dst_page);
	3367	vm_object_unlock(dst_page->object);
	3368
	3369	if (dst_top_page != VM_PAGE_NULL) {
	3370	vm_object_lock(dst_object);
	3371	VM_PAGE_FREE(dst_top_page);
	3372	vm_object_paging_end(dst_object);
	3373	vm_object_unlock(dst_object);
	3374	}
	3375
	3376	RetrySourceFault: ;
	3377
	3378	if (src_object == VM_OBJECT_NULL) {
	3379	/*
	3380	* No source object. We will just
	3381	* zero-fill the page in dst_object.
	3382	*/
	3383	src_page = VM_PAGE_NULL;
	3384	result_page = VM_PAGE_NULL;
	3385	} else {
	3386	vm_object_lock(src_object);
	3387	src_page = vm_page_lookup(src_object,
	3388	trunc_page_64(src_offset));
	3389	if (src_page == dst_page) {
	3390	src_prot = dst_prot;
	3391	result_page = VM_PAGE_NULL;
	3392	} else {
	3393	src_prot = VM_PROT_READ;
	3394	vm_object_paging_begin(src_object);
	3395
	3396	XPR(XPR_VM_FAULT,
	3397	"vm_fault_copy(2) -> vm_fault_page\n",
	3398	0,0,0,0,0);
	3399	switch (vm_fault_page(src_object,
	3400	trunc_page_64(src_offset),
	3401	VM_PROT_READ,
	3402	FALSE,
	3403	interruptible,
	3404	src_lo_offset,
	3405	src_hi_offset,
	3406	VM_BEHAVIOR_SEQUENTIAL,
	3407	&src_prot,
	3408	&result_page,
	3409	&src_top_page,
	3410	(int *)0,
	3411	&error,
	3412	FALSE,
	3413	FALSE, NULL, 0)) {
	3414
	3415	case VM_FAULT_SUCCESS:
	3416	break;
	3417	case VM_FAULT_RETRY:
	3418	goto RetrySourceFault;
	3419	case VM_FAULT_MEMORY_SHORTAGE:
	3420	if (vm_page_wait(interruptible))
	3421	goto RetrySourceFault;
	3422	/* fall thru */
	3423	case VM_FAULT_INTERRUPTED:
	3424	vm_fault_copy_dst_cleanup(dst_page);
	3425	RETURN(MACH_SEND_INTERRUPTED);
	3426	case VM_FAULT_FICTITIOUS_SHORTAGE:
	3427	vm_page_more_fictitious();
	3428	goto RetrySourceFault;
	3429	case VM_FAULT_MEMORY_ERROR:
	3430	vm_fault_copy_dst_cleanup(dst_page);
	3431	if (error)
	3432	return (error);
	3433	else
	3434	return(KERN_MEMORY_ERROR);
	3435	}
	3436
	3437
	3438	assert((src_top_page == VM_PAGE_NULL) ==
	3439	(result_page->object == src_object));
	3440	}
	3441	assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
	3442	vm_object_unlock(result_page->object);
	3443	}
	3444
	3445	if (!vm_map_verify(dst_map, dst_version)) {
	3446	if (result_page != VM_PAGE_NULL && src_page != dst_page)
	3447	vm_fault_copy_cleanup(result_page, src_top_page);
	3448	vm_fault_copy_dst_cleanup(dst_page);
	3449	break;
	3450	}
	3451
	3452	vm_object_lock(dst_page->object);
	3453
	3454	if (dst_page->object->copy != old_copy_object) {
	3455	vm_object_unlock(dst_page->object);
	3456	vm_map_verify_done(dst_map, dst_version);
	3457	if (result_page != VM_PAGE_NULL && src_page != dst_page)
	3458	vm_fault_copy_cleanup(result_page, src_top_page);
	3459	vm_fault_copy_dst_cleanup(dst_page);
	3460	break;
	3461	}
	3462	vm_object_unlock(dst_page->object);
	3463
	3464	/*
	3465	* Copy the page, and note that it is dirty
	3466	* immediately.
	3467	*/
	3468
	3469	if (!page_aligned(src_offset) \|\|
	3470	!page_aligned(dst_offset) \|\|
	3471	!page_aligned(amount_left)) {
	3472
	3473	vm_object_offset_t src_po,
	3474	dst_po;
	3475
	3476	src_po = src_offset - trunc_page_64(src_offset);
	3477	dst_po = dst_offset - trunc_page_64(dst_offset);
	3478
	3479	if (dst_po > src_po) {
	3480	part_size = PAGE_SIZE - dst_po;
	3481	} else {
	3482	part_size = PAGE_SIZE - src_po;
	3483	}
	3484	if (part_size > (amount_left)){
	3485	part_size = amount_left;
	3486	}
	3487
	3488	if (result_page == VM_PAGE_NULL) {
	3489	vm_page_part_zero_fill(dst_page,
	3490	dst_po, part_size);
	3491	} else {
	3492	vm_page_part_copy(result_page, src_po,
	3493	dst_page, dst_po, part_size);
	3494	if(!dst_page->dirty){
	3495	vm_object_lock(dst_object);
	3496	dst_page->dirty = TRUE;
	3497	vm_object_unlock(dst_page->object);
	3498	}
	3499
	3500	}
	3501	} else {
	3502	part_size = PAGE_SIZE;
	3503
	3504	if (result_page == VM_PAGE_NULL)
	3505	vm_page_zero_fill(dst_page);
	3506	else{
	3507	vm_page_copy(result_page, dst_page);
	3508	if(!dst_page->dirty){
	3509	vm_object_lock(dst_object);
	3510	dst_page->dirty = TRUE;
	3511	vm_object_unlock(dst_page->object);
	3512	}
	3513	}
	3514
	3515	}
	3516
	3517	/*
	3518	* Unlock everything, and return
	3519	*/
	3520
	3521	vm_map_verify_done(dst_map, dst_version);
	3522
	3523	if (result_page != VM_PAGE_NULL && src_page != dst_page)
	3524	vm_fault_copy_cleanup(result_page, src_top_page);
	3525	vm_fault_copy_dst_cleanup(dst_page);
	3526
	3527	amount_left -= part_size;
	3528	src_offset += part_size;
	3529	dst_offset += part_size;
	3530	} while (amount_left > 0);
	3531
	3532	RETURN(KERN_SUCCESS);
	3533	#undef RETURN
	3534
	3535	/NOTREACHED/
	3536	}
	3537
	3538	#ifdef notdef
	3539
	3540	/*
	3541	* Routine: vm_fault_page_overwrite
	3542	*
	3543	* Description:
	3544	* A form of vm_fault_page that assumes that the
	3545	* resulting page will be overwritten in its entirety,
	3546	* making it unnecessary to obtain the correct contents
	3547	* of the page.
	3548	*
	3549	* Implementation:
	3550	* XXX Untested. Also unused. Eventually, this technology
	3551	* could be used in vm_fault_copy() to advantage.
	3552	*/
	3553	vm_fault_return_t
	3554	vm_fault_page_overwrite(
	3555	register
	3556	vm_object_t dst_object,
	3557	vm_object_offset_t dst_offset,
	3558	vm_page_t result_page) / OUT */
	3559	{
	3560	register
	3561	vm_page_t dst_page;
	3562	kern_return_t wait_result;
	3563
	3564	#define interruptible THREAD_UNINT /* XXX */
	3565
	3566	while (TRUE) {
	3567	/*
	3568	* Look for a page at this offset
	3569	*/
	3570
	3571	while ((dst_page = vm_page_lookup(dst_object, dst_offset))
	3572	== VM_PAGE_NULL) {
	3573	/*
	3574	* No page, no problem... just allocate one.
	3575	*/
	3576
	3577	dst_page = vm_page_alloc(dst_object, dst_offset);
	3578	if (dst_page == VM_PAGE_NULL) {
	3579	vm_object_unlock(dst_object);
	3580	VM_PAGE_WAIT();
	3581	vm_object_lock(dst_object);
	3582	continue;
	3583	}
	3584
	3585	/*
	3586	* Pretend that the memory manager
	3587	* write-protected the page.
	3588	*
	3589	* Note that we will be asking for write
	3590	* permission without asking for the data
	3591	* first.
	3592	*/
	3593
	3594	dst_page->overwriting = TRUE;
	3595	dst_page->page_lock = VM_PROT_WRITE;
	3596	dst_page->absent = TRUE;
	3597	dst_page->unusual = TRUE;
	3598	dst_object->absent_count++;
	3599
	3600	break;
	3601
	3602	/*
	3603	* When we bail out, we might have to throw
	3604	* away the page created here.
	3605	*/
	3606
	3607	#define DISCARD_PAGE \
	3608	MACRO_BEGIN \
	3609	vm_object_lock(dst_object); \
	3610	dst_page = vm_page_lookup(dst_object, dst_offset); \
	3611	if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
	3612	VM_PAGE_FREE(dst_page); \
	3613	vm_object_unlock(dst_object); \
	3614	MACRO_END
	3615	}
	3616
	3617	/*
	3618	* If the page is write-protected...
	3619	*/
	3620
	3621	if (dst_page->page_lock & VM_PROT_WRITE) {
	3622	/*
	3623	* ... and an unlock request hasn't been sent
	3624	*/
	3625
	3626	if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
	3627	vm_prot_t u;
	3628	kern_return_t rc;
	3629
	3630	/*
	3631	* ... then send one now.
	3632	*/
	3633
	3634	if (!dst_object->pager_ready) {
	3635	vm_object_assert_wait(dst_object,
	3636	VM_OBJECT_EVENT_PAGER_READY,
	3637	interruptible);
	3638	vm_object_unlock(dst_object);
	3639	wait_result = thread_block((void (*)(void))0);
	3640	if (wait_result != THREAD_AWAKENED) {
	3641	DISCARD_PAGE;
	3642	return(VM_FAULT_INTERRUPTED);
	3643	}
	3644	continue;
	3645	}
	3646
	3647	u = dst_page->unlock_request \|= VM_PROT_WRITE;
	3648	vm_object_unlock(dst_object);
	3649
	3650	if ((rc = memory_object_data_unlock(
	3651	dst_object->pager,
	3652	dst_offset + dst_object->paging_offset,
	3653	PAGE_SIZE,
	3654	u)) != KERN_SUCCESS) {
	3655	if (vm_fault_debug)
	3656	printf("vm_object_overwrite: memory_object_data_unlock failed\n");
	3657	DISCARD_PAGE;
	3658	return((rc == MACH_SEND_INTERRUPTED) ?
	3659	VM_FAULT_INTERRUPTED :
	3660	VM_FAULT_MEMORY_ERROR);
	3661	}
	3662	vm_object_lock(dst_object);
	3663	continue;
	3664	}
	3665
	3666	/* ... fall through to wait below */
	3667	} else {
	3668	/*
	3669	* If the page isn't being used for other
	3670	* purposes, then we're done.
	3671	*/
	3672	if ( ! (dst_page->busy \|\| dst_page->absent \|\|
	3673	dst_page->error \|\| dst_page->restart) )
	3674	break;
	3675	}
	3676
	3677	PAGE_ASSERT_WAIT(dst_page, interruptible);
	3678	vm_object_unlock(dst_object);
	3679	wait_result = thread_block((void (*)(void))0);
	3680	if (wait_result != THREAD_AWAKENED) {
	3681	DISCARD_PAGE;
	3682	return(VM_FAULT_INTERRUPTED);
	3683	}
	3684	}
	3685
	3686	*result_page = dst_page;
	3687	return(VM_FAULT_SUCCESS);
	3688
	3689	#undef interruptible
	3690	#undef DISCARD_PAGE
	3691	}
	3692
	3693	#endif /* notdef */
	3694
	3695	#if VM_FAULT_CLASSIFY
	3696	/*
	3697	* Temporary statistics gathering support.
	3698	*/
	3699
	3700	/*
	3701	* Statistics arrays:
	3702	*/
	3703	#define VM_FAULT_TYPES_MAX 5
	3704	#define VM_FAULT_LEVEL_MAX 8
	3705
	3706	int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
	3707
	3708	#define VM_FAULT_TYPE_ZERO_FILL 0
	3709	#define VM_FAULT_TYPE_MAP_IN 1
	3710	#define VM_FAULT_TYPE_PAGER 2
	3711	#define VM_FAULT_TYPE_COPY 3
	3712	#define VM_FAULT_TYPE_OTHER 4
	3713
	3714
	3715	void
	3716	vm_fault_classify(vm_object_t object,
	3717	vm_object_offset_t offset,
	3718	vm_prot_t fault_type)
	3719	{
	3720	int type, level = 0;
	3721	vm_page_t m;
	3722
	3723	while (TRUE) {
	3724	m = vm_page_lookup(object, offset);
	3725	if (m != VM_PAGE_NULL) {
	3726	if (m->busy \|\| m->error \|\| m->restart \|\| m->absent \|\|
	3727	fault_type & m->page_lock) {
	3728	type = VM_FAULT_TYPE_OTHER;
	3729	break;
	3730	}
	3731	if (((fault_type & VM_PROT_WRITE) == 0) \|\|
	3732	((level == 0) && object->copy == VM_OBJECT_NULL)) {
	3733	type = VM_FAULT_TYPE_MAP_IN;
	3734	break;
	3735	}
	3736	type = VM_FAULT_TYPE_COPY;
	3737	break;
	3738	}
	3739	else {
	3740	if (object->pager_created) {
	3741	type = VM_FAULT_TYPE_PAGER;
	3742	break;
	3743	}
	3744	if (object->shadow == VM_OBJECT_NULL) {
	3745	type = VM_FAULT_TYPE_ZERO_FILL;
	3746	break;
	3747	}
	3748
	3749	offset += object->shadow_offset;
	3750	object = object->shadow;
	3751	level++;
	3752	continue;
	3753	}
	3754	}
	3755
	3756	if (level > VM_FAULT_LEVEL_MAX)
	3757	level = VM_FAULT_LEVEL_MAX;
	3758
	3759	vm_fault_stats[type][level] += 1;
	3760
	3761	return;
	3762	}
	3763
	3764	/* cleanup routine to call from debugger */
	3765
	3766	void
	3767	vm_fault_classify_init(void)
	3768	{
	3769	int type, level;
	3770
	3771	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
	3772	for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
	3773	vm_fault_stats[type][level] = 0;
	3774	}
	3775	}
	3776
	3777	return;
	3778	}
	3779	#endif /* VM_FAULT_CLASSIFY */