git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/default_pager/dp_memory

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_OSREFERENCE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the
	10	* License may not be used to create, or enable the creation or
	11	* redistribution of, unlawful or unlicensed copies of an Apple operating
	12	* system, or to circumvent, violate, or enable the circumvention or
	13	* violation of, any terms of an Apple operating system software license
	14	* agreement.
	15	*
	16	* Please obtain a copy of the License at
	17	* http://www.opensource.apple.com/apsl/ and read it before using this
	18	* file.
	19	*
	20	* The Original Code and all software distributed under the License are
	21	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	22	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	23	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	24	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	25	* Please see the License for the specific language governing rights and
	26	* limitations under the License.
	27	*
	28	* @APPLE_LICENSE_OSREFERENCE_HEADER_END@
	29	*/
	30	/*
	31	* @OSF_COPYRIGHT@
	32	*/
	33	/*
	34	* Mach Operating System
	35	* Copyright (c) 1991,1990,1989 Carnegie Mellon University
	36	* All Rights Reserved.
	37	*
	38	* Permission to use, copy, modify and distribute this software and its
	39	* documentation is hereby granted, provided that both the copyright
	40	* notice and this permission notice appear in all copies of the
	41	* software, derivative works or modified versions, and any portions
	42	* thereof, and that both notices appear in supporting documentation.
	43	*
	44	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	45	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
	46	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	47	*
	48	* Carnegie Mellon requests users of this software to return to
	49	*
	50	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	51	* School of Computer Science
	52	* Carnegie Mellon University
	53	* Pittsburgh PA 15213-3890
	54	*
	55	* any improvements or extensions that they make and grant Carnegie Mellon
	56	* the rights to redistribute these changes.
	57	*/
	58
	59	/*
	60	* Default Pager.
	61	* Memory Object Management.
	62	*/
	63
	64	#include "default_pager_internal.h"
	65	#include <default_pager/default_pager_object_server.h>
	66	#include <mach/memory_object_default_server.h>
	67	#include <mach/memory_object_control.h>
	68	#include <mach/memory_object_types.h>
	69	#include <mach/memory_object_server.h>
	70	#include <mach/upl.h>
	71	#include <mach/vm_map.h>
	72	#include <vm/memory_object.h>
	73	#include <vm/vm_pageout.h>
	74	#include <vm/vm_map.h>
	75	#include <vm/vm_protos.h>
	76
	77	/* forward declaration */
	78	vstruct_t vs_object_create(vm_size_t size);
	79
	80	/*
	81	* List of all vstructs. A specific vstruct is
	82	* found directly via its port, this list is
	83	* only used for monitoring purposes by the
	84	* default_pager_object* calls and by ps_delete
	85	* when abstract memory objects must be scanned
	86	* to remove any live storage on a segment which
	87	* is to be removed.
	88	*/
	89	struct vstruct_list_head vstruct_list;
	90
	91	__private_extern__ void
	92	vstruct_list_insert(
	93	vstruct_t vs)
	94	{
	95	VSL_LOCK();
	96	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	97	vstruct_list.vsl_count++;
	98	VSL_UNLOCK();
	99	}
	100
	101
	102	__private_extern__ void
	103	vstruct_list_delete(
	104	vstruct_t vs)
	105	{
	106	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	107	vstruct_list.vsl_count--;
	108	}
	109
	110	/*
	111	* We use the sequence numbers on requests to regulate
	112	* our parallelism. In general, we allow multiple reads and writes
	113	* to proceed in parallel, with the exception that reads must
	114	* wait for previous writes to finish. (Because the kernel might
	115	* generate a data-request for a page on the heels of a data-write
	116	* for the same page, and we must avoid returning stale data.)
	117	* terminate requests wait for preceding reads and writes to finish.
	118	*/
	119
	120	static unsigned int default_pager_total = 0; /* debugging */
	121	static unsigned int default_pager_wait_seqno = 0; /* debugging */
	122	static unsigned int default_pager_wait_read = 0; /* debugging */
	123	static unsigned int default_pager_wait_write = 0; /* debugging */
	124
	125	__private_extern__ void
	126	vs_async_wait(
	127	vstruct_t vs)
	128	{
	129
	130	ASSERT(vs->vs_async_pending >= 0);
	131	while (vs->vs_async_pending > 0) {
	132	vs->vs_waiting_async = TRUE;
	133	assert_wait(&vs->vs_async_pending, THREAD_UNINT);
	134	VS_UNLOCK(vs);
	135	thread_block(THREAD_CONTINUE_NULL);
	136	VS_LOCK(vs);
	137	}
	138	ASSERT(vs->vs_async_pending == 0);
	139	}
	140
	141
	142	#if PARALLEL
	143	/*
	144	* Waits for correct sequence number. Leaves pager locked.
	145	*
	146	* JMM - Sequence numbers guarantee ordering of requests generated
	147	* by a single thread if the receiver is multithreaded and
	148	* the interfaces are asynchronous (i.e. sender can generate
	149	* more than one request before the first is received in the
	150	* pager). Normally, IPC would generate these number in that
	151	* case. But we are trying to avoid using IPC for the in-kernel
	152	* scenario. Since these are actually invoked synchronously
	153	* anyway (in-kernel), we can just fake the sequence number
	154	* generation here (thus avoiding the dependence on IPC).
	155	*/
	156	__private_extern__ void
	157	vs_lock(
	158	vstruct_t vs)
	159	{
	160	mach_port_seqno_t seqno;
	161
	162	default_pager_total++;
	163	VS_LOCK(vs);
	164
	165	seqno = vs->vs_next_seqno++;
	166
	167	while (vs->vs_seqno != seqno) {
	168	default_pager_wait_seqno++;
	169	vs->vs_waiting_seqno = TRUE;
	170	assert_wait(&vs->vs_seqno, THREAD_UNINT);
	171	VS_UNLOCK(vs);
	172	thread_block(THREAD_CONTINUE_NULL);
	173	VS_LOCK(vs);
	174	}
	175	}
	176
	177	/*
	178	* Increments sequence number and unlocks pager.
	179	*/
	180	__private_extern__ void
	181	vs_unlock(vstruct_t vs)
	182	{
	183	vs->vs_seqno++;
	184	if (vs->vs_waiting_seqno) {
	185	vs->vs_waiting_seqno = FALSE;
	186	VS_UNLOCK(vs);
	187	thread_wakeup(&vs->vs_seqno);
	188	return;
	189	}
	190	VS_UNLOCK(vs);
	191	}
	192
	193	/*
	194	* Start a read - one more reader. Pager must be locked.
	195	*/
	196	__private_extern__ void
	197	vs_start_read(
	198	vstruct_t vs)
	199	{
	200	vs->vs_readers++;
	201	}
	202
	203	/*
	204	* Wait for readers. Unlocks and relocks pager if wait needed.
	205	*/
	206	__private_extern__ void
	207	vs_wait_for_readers(
	208	vstruct_t vs)
	209	{
	210	while (vs->vs_readers != 0) {
	211	default_pager_wait_read++;
	212	vs->vs_waiting_read = TRUE;
	213	assert_wait(&vs->vs_readers, THREAD_UNINT);
	214	VS_UNLOCK(vs);
	215	thread_block(THREAD_CONTINUE_NULL);
	216	VS_LOCK(vs);
	217	}
	218	}
	219
	220	/*
	221	* Finish a read. Pager is unlocked and returns unlocked.
	222	*/
	223	__private_extern__ void
	224	vs_finish_read(
	225	vstruct_t vs)
	226	{
	227	VS_LOCK(vs);
	228	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
	229	vs->vs_waiting_read = FALSE;
	230	VS_UNLOCK(vs);
	231	thread_wakeup(&vs->vs_readers);
	232	return;
	233	}
	234	VS_UNLOCK(vs);
	235	}
	236
	237	/*
	238	* Start a write - one more writer. Pager must be locked.
	239	*/
	240	__private_extern__ void
	241	vs_start_write(
	242	vstruct_t vs)
	243	{
	244	vs->vs_writers++;
	245	}
	246
	247	/*
	248	* Wait for writers. Unlocks and relocks pager if wait needed.
	249	*/
	250	__private_extern__ void
	251	vs_wait_for_writers(
	252	vstruct_t vs)
	253	{
	254	while (vs->vs_writers != 0) {
	255	default_pager_wait_write++;
	256	vs->vs_waiting_write = TRUE;
	257	assert_wait(&vs->vs_writers, THREAD_UNINT);
	258	VS_UNLOCK(vs);
	259	thread_block(THREAD_CONTINUE_NULL);
	260	VS_LOCK(vs);
	261	}
	262	vs_async_wait(vs);
	263	}
	264
	265	/* This is to be used for the transfer from segment code ONLY */
	266	/* The transfer code holds off vs destruction by keeping the */
	267	/* vs_async_wait count non-zero. It will not ocnflict with */
	268	/* other writers on an async basis because it only writes on */
	269	/* a cluster basis into fresh (as of sync time) cluster locations */
	270
	271	__private_extern__ void
	272	vs_wait_for_sync_writers(
	273	vstruct_t vs)
	274	{
	275	while (vs->vs_writers != 0) {
	276	default_pager_wait_write++;
	277	vs->vs_waiting_write = TRUE;
	278	assert_wait(&vs->vs_writers, THREAD_UNINT);
	279	VS_UNLOCK(vs);
	280	thread_block(THREAD_CONTINUE_NULL);
	281	VS_LOCK(vs);
	282	}
	283	}
	284
	285
	286	/*
	287	* Finish a write. Pager is unlocked and returns unlocked.
	288	*/
	289	__private_extern__ void
	290	vs_finish_write(
	291	vstruct_t vs)
	292	{
	293	VS_LOCK(vs);
	294	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
	295	vs->vs_waiting_write = FALSE;
	296	VS_UNLOCK(vs);
	297	thread_wakeup(&vs->vs_writers);
	298	return;
	299	}
	300	VS_UNLOCK(vs);
	301	}
	302	#endif /* PARALLEL */
	303
	304	vstruct_t
	305	vs_object_create(
	306	vm_size_t size)
	307	{
	308	vstruct_t vs;
	309
	310	/*
	311	* Allocate a vstruct. If there are any problems, then report them
	312	* to the console.
	313	*/
	314	vs = ps_vstruct_create(size);
	315	if (vs == VSTRUCT_NULL) {
	316	dprintf(("vs_object_create: unable to allocate %s\n",
	317	"-- either run swapon command or reboot"));
	318	return VSTRUCT_NULL;
	319	}
	320
	321	return vs;
	322	}
	323
	324	#if 0
	325	void default_pager_add(vstruct_t, boolean_t); /* forward */
	326
	327	void
	328	default_pager_add(
	329	vstruct_t vs,
	330	boolean_t internal)
	331	{
	332	memory_object_t mem_obj = vs->vs_mem_obj;
	333	mach_port_t pset;
	334	mach_port_mscount_t sync;
	335	mach_port_t previous;
	336	kern_return_t kr;
	337	static char here[] = "default_pager_add";
	338
	339	/*
	340	* The port currently has a make-send count of zero,
	341	* because either we just created the port or we just
	342	* received the port in a memory_object_create request.
	343	*/
	344
	345	if (internal) {
	346	/* possibly generate an immediate no-senders notification */
	347	sync = 0;
	348	pset = default_pager_internal_set;
	349	} else {
	350	/* delay notification till send right is created */
	351	sync = 1;
	352	pset = default_pager_external_set;
	353	}
	354
	355	ipc_port_make_sonce(mem_obj);
	356	ip_lock(mem_obj); /* unlocked in nsrequest below */
	357	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
	358	}
	359
	360	#endif
	361
	362	kern_return_t
	363	dp_memory_object_init(
	364	memory_object_t mem_obj,
	365	memory_object_control_t control,
	366	__unused vm_size_t pager_page_size)
	367	{
	368	vstruct_t vs;
	369
	370	assert(pager_page_size == vm_page_size);
	371
	372	memory_object_control_reference(control);
	373
	374	vs_lookup(mem_obj, vs);
	375	vs_lock(vs);
	376
	377	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
	378	Panic("bad request");
	379
	380	vs->vs_control = control;
	381	vs_unlock(vs);
	382
	383	return KERN_SUCCESS;
	384	}
	385
	386	kern_return_t
	387	dp_memory_object_synchronize(
	388	memory_object_t mem_obj,
	389	memory_object_offset_t offset,
	390	vm_size_t length,
	391	__unused vm_sync_t flags)
	392	{
	393	vstruct_t vs;
	394
	395	vs_lookup(mem_obj, vs);
	396	vs_lock(vs);
	397	vs_unlock(vs);
	398
	399	memory_object_synchronize_completed(vs->vs_control, offset, length);
	400
	401	return KERN_SUCCESS;
	402	}
	403
	404	kern_return_t
	405	dp_memory_object_unmap(
	406	__unused memory_object_t mem_obj)
	407	{
	408	panic("dp_memory_object_unmap");
	409
	410	return KERN_FAILURE;
	411	}
	412
	413	kern_return_t
	414	dp_memory_object_terminate(
	415	memory_object_t mem_obj)
	416	{
	417	memory_object_control_t control;
	418	vstruct_t vs;
	419
	420	/*
	421	* control port is a receive right, not a send right.
	422	*/
	423
	424	vs_lookup(mem_obj, vs);
	425	vs_lock(vs);
	426
	427	/*
	428	* Wait for read and write requests to terminate.
	429	*/
	430
	431	vs_wait_for_readers(vs);
	432	vs_wait_for_writers(vs);
	433
	434	/*
	435	* After memory_object_terminate both memory_object_init
	436	* and a no-senders notification are possible, so we need
	437	* to clean up our reference to the memory_object_control
	438	* to prepare for a new init.
	439	*/
	440
	441	control = vs->vs_control;
	442	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
	443
	444	/* a bit of special case ugliness here. Wakeup any waiting reads */
	445	/* these data requests had to be removed from the seqno traffic */
	446	/* based on a performance bottleneck with large memory objects */
	447	/* the problem will right itself with the new component based */
	448	/* synchronous interface. The new async will be able to return */
	449	/* failure during its sync phase. In the mean time ... */
	450
	451	thread_wakeup(&vs->vs_writers);
	452	thread_wakeup(&vs->vs_async_pending);
	453
	454	vs_unlock(vs);
	455
	456	/*
	457	* Now we deallocate our reference on the control.
	458	*/
	459	memory_object_control_deallocate(control);
	460	return KERN_SUCCESS;
	461	}
	462
	463	void
	464	dp_memory_object_reference(
	465	memory_object_t mem_obj)
	466	{
	467	vstruct_t vs;
	468
	469	vs_lookup_safe(mem_obj, vs);
	470	if (vs == VSTRUCT_NULL)
	471	return;
	472
	473	VS_LOCK(vs);
	474	assert(vs->vs_references > 0);
	475	vs->vs_references++;
	476	VS_UNLOCK(vs);
	477	}
	478
	479	void
	480	dp_memory_object_deallocate(
	481	memory_object_t mem_obj)
	482	{
	483	vstruct_t vs;
	484	mach_port_seqno_t seqno;
	485
	486	/*
	487	* Because we don't give out multiple first references
	488	* for a memory object, there can't be a race
	489	* between getting a deallocate call and creating
	490	* a new reference for the object.
	491	*/
	492
	493	vs_lookup_safe(mem_obj, vs);
	494	if (vs == VSTRUCT_NULL)
	495	return;
	496
	497	VS_LOCK(vs);
	498	if (--vs->vs_references > 0) {
	499	VS_UNLOCK(vs);
	500	return;
	501	}
	502
	503	seqno = vs->vs_next_seqno++;
	504	while (vs->vs_seqno != seqno) {
	505	default_pager_wait_seqno++;
	506	vs->vs_waiting_seqno = TRUE;
	507	assert_wait(&vs->vs_seqno, THREAD_UNINT);
	508	VS_UNLOCK(vs);
	509	thread_block(THREAD_CONTINUE_NULL);
	510	VS_LOCK(vs);
	511	}
	512
	513	vs_async_wait(vs); /* wait for pending async IO */
	514
	515	/* do not delete the vs structure until the referencing pointers */
	516	/* in the vstruct list have been expunged */
	517
	518	/* get VSL_LOCK out of order by using TRY mechanism */
	519	while(!VSL_LOCK_TRY()) {
	520	VS_UNLOCK(vs);
	521	VSL_LOCK();
	522	VSL_UNLOCK();
	523	VS_LOCK(vs);
	524	vs_async_wait(vs); /* wait for pending async IO */
	525	}
	526
	527
	528	/*
	529	* We shouldn't get a deallocation call
	530	* when the kernel has the object cached.
	531	*/
	532	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
	533	Panic("bad request");
	534
	535	/*
	536	* Unlock the pager (though there should be no one
	537	* waiting for it).
	538	*/
	539	VS_UNLOCK(vs);
	540
	541	/* Lock out paging segment removal for the duration of this */
	542	/* call. We are vulnerable to losing a paging segment we rely */
	543	/* on as soon as we remove ourselves from the VSL and unlock */
	544
	545	/* Keep our thread from blocking on attempt to trigger backing */
	546	/* store release */
	547	backing_store_release_trigger_disable += 1;
	548
	549	/*
	550	* Remove the memory object port association, and then
	551	* the destroy the port itself. We must remove the object
	552	* from the port list before deallocating the pager,
	553	* because of default_pager_objects.
	554	*/
	555	vstruct_list_delete(vs);
	556	VSL_UNLOCK();
	557
	558	ps_vstruct_dealloc(vs);
	559
	560	VSL_LOCK();
	561	backing_store_release_trigger_disable -= 1;
	562	if(backing_store_release_trigger_disable == 0) {
	563	thread_wakeup((event_t)&backing_store_release_trigger_disable);
	564	}
	565	VSL_UNLOCK();
	566	}
	567
	568	kern_return_t
	569	dp_memory_object_data_request(
	570	memory_object_t mem_obj,
	571	memory_object_offset_t offset,
	572	vm_size_t length,
	573	__unused vm_prot_t protection_required)
	574	{
	575	vstruct_t vs;
	576
	577	GSTAT(global_stats.gs_pagein_calls++);
	578
	579
	580	/* CDY at this moment vs_lookup panics when presented with the wrong */
	581	/* port. As we are expanding this pager to support user interfaces */
	582	/* this should be changed to return kern_failure */
	583	vs_lookup(mem_obj, vs);
	584	vs_lock(vs);
	585
	586	/* We are going to relax the strict sequencing here for performance */
	587	/* reasons. We can do this because we know that the read and */
	588	/* write threads are different and we rely on synchronization */
	589	/* of read and write requests at the cache memory_object level */
	590	/* break out wait_for_writers, all of this goes away when */
	591	/* we get real control of seqno with the new component interface */
	592
	593	if (vs->vs_writers != 0) {
	594	/* you can't hold on to the seqno and go */
	595	/* to sleep like that */
	596	vs_unlock(vs); /* bump internal count of seqno */
	597	VS_LOCK(vs);
	598	while (vs->vs_writers != 0) {
	599	default_pager_wait_write++;
	600	vs->vs_waiting_write = TRUE;
	601	assert_wait(&vs->vs_writers, THREAD_UNINT);
	602	VS_UNLOCK(vs);
	603	thread_block(THREAD_CONTINUE_NULL);
	604	VS_LOCK(vs);
	605	vs_async_wait(vs);
	606	}
	607	if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
	608	VS_UNLOCK(vs);
	609	return KERN_FAILURE;
	610	}
	611	vs_start_read(vs);
	612	VS_UNLOCK(vs);
	613	} else {
	614	vs_start_read(vs);
	615	vs_unlock(vs);
	616	}
	617
	618	/*
	619	* Request must be on a page boundary and a multiple of pages.
	620	*/
	621	if ((offset & vm_page_mask) != 0 \|\| (length & vm_page_mask) != 0)
	622	Panic("bad alignment");
	623
	624	pvs_cluster_read(vs, (vm_offset_t)offset, length);
	625
	626	vs_finish_read(vs);
	627
	628	return KERN_SUCCESS;
	629	}
	630
	631	/*
	632	* memory_object_data_initialize: check whether we already have each page, and
	633	* write it if we do not. The implementation is far from optimized, and
	634	* also assumes that the default_pager is single-threaded.
	635	*/
	636	/* It is questionable whether or not a pager should decide what is relevant */
	637	/* and what is not in data sent from the kernel. Data initialize has been */
	638	/* changed to copy back all data sent to it in preparation for its eventual */
	639	/* merge with data return. It is the kernel that should decide what pages */
	640	/* to write back. As of the writing of this note, this is indeed the case */
	641	/* the kernel writes back one page at a time through this interface */
	642
	643	kern_return_t
	644	dp_memory_object_data_initialize(
	645	memory_object_t mem_obj,
	646	memory_object_offset_t offset,
	647	vm_size_t size)
	648	{
	649	vstruct_t vs;
	650
	651	DP_DEBUG(DEBUG_MO_EXTERNAL,
	652	("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
	653	(int)mem_obj, (int)offset, (int)size));
	654	GSTAT(global_stats.gs_pages_init += atop_32(size));
	655
	656	vs_lookup(mem_obj, vs);
	657	vs_lock(vs);
	658	vs_start_write(vs);
	659	vs_unlock(vs);
	660
	661	/*
	662	* Write the data via clustered writes. vs_cluster_write will
	663	* loop if the address range specified crosses cluster
	664	* boundaries.
	665	*/
	666	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
	667
	668	vs_finish_write(vs);
	669
	670	return KERN_SUCCESS;
	671	}
	672
	673	kern_return_t
	674	dp_memory_object_data_unlock(
	675	__unused memory_object_t mem_obj,
	676	__unused memory_object_offset_t offset,
	677	__unused vm_size_t size,
	678	__unused vm_prot_t desired_access)
	679	{
	680	Panic("dp_memory_object_data_unlock: illegal");
	681	return KERN_FAILURE;
	682	}
	683
	684
	685	/ARGSUSED8/
	686	kern_return_t
	687	dp_memory_object_data_return(
	688	memory_object_t mem_obj,
	689	memory_object_offset_t offset,
	690	vm_size_t size,
	691	__unused memory_object_offset_t *resid_offset,
	692	__unused int *io_error,
	693	__unused boolean_t dirty,
	694	__unused boolean_t kernel_copy,
	695	__unused int upl_flags)
	696	{
	697	vstruct_t vs;
	698
	699	DP_DEBUG(DEBUG_MO_EXTERNAL,
	700	("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
	701	(int)mem_obj, (int)offset, (int)size));
	702	GSTAT(global_stats.gs_pageout_calls++);
	703
	704	/* This routine is called by the pageout thread. The pageout thread */
	705	/* cannot be blocked by read activities unless the read activities */
	706	/* Therefore the grant of vs lock must be done on a try versus a */
	707	/* blocking basis. The code below relies on the fact that the */
	708	/* interface is synchronous. Should this interface be again async */
	709	/* for some type of pager in the future the pages will have to be */
	710	/* returned through a separate, asynchronous path. */
	711
	712	vs_lookup(mem_obj, vs);
	713
	714	default_pager_total++;
	715	if(!VS_TRY_LOCK(vs)) {
	716	/* the call below will not be done by caller when we have */
	717	/* a synchronous interface */
	718	/* return KERN_LOCK_OWNED; */
	719	upl_t upl;
	720	int page_list_count = 0;
	721	memory_object_super_upl_request(vs->vs_control,
	722	(memory_object_offset_t)offset,
	723	size, size,
	724	&upl, NULL, &page_list_count,
	725	UPL_NOBLOCK \| UPL_CLEAN_IN_PLACE
	726	\| UPL_NO_SYNC \| UPL_COPYOUT_FROM);
	727	upl_abort(upl,0);
	728	upl_deallocate(upl);
	729	return KERN_SUCCESS;
	730	}
	731
	732	if ((vs->vs_seqno != vs->vs_next_seqno++)
	733	\|\| (vs->vs_readers)
	734	\|\| (vs->vs_xfer_pending)) {
	735	upl_t upl;
	736	int page_list_count = 0;
	737
	738	vs->vs_next_seqno--;
	739	VS_UNLOCK(vs);
	740
	741	/* the call below will not be done by caller when we have */
	742	/* a synchronous interface */
	743	/* return KERN_LOCK_OWNED; */
	744	memory_object_super_upl_request(vs->vs_control,
	745	(memory_object_offset_t)offset,
	746	size, size,
	747	&upl, NULL, &page_list_count,
	748	UPL_NOBLOCK \| UPL_CLEAN_IN_PLACE
	749	\| UPL_NO_SYNC \| UPL_COPYOUT_FROM);
	750	upl_abort(upl,0);
	751	upl_deallocate(upl);
	752	return KERN_SUCCESS;
	753	}
	754
	755	if ((size % vm_page_size) != 0)
	756	Panic("bad alignment");
	757
	758	vs_start_write(vs);
	759
	760
	761	vs->vs_async_pending += 1; /* protect from backing store contraction */
	762	vs_unlock(vs);
	763
	764	/*
	765	* Write the data via clustered writes. vs_cluster_write will
	766	* loop if the address range specified crosses cluster
	767	* boundaries.
	768	*/
	769	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
	770
	771	vs_finish_write(vs);
	772
	773	/* temporary, need a finer lock based on cluster */
	774
	775	VS_LOCK(vs);
	776	vs->vs_async_pending -= 1; /* release vs_async_wait */
	777	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
	778	vs->vs_waiting_async = FALSE;
	779	VS_UNLOCK(vs);
	780	thread_wakeup(&vs->vs_async_pending);
	781	} else {
	782	VS_UNLOCK(vs);
	783	}
	784
	785
	786	return KERN_SUCCESS;
	787	}
	788
	789	/*
	790	* Routine: default_pager_memory_object_create
	791	* Purpose:
	792	* Handle requests for memory objects from the
	793	* kernel.
	794	* Notes:
	795	* Because we only give out the default memory
	796	* manager port to the kernel, we don't have to
	797	* be so paranoid about the contents.
	798	*/
	799	kern_return_t
	800	default_pager_memory_object_create(
	801	__unused memory_object_default_t dmm,
	802	vm_size_t new_size,
	803	memory_object_t *new_mem_obj)
	804	{
	805	vstruct_t vs;
	806
	807	assert(dmm == default_pager_object);
	808
	809	vs = vs_object_create(new_size);
	810	if (vs == VSTRUCT_NULL)
	811	return KERN_RESOURCE_SHORTAGE;
	812
	813	vs->vs_next_seqno = 0;
	814
	815	/*
	816	* Set up associations between this memory object
	817	* and this default_pager structure
	818	*/
	819
	820	vs->vs_mem_obj = ISVS;
	821	vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
	822
	823	/*
	824	* After this, other threads might receive requests
	825	* for this memory object or find it in the port list.
	826	*/
	827
	828	vstruct_list_insert(vs);
	829	*new_mem_obj = vs_to_mem_obj(vs);
	830	return KERN_SUCCESS;
	831	}
	832
	833	/*
	834	* Create an external object.
	835	*/
	836	kern_return_t
	837	default_pager_object_create(
	838	default_pager_t default_pager,
	839	vm_size_t size,
	840	memory_object_t *mem_objp)
	841	{
	842	vstruct_t vs;
	843
	844	if (default_pager != default_pager_object)
	845	return KERN_INVALID_ARGUMENT;
	846
	847	vs = vs_object_create(size);
	848	if (vs == VSTRUCT_NULL)
	849	return KERN_RESOURCE_SHORTAGE;
	850
	851	/*
	852	* Set up associations between the default pager
	853	* and this vstruct structure
	854	*/
	855	vs->vs_mem_obj = ISVS;
	856	vstruct_list_insert(vs);
	857	*mem_objp = vs_to_mem_obj(vs);
	858	return KERN_SUCCESS;
	859	}
	860
	861	kern_return_t
	862	default_pager_objects(
	863	default_pager_t default_pager,
	864	default_pager_object_array_t *objectsp,
	865	mach_msg_type_number_t *ocountp,
	866	mach_port_array_t *portsp,
	867	mach_msg_type_number_t *pcountp)
	868	{
	869	vm_offset_t oaddr = 0; /* memory for objects */
	870	vm_size_t osize = 0; /* current size */
	871	default_pager_object_t * objects;
	872	unsigned int opotential = 0;
	873
	874	vm_map_copy_t pcopy = 0; /* copy handle for pagers */
	875	vm_size_t psize = 0; /* current size */
	876	memory_object_t * pagers;
	877	unsigned int ppotential = 0;
	878
	879	unsigned int actual;
	880	unsigned int num_objects;
	881	kern_return_t kr;
	882	vstruct_t entry;
	883
	884	if (default_pager != default_pager_object)
	885	return KERN_INVALID_ARGUMENT;
	886
	887	/*
	888	* We will send no more than this many
	889	*/
	890	actual = vstruct_list.vsl_count;
	891
	892	/*
	893	* Out out-of-line port arrays are simply kalloc'ed.
	894	*/
	895	psize = round_page(actual * sizeof * pagers);
	896	ppotential = psize / sizeof * pagers;
	897	pagers = (memory_object_t *)kalloc(psize);
	898	if (0 == pagers)
	899	return KERN_RESOURCE_SHORTAGE;
	900
	901	/*
	902	* returned out of line data must be allocated out
	903	* the ipc_kernel_map, wired down, filled in, and
	904	* then "copied in" as if it had been sent by a
	905	* user process.
	906	*/
	907	osize = round_page(actual * sizeof * objects);
	908	opotential = osize / sizeof * objects;
	909	kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
	910	if (KERN_SUCCESS != kr) {
	911	kfree(pagers, psize);
	912	return KERN_RESOURCE_SHORTAGE;
	913	}
	914	objects = (default_pager_object_t *)oaddr;
	915
	916
	917	/*
	918	* Now scan the list.
	919	*/
	920
	921	VSL_LOCK();
	922
	923	num_objects = 0;
	924	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
	925
	926	memory_object_t pager;
	927	vm_size_t size;
	928
	929	if ((num_objects >= opotential) \|\|
	930	(num_objects >= ppotential)) {
	931
	932	/*
	933	* This should be rare. In any case,
	934	* we will only miss recent objects,
	935	* because they are added at the end.
	936	*/
	937	break;
	938	}
	939
	940	/*
	941	* Avoid interfering with normal operations
	942	*/
	943	if (!VS_MAP_TRY_LOCK(entry))
	944	goto not_this_one;
	945	size = ps_vstruct_allocated_size(entry);
	946	VS_MAP_UNLOCK(entry);
	947
	948	VS_LOCK(entry);
	949
	950	/*
	951	* We need a reference for our caller. Adding this
	952	* reference through the linked list could race with
	953	* destruction of the object. If we find the object
	954	* has no references, just give up on it.
	955	*/
	956	VS_LOCK(entry);
	957	if (entry->vs_references == 0) {
	958	VS_UNLOCK(entry);
	959	goto not_this_one;
	960	}
	961	pager = vs_to_mem_obj(entry);
	962	dp_memory_object_reference(pager);
	963	VS_UNLOCK(entry);
	964
	965	/* the arrays are wired, so no deadlock worries */
	966
	967	objects[num_objects].dpo_object = (vm_offset_t) entry;
	968	objects[num_objects].dpo_size = size;
	969	pagers [num_objects++] = pager;
	970	continue;
	971
	972	not_this_one:
	973	/*
	974	* Do not return garbage
	975	*/
	976	objects[num_objects].dpo_object = (vm_offset_t) 0;
	977	objects[num_objects].dpo_size = 0;
	978	pagers[num_objects++] = MEMORY_OBJECT_NULL;
	979
	980	}
	981
	982	VSL_UNLOCK();
	983
	984	/* clear out any excess allocation */
	985	while (num_objects < opotential) {
	986	objects[--opotential].dpo_object = (vm_offset_t) 0;
	987	objects[opotential].dpo_size = 0;
	988	}
	989	while (num_objects < ppotential) {
	990	pagers[--ppotential] = MEMORY_OBJECT_NULL;
	991	}
	992
	993	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
	994	vm_map_round_page(oaddr + osize), FALSE);
	995	assert(KERN_SUCCESS == kr);
	996	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
	997	(vm_map_size_t)osize, TRUE, &pcopy);
	998	assert(KERN_SUCCESS == kr);
	999
	1000	*objectsp = (default_pager_object_array_t)objects;
	1001	*ocountp = num_objects;
	1002	*portsp = (mach_port_array_t)pcopy;
	1003	*pcountp = num_objects;
	1004
	1005	return KERN_SUCCESS;
	1006	}
	1007
	1008	kern_return_t
	1009	default_pager_object_pages(
	1010	default_pager_t default_pager,
	1011	mach_port_t memory_object,
	1012	default_pager_page_array_t *pagesp,
	1013	mach_msg_type_number_t *countp)
	1014	{
	1015	vm_offset_t addr = 0; /* memory for page offsets */
	1016	vm_size_t size = 0; /* current memory size */
	1017	vm_map_copy_t copy;
	1018	default_pager_page_t * pages = 0;
	1019	unsigned int potential;
	1020	unsigned int actual;
	1021	kern_return_t kr;
	1022	memory_object_t object;
	1023
	1024	if (default_pager != default_pager_object)
	1025	return KERN_INVALID_ARGUMENT;
	1026
	1027	object = (memory_object_t) memory_object;
	1028
	1029	potential = 0;
	1030	for (;;) {
	1031	vstruct_t entry;
	1032
	1033	VSL_LOCK();
	1034	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
	1035	vs_links) {
	1036	VS_LOCK(entry);
	1037	if (vs_to_mem_obj(entry) == object) {
	1038	VSL_UNLOCK();
	1039	goto found_object;
	1040	}
	1041	VS_UNLOCK(entry);
	1042	}
	1043	VSL_UNLOCK();
	1044
	1045	/* did not find the object */
	1046	if (0 != addr)
	1047	kmem_free(ipc_kernel_map, addr, size);
	1048
	1049	return KERN_INVALID_ARGUMENT;
	1050
	1051	found_object:
	1052
	1053	if (!VS_MAP_TRY_LOCK(entry)) {
	1054	/* oh well bad luck */
	1055	int wresult;
	1056
	1057	VS_UNLOCK(entry);
	1058
	1059	assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
	1060	wresult = thread_block(THREAD_CONTINUE_NULL);
	1061	assert(wresult == THREAD_TIMED_OUT);
	1062	continue;
	1063	}
	1064
	1065	actual = ps_vstruct_allocated_pages(entry, pages, potential);
	1066	VS_MAP_UNLOCK(entry);
	1067	VS_UNLOCK(entry);
	1068
	1069	if (actual <= potential)
	1070	break;
	1071
	1072	/* allocate more memory */
	1073	if (0 != addr)
	1074	kmem_free(ipc_kernel_map, addr, size);
	1075
	1076	size = round_page(actual * sizeof * pages);
	1077	kr = kmem_alloc(ipc_kernel_map, &addr, size);
	1078	if (KERN_SUCCESS != kr)
	1079	return KERN_RESOURCE_SHORTAGE;
	1080
	1081	pages = (default_pager_page_t *)addr;
	1082	potential = size / sizeof * pages;
	1083	}
	1084
	1085	/*
	1086	* Clear unused memory.
	1087	*/
	1088	while (actual < potential)
	1089	pages[--potential].dpp_offset = 0;
	1090
	1091	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
	1092	vm_map_round_page(addr + size), FALSE);
	1093	assert(KERN_SUCCESS == kr);
	1094	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
	1095	(vm_map_size_t)size, TRUE, &copy);
	1096	assert(KERN_SUCCESS == kr);
	1097
	1098
	1099	*pagesp = (default_pager_page_array_t)copy;
	1100	*countp = actual;
	1101	return KERN_SUCCESS;
	1102	}