/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(vm_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port, this list is
 * only used for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;

__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}

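/*
 * Unlike vstruct_list_insert above, this routine does not take VSL_LOCK
 * itself; its callers (for example dp_memory_object_deallocate below)
 * are expected to hold the vstruct list lock when removing an entry.
 */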
__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */

static unsigned int default_pager_total = 0;		/* debugging */
static unsigned int default_pager_wait_seqno = 0;	/* debugging */
static unsigned int default_pager_wait_read = 0;	/* debugging */
static unsigned int default_pager_wait_write = 0;	/* debugging */

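/*
 * Wait for all asynchronous I/O pending on the vstruct to drain.
 * Called and returns with the VS lock held; the lock is dropped
 * around the thread_block() while we sleep on vs_async_pending.
 */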
__private_extern__ void
vs_async_wait(
	vstruct_t	vs)
{

	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}


#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	 by a single thread if the receiver is multithreaded and
 *	 the interfaces are asynchronous (i.e. sender can generate
 *	 more than one request before the first is received in the
 *	 pager).  Normally, IPC would generate these numbers in that
 *	 case.  But we are trying to avoid using IPC for the in-kernel
 *	 scenario.  Since these are actually invoked synchronously
 *	 anyway (in-kernel), we can just fake the sequence number
 *	 generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t	vs)
{
	mach_port_seqno_t	seqno;

	default_pager_total++;
	VS_LOCK(vs);

	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

/* This is to be used for the transfer from segment code ONLY */
/* The transfer code holds off vs destruction by keeping the  */
/* vs_async_wait count non-zero.  It will not conflict with   */
/* other writers on an async basis because it only writes on  */
/* a cluster basis into fresh (as of sync time) cluster locations */

__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}


/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
		vs->vs_waiting_write = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_writers);
		return;
	}
	VS_UNLOCK(vs);
}
#endif	/* PARALLEL */
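
/*
 * Illustrative sketch (not compiled): the canonical way a request
 * handler in this file serializes itself using the primitives above.
 * "example_read_request" and its argument are hypothetical names; the
 * body only uses macros and routines that already exist here
 * (vs_lookup, vs_lock, vs_wait_for_writers, vs_start_read, vs_unlock,
 * vs_finish_read) and mirrors the structure of
 * dp_memory_object_data_request below.
 */
#if 0
static kern_return_t
example_read_request(memory_object_t mem_obj)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);		/* translate memory object to vstruct */
	vs_lock(vs);			/* wait for our sequence number, lock pager */
	vs_wait_for_writers(vs);	/* reads must not see stale data */
	vs_start_read(vs);		/* count ourselves as an active reader */
	vs_unlock(vs);			/* bump seqno so later requests may proceed */

	/* ... perform the actual paging work here ... */

	vs_finish_read(vs);		/* drop the reader count, wake any waiter */
	return KERN_SUCCESS;
}
#endif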

vstruct_t
vs_object_create(
	vm_size_t size)
{
	vstruct_t	vs;

	/*
	 * Allocate a vstruct. If there are any problems, then report them
	 * to the console.
	 */
	vs = ps_vstruct_create(size);
	if (vs == VSTRUCT_NULL) {
		dprintf(("vs_object_create: unable to allocate %s\n",
			 "-- either run swapon command or reboot"));
		return VSTRUCT_NULL;
	}

	return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t		mem_obj = vs->vs_mem_obj;
	mach_port_t		pset;
	mach_port_mscount_t	sync;
	mach_port_t		previous;
	kern_return_t		kr;
	static char		here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ipc_port_make_sonce(mem_obj);
	ip_lock(mem_obj);  /* unlocked in nsrequest below */
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

kern_return_t
dp_memory_object_init(
	memory_object_t		mem_obj,
	memory_object_control_t	control,
	__unused vm_size_t	pager_page_size)
{
	vstruct_t		vs;

	assert(pager_page_size == vm_page_size);

	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	flags)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_unmap(
	__unused memory_object_t	mem_obj)
{
	panic("dp_memory_object_unmap");

	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_terminate(
	memory_object_t		mem_obj)
{
	memory_object_control_t	control;
	vstruct_t		vs;

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/* A bit of special-case ugliness here: wake up any waiting reads.   */
	/* These data requests had to be removed from the seqno traffic      */
	/* because of a performance bottleneck with large memory objects;    */
	/* the problem will right itself with the new component-based        */
	/* synchronous interface.  The new async side will be able to return */
	/* failure during its sync phase.  In the meantime ...               */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

void
dp_memory_object_reference(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	assert(vs->vs_references > 0);
	vs->vs_references++;
	VS_UNLOCK(vs);
}

void
dp_memory_object_deallocate(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;
	mach_port_seqno_t	seqno;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged */

	/* get VSL_LOCK out of order by using TRY mechanism */
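	/* Lock ordering is VSL before VS, but we already hold the VS  */
	/* lock here, so we may only try for VSL.  If the try fails we */
	/* drop VS, block on VSL normally (take it and release it) to  */
	/* let the current holder finish, then retake VS, re-wait for  */
	/* async IO and try again.                                     */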
	while(!VSL_LOCK_TRY()) {
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}


	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this   */
	/* call.  We are vulnerable to losing a paging segment we rely */
	/* on as soon as we remove ourselves from the VSL and unlock   */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects.
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if(backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();
}

kern_return_t
dp_memory_object_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_prot_t	protection_required)
{
	vstruct_t		vs;

	GSTAT(global_stats.gs_pagein_calls++);


	/* CDY: at this moment vs_lookup panics when presented with the wrong */
	/* port.  As we are expanding this pager to support user interfaces,  */
	/* this should be changed to return KERN_FAILURE instead.             */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance  */
	/* reasons.  We can do this because we know that the read and write  */
	/* threads are different, and we rely on synchronization of read and */
	/* write requests at the cache memory_object level.  We break out    */
	/* wait_for_writers inline below; all of this goes away when we get  */
	/* real control of seqno with the new component interface.           */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);	/* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	pvs_cluster_read(vs, (vm_offset_t)offset, length);

	vs_finish_read(vs);

	return KERN_SUCCESS;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether a pager should decide what is relevant and     */
/* what is not in data sent from the kernel.  Data initialize has been       */
/* changed to copy back all data sent to it, in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages   */
/* to write back.  As of the writing of this note this is indeed the case:   */
/* the kernel writes back one page at a time through this interface.         */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		size)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}


/*ARGSUSED8*/
kern_return_t
dp_memory_object_data_return(
	memory_object_t			mem_obj,
	memory_object_offset_t		offset,
	vm_size_t			size,
	__unused memory_object_offset_t	*resid_offset,
	__unused int			*io_error,
	__unused boolean_t		dirty,
	__unused boolean_t		kernel_copy,
	__unused int			upl_flags)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread.  The pageout thread */
	/* cannot be allowed to block on read activity, so the grant of the  */
	/* vs lock must be done on a try rather than a blocking basis.  The  */
	/* code below relies on the fact that the interface is synchronous.  */
	/* Should this interface become asynchronous again for some type of  */
	/* pager in the future, the pages will have to be returned through a */
	/* separate, asynchronous path.                                      */

	vs_lookup(mem_obj, vs);

	default_pager_total++;
	if(!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t		upl;
		int		page_list_count = 0;
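		/* Instead, take the pages into a UPL and immediately   */
		/* abort it, returning them to the VM system rather     */
		/* than blocking the pageout thread here on the vs lock. */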
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((vs->vs_seqno != vs->vs_next_seqno++)
			|| (vs->vs_readers)
			|| (vs->vs_xfer_pending)) {
		upl_t	upl;
		int	page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
				(memory_object_offset_t)offset,
				size, size,
				&upl, NULL, &page_list_count,
				UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
				| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);


	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}


	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 * 	Handle requests for memory objects from the
 * 	kernel.
 * Notes:
 * 	Because we only give out the default memory
 * 	manager port to the kernel, we don't have to
 * 	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	__unused memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t		vs;

	assert(dmm == default_pager_object);

	vs = vs_object_create(new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_mem_obj = ISVS;
	vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
	default_pager_t default_pager,
	vm_size_t	size,
	memory_object_t	*mem_objp)
{
	vstruct_t	vs;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	vs = vs_object_create(size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Set up associations between the default pager
	 * and this vstruct structure
	 */
	vs->vs_mem_obj = ISVS;
	vstruct_list_insert(vs);
	*mem_objp = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

kern_return_t
default_pager_objects(
	default_pager_t			default_pager,
	default_pager_object_array_t	*objectsp,
	mach_msg_type_number_t		*ocountp,
	mach_port_array_t		*portsp,
	mach_msg_type_number_t		*pcountp)
{
	vm_offset_t		oaddr = 0;	/* memory for objects */
	vm_size_t		osize = 0;	/* current size */
	default_pager_object_t	* objects;
	unsigned int		opotential = 0;

	vm_map_copy_t		pcopy = 0;	/* copy handle for pagers */
	vm_size_t		psize = 0;	/* current size */
	memory_object_t		* pagers;
	unsigned int		ppotential = 0;

	unsigned int		actual;
	unsigned int		num_objects;
	kern_return_t		kr;
	vstruct_t		entry;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	/*
	 * We will send no more than this many
	 */
	actual = vstruct_list.vsl_count;

	/*
	 * Our out-of-line port arrays are simply kalloc'ed.
	 */
	psize = round_page(actual * sizeof * pagers);
	ppotential = psize / sizeof * pagers;
	pagers = (memory_object_t *)kalloc(psize);
	if (0 == pagers)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Returned out-of-line data must be allocated out of
	 * the ipc_kernel_map, wired down, filled in, and
	 * then "copied in" as if it had been sent by a
	 * user process.
	 */
	osize = round_page(actual * sizeof * objects);
	opotential = osize / sizeof * objects;
	kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
	if (KERN_SUCCESS != kr) {
		kfree(pagers, psize);
		return KERN_RESOURCE_SHORTAGE;
	}
	objects = (default_pager_object_t *)oaddr;


	/*
	 * Now scan the list.
	 */

	VSL_LOCK();

	num_objects = 0;
	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

		memory_object_t		pager;
		vm_size_t		size;

		if ((num_objects >= opotential) ||
		    (num_objects >= ppotential)) {

			/*
			 * This should be rare.  In any case,
			 * we will only miss recent objects,
			 * because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!VS_MAP_TRY_LOCK(entry))
			goto not_this_one;
		size = ps_vstruct_allocated_size(entry);
		VS_MAP_UNLOCK(entry);

		VS_LOCK(entry);

		/*
		 * We need a reference for our caller.  Adding this
		 * reference through the linked list could race with
		 * destruction of the object.  If we find the object
		 * has no references, just give up on it.
		 */
		if (entry->vs_references == 0) {
			VS_UNLOCK(entry);
			goto not_this_one;
		}
		pager = vs_to_mem_obj(entry);
		dp_memory_object_reference(pager);
		VS_UNLOCK(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_objects].dpo_object = (vm_offset_t) entry;
		objects[num_objects].dpo_size = size;
		pagers [num_objects++] = pager;
		continue;

	    not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_objects].dpo_object = (vm_offset_t) 0;
		objects[num_objects].dpo_size = 0;
		pagers[num_objects++] = MEMORY_OBJECT_NULL;

	}

	VSL_UNLOCK();

	/* clear out any excess allocation */
	while (num_objects < opotential) {
		objects[--opotential].dpo_object = (vm_offset_t) 0;
		objects[opotential].dpo_size = 0;
	}
	while (num_objects < ppotential) {
		pagers[--ppotential] = MEMORY_OBJECT_NULL;
	}

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
			   vm_map_round_page(oaddr + osize), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
			   (vm_map_size_t)osize, TRUE, &pcopy);
	assert(KERN_SUCCESS == kr);

	*objectsp = (default_pager_object_array_t)objects;
	*ocountp = num_objects;
	*portsp = (mach_port_array_t)pcopy;
	*pcountp = num_objects;

	return KERN_SUCCESS;
}

kern_return_t
default_pager_object_pages(
	default_pager_t		default_pager,
	mach_port_t		memory_object,
	default_pager_page_array_t	*pagesp,
	mach_msg_type_number_t		*countp)
{
	vm_offset_t		addr = 0; /* memory for page offsets */
	vm_size_t		size = 0; /* current memory size */
	vm_map_copy_t		copy;
	default_pager_page_t	* pages = 0;
	unsigned int		potential;
	unsigned int		actual;
	kern_return_t		kr;
	memory_object_t		object;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	object = (memory_object_t) memory_object;

	potential = 0;
	for (;;) {
		vstruct_t	entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int wresult;

			VS_UNLOCK(entry);

			assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		size = round_page(actual * sizeof * pages);
		kr = kmem_alloc(ipc_kernel_map, &addr, size);
		if (KERN_SUCCESS != kr)
			return KERN_RESOURCE_SHORTAGE;

		pages = (default_pager_page_t *)addr;
		potential = size / sizeof * pages;
	}

	/*
	 * Clear unused memory.
	 */
	while (actual < potential)
		pages[--potential].dpp_offset = 0;

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
			   vm_map_round_page(addr + size), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
			   (vm_map_size_t)size, TRUE, &copy);
	assert(KERN_SUCCESS == kr);


	*pagesp = (default_pager_page_array_t)copy;
	*countp = actual;
	return KERN_SUCCESS;
}