git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/default_pager/dp_memory

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
	6	* The contents of this file constitute Original Code as defined in and
	7	* are subject to the Apple Public Source License Version 1.1 (the
	8	* "License"). You may not use this file except in compliance with the
	9	* License. Please obtain a copy of the License at
	10	* http://www.apple.com/publicsource and read it before using this file.
	11	*
	12	* This Original Code and all software distributed under the License are
	13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
	17	* License for the specific language governing rights and limitations
	18	* under the License.
	19	*
	20	* @APPLE_LICENSE_HEADER_END@
	21	*/
	22	/*
	23	* @OSF_COPYRIGHT@
	24	*/
	25	/*
	26	* Mach Operating System
	27	* Copyright (c) 1991,1990,1989 Carnegie Mellon University
	28	* All Rights Reserved.
	29	*
	30	* Permission to use, copy, modify and distribute this software and its
	31	* documentation is hereby granted, provided that both the copyright
	32	* notice and this permission notice appear in all copies of the
	33	* software, derivative works or modified versions, and any portions
	34	* thereof, and that both notices appear in supporting documentation.
	35	*
	36	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	37	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
	38	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	39	*
	40	* Carnegie Mellon requests users of this software to return to
	41	*
	42	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	43	* School of Computer Science
	44	* Carnegie Mellon University
	45	* Pittsburgh PA 15213-3890
	46	*
	47	* any improvements or extensions that they make and grant Carnegie Mellon
	48	* the rights to redistribute these changes.
	49	*/
	50
	51	/*
	52	* Default Pager.
	53	* Memory Object Management.
	54	*/
	55
	56	#include "default_pager_internal.h"
	57	#include <mach/memory_object_types.h>
	58	#include <mach/memory_object_server.h>
	59	#include <vm/memory_object.h>
	60	#include <vm/vm_pageout.h>
	61
	62
	63	/*
	64	* List of all vstructs. A specific vstruct is
	65	* found directly via its port, this list is
	66	* only used for monitoring purposes by the
	67	* default_pager_object* calls and by ps_delete
	68	* when abstract memory objects must be scanned
	69	* to remove any live storage on a segment which
	70	* is to be removed.
	71	*/
	72	struct vstruct_list_head vstruct_list;
	73
	74	__private_extern__ void
	75	vstruct_list_insert(
	76	vstruct_t vs)
	77	{
	78	VSL_LOCK();
	79	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	80	vstruct_list.vsl_count++;
	81	VSL_UNLOCK();
	82	}
	83
	84
	85	__private_extern__ void
	86	vstruct_list_delete(
	87	vstruct_t vs)
	88	{
	89	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	90	vstruct_list.vsl_count--;
	91	}
	92
	/*
	 * Sequence numbers on requests are used to regulate parallelism.
	 * In general multiple reads and writes may proceed in parallel, with
	 * the exception that reads must wait for earlier writes to finish:
	 * the kernel might generate a data-request for a page on the heels
	 * of a data-write for the same page, and stale data must not be
	 * returned.  Terminate requests wait for preceding reads and writes
	 * to finish.
	 */

	/*
	 * Event counters, kept for debugging only.  File-scope statics start
	 * out as zero.
	 */
	static unsigned int	default_pager_total;		/* lock attempts */
	static unsigned int	default_pager_wait_seqno;	/* waits on seqno */
	static unsigned int	default_pager_wait_read;	/* waits on readers */
	static unsigned int	default_pager_wait_write;	/* waits on writers */
	static unsigned int	default_pager_wait_refs;	/* debugging */
	108
	109	__private_extern__ void
	110	vs_async_wait(
	111	vstruct_t vs)
	112	{
	113
	114	ASSERT(vs->vs_async_pending >= 0);
	115	while (vs->vs_async_pending > 0) {
	116	vs->vs_waiting_async = TRUE;
	117	assert_wait(&vs->vs_async_pending, THREAD_UNINT);
	118	VS_UNLOCK(vs);
	119	thread_block((void (*)(void))0);
	120	VS_LOCK(vs);
	121	}
	122	ASSERT(vs->vs_async_pending == 0);
	123	}
	124
	125
	126	#if PARALLEL
	127	/*
	128	* Waits for correct sequence number. Leaves pager locked.
	129	*
	130	* JMM - Sequence numbers guarantee ordering of requests generated
	131	* by a single thread if the receiver is multithreaded and
	132	* the interfaces are asynchronous (i.e. sender can generate
	133	* more than one request before the first is received in the
	134	* pager). Normally, IPC would generate these number in that
	135	* case. But we are trying to avoid using IPC for the in-kernel
	136	* scenario. Since these are actually invoked synchronously
	137	* anyway (in-kernel), we can just fake the sequence number
	138	* generation here (thus avoiding the dependence on IPC).
	139	*/
	140	__private_extern__ void
	141	vs_lock(
	142	vstruct_t vs)
	143	{
	144	mach_port_seqno_t seqno;
	145
	146	default_pager_total++;
	147	VS_LOCK(vs);
	148
	149	seqno = vs->vs_next_seqno++;
	150
	151	while (vs->vs_seqno != seqno) {
	152	default_pager_wait_seqno++;
	153	vs->vs_waiting_seqno = TRUE;
	154	assert_wait(&vs->vs_seqno, THREAD_UNINT);
	155	VS_UNLOCK(vs);
	156	thread_block((void (*)(void))0);
	157	VS_LOCK(vs);
	158	}
	159	}
	160
	161	/*
	162	* Increments sequence number and unlocks pager.
	163	*/
	164	__private_extern__ void
	165	vs_unlock(vstruct_t vs)
	166	{
	167	vs->vs_seqno++;
	168	if (vs->vs_waiting_seqno) {
	169	vs->vs_waiting_seqno = FALSE;
	170	VS_UNLOCK(vs);
	171	thread_wakeup(&vs->vs_seqno);
	172	return;
	173	}
	174	VS_UNLOCK(vs);
	175	}
	176
	177	/*
	178	* Start a read - one more reader. Pager must be locked.
	179	*/
	180	__private_extern__ void
	181	vs_start_read(
	182	vstruct_t vs)
	183	{
	184	vs->vs_readers++;
	185	}
	186
	187	/*
	188	* Wait for readers. Unlocks and relocks pager if wait needed.
	189	*/
	190	__private_extern__ void
	191	vs_wait_for_readers(
	192	vstruct_t vs)
	193	{
	194	while (vs->vs_readers != 0) {
	195	default_pager_wait_read++;
	196	vs->vs_waiting_read = TRUE;
	197	assert_wait(&vs->vs_readers, THREAD_UNINT);
	198	VS_UNLOCK(vs);
	199	thread_block((void (*)(void))0);
	200	VS_LOCK(vs);
	201	}
	202	}
	203
	204	/*
	205	* Finish a read. Pager is unlocked and returns unlocked.
	206	*/
	207	__private_extern__ void
	208	vs_finish_read(
	209	vstruct_t vs)
	210	{
	211	VS_LOCK(vs);
	212	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
	213	vs->vs_waiting_read = FALSE;
	214	VS_UNLOCK(vs);
	215	thread_wakeup(&vs->vs_readers);
	216	return;
	217	}
	218	VS_UNLOCK(vs);
	219	}
	220
	221	/*
	222	* Start a write - one more writer. Pager must be locked.
	223	*/
	224	__private_extern__ void
	225	vs_start_write(
	226	vstruct_t vs)
	227	{
	228	vs->vs_writers++;
	229	}
	230
	231	/*
	232	* Wait for writers. Unlocks and relocks pager if wait needed.
	233	*/
	234	__private_extern__ void
	235	vs_wait_for_writers(
	236	vstruct_t vs)
	237	{
	238	while (vs->vs_writers != 0) {
	239	default_pager_wait_write++;
	240	vs->vs_waiting_write = TRUE;
	241	assert_wait(&vs->vs_writers, THREAD_UNINT);
	242	VS_UNLOCK(vs);
	243	thread_block((void (*)(void))0);
	244	VS_LOCK(vs);
	245	}
	246	vs_async_wait(vs);
	247	}
	248
	249	/* This is to be used for the transfer from segment code ONLY */
	250	/* The transfer code holds off vs destruction by keeping the */
	251	/* vs_async_wait count non-zero. It will not ocnflict with */
	252	/* other writers on an async basis because it only writes on */
	253	/* a cluster basis into fresh (as of sync time) cluster locations */
	254
	255	__private_extern__ void
	256	vs_wait_for_sync_writers(
	257	vstruct_t vs)
	258	{
	259	while (vs->vs_writers != 0) {
	260	default_pager_wait_write++;
	261	vs->vs_waiting_write = TRUE;
	262	assert_wait(&vs->vs_writers, THREAD_UNINT);
	263	VS_UNLOCK(vs);
	264	thread_block((void (*)(void))0);
	265	VS_LOCK(vs);
	266	}
	267	}
	268
	269
	270	/*
	271	* Finish a write. Pager is unlocked and returns unlocked.
	272	*/
	273	__private_extern__ void
	274	vs_finish_write(
	275	vstruct_t vs)
	276	{
	277	VS_LOCK(vs);
	278	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
	279	vs->vs_waiting_write = FALSE;
	280	VS_UNLOCK(vs);
	281	thread_wakeup(&vs->vs_writers);
	282	return;
	283	}
	284	VS_UNLOCK(vs);
	285	}
	286	#endif /* PARALLEL */
	287
	288	vstruct_t
	289	vs_object_create(
	290	vm_size_t size)
	291	{
	292	vstruct_t vs;
	293
	294	/*
	295	* Allocate a vstruct. If there are any problems, then report them
	296	* to the console.
	297	*/
	298	vs = ps_vstruct_create(size);
	299	if (vs == VSTRUCT_NULL) {
	300	dprintf(("vs_object_create: unable to allocate %s\n",
	301	"-- either run swapon command or reboot"));
	302	return VSTRUCT_NULL;
	303	}
	304
	305	return vs;
	306	}
	307
	308	#if 0
	309	void default_pager_add(vstruct_t, boolean_t); /* forward */
	310
	311	void
	312	default_pager_add(
	313	vstruct_t vs,
	314	boolean_t internal)
	315	{
	316	memory_object_t mem_obj = vs->vs_mem_obj;
	317	mach_port_t pset;
	318	mach_port_mscount_t sync;
	319	mach_port_t previous;
	320	kern_return_t kr;
	321	static char here[] = "default_pager_add";
	322
	323	/*
	324	* The port currently has a make-send count of zero,
	325	* because either we just created the port or we just
	326	* received the port in a memory_object_create request.
	327	*/
	328
	329	if (internal) {
	330	/* possibly generate an immediate no-senders notification */
	331	sync = 0;
	332	pset = default_pager_internal_set;
	333	} else {
	334	/* delay notification till send right is created */
	335	sync = 1;
	336	pset = default_pager_external_set;
	337	}
	338
	339	ipc_port_make_sonce(mem_obj);
	340	ip_lock(mem_obj); /* unlocked in nsrequest below */
	341	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
	342	}
	343
	344	#endif
	345
	346	kern_return_t
	347	dp_memory_object_init(
	348	memory_object_t mem_obj,
	349	memory_object_control_t control,
	350	vm_size_t pager_page_size)
	351	{
	352	vstruct_t vs;
	353
	354	assert(pager_page_size == vm_page_size);
	355
	356	memory_object_control_reference(control);
	357
	358	vs_lookup(mem_obj, vs);
	359	vs_lock(vs);
	360
	361	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
	362	Panic("bad request");
	363
	364	vs->vs_control = control;
	365	vs_unlock(vs);
	366
	367	return KERN_SUCCESS;
	368	}
	369
	370	kern_return_t
	371	dp_memory_object_synchronize(
	372	memory_object_t mem_obj,
	373	memory_object_offset_t offset,
	374	vm_size_t length,
	375	vm_sync_t flags)
	376	{
	377	vstruct_t vs;
	378
	379	vs_lookup(mem_obj, vs);
	380	vs_lock(vs);
	381	vs_unlock(vs);
	382
	383	memory_object_synchronize_completed(vs->vs_control, offset, length);
	384
	385	return KERN_SUCCESS;
	386	}
	387
	388	kern_return_t
	389	dp_memory_object_unmap(
	390	memory_object_t mem_obj)
	391	{
	392	panic("dp_memory_object_unmap");
	393
	394	return KERN_FAILURE;
	395	}
	396
	397	kern_return_t
	398	dp_memory_object_terminate(
	399	memory_object_t mem_obj)
	400	{
	401	memory_object_control_t control;
	402	vstruct_t vs;
	403	kern_return_t kr;
	404
	405	/*
	406	* control port is a receive right, not a send right.
	407	*/
	408
	409	vs_lookup(mem_obj, vs);
	410	vs_lock(vs);
	411
	412	/*
	413	* Wait for read and write requests to terminate.
	414	*/
	415
	416	vs_wait_for_readers(vs);
	417	vs_wait_for_writers(vs);
	418
	419	/*
	420	* After memory_object_terminate both memory_object_init
	421	* and a no-senders notification are possible, so we need
	422	* to clean up our reference to the memory_object_control
	423	* to prepare for a new init.
	424	*/
	425
	426	control = vs->vs_control;
	427	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
	428
	429	/* a bit of special case ugliness here. Wakeup any waiting reads */
	430	/* these data requests had to be removed from the seqno traffic */
	431	/* based on a performance bottleneck with large memory objects */
	432	/* the problem will right itself with the new component based */
	433	/* synchronous interface. The new async will be able to return */
	434	/* failure during its sync phase. In the mean time ... */
	435
	436	thread_wakeup(&vs->vs_writers);
	437	thread_wakeup(&vs->vs_async_pending);
	438
	439	vs_unlock(vs);
	440
	441	/*
	442	* Now we deallocate our reference on the control.
	443	*/
	444	memory_object_control_deallocate(control);
	445	return KERN_SUCCESS;
	446	}
	447
	448	void
	449	dp_memory_object_reference(
	450	memory_object_t mem_obj)
	451	{
	452	vstruct_t vs;
	453
	454	vs_lookup_safe(mem_obj, vs);
	455	if (vs == VSTRUCT_NULL)
	456	return;
	457
	458	VS_LOCK(vs);
	459	assert(vs->vs_references > 0);
	460	vs->vs_references++;
	461	VS_UNLOCK(vs);
	462	}
	463
	464	extern ipc_port_t max_pages_trigger_port;
	465	extern int dp_pages_free;
	466	extern int maximum_pages_free;
	467	void
	468	dp_memory_object_deallocate(
	469	memory_object_t mem_obj)
	470	{
	471	vstruct_t vs;
	472	mach_port_seqno_t seqno;
	473	ipc_port_t trigger;
	474
	475	/*
	476	* Because we don't give out multiple first references
	477	* for a memory object, there can't be a race
	478	* between getting a deallocate call and creating
	479	* a new reference for the object.
	480	*/
	481
	482	vs_lookup_safe(mem_obj, vs);
	483	if (vs == VSTRUCT_NULL)
	484	return;
	485
	486	VS_LOCK(vs);
	487	if (--vs->vs_references > 0) {
	488	VS_UNLOCK(vs);
	489	return;
	490	}
	491
	492	seqno = vs->vs_next_seqno++;
	493	while (vs->vs_seqno != seqno) {
	494	default_pager_wait_seqno++;
	495	vs->vs_waiting_seqno = TRUE;
	496	assert_wait(&vs->vs_seqno, THREAD_UNINT);
	497	VS_UNLOCK(vs);
	498	thread_block((void (*)(void))0);
	499	VS_LOCK(vs);
	500	}
	501
	502	vs_async_wait(vs); /* wait for pending async IO */
	503
	504	/* do not delete the vs structure until the referencing pointers */
	505	/* in the vstruct list have been expunged */
	506
	507	/* get VSL_LOCK out of order by using TRY mechanism */
	508	while(!VSL_LOCK_TRY()) {
	509	VS_UNLOCK(vs);
	510	VSL_LOCK();
	511	VSL_UNLOCK();
	512	VS_LOCK(vs);
	513	vs_async_wait(vs); /* wait for pending async IO */
	514	}
	515
	516
	517	/*
	518	* We shouldn't get a deallocation call
	519	* when the kernel has the object cached.
	520	*/
	521	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
	522	Panic("bad request");
	523
	524	/*
	525	* Unlock the pager (though there should be no one
	526	* waiting for it).
	527	*/
	528	VS_UNLOCK(vs);
	529
	530	/* Lock out paging segment removal for the duration of this */
	531	/* call. We are vulnerable to losing a paging segment we rely */
	532	/* on as soon as we remove ourselves from the VSL and unlock */
	533
	534	/* Keep our thread from blocking on attempt to trigger backing */
	535	/* store release */
	536	backing_store_release_trigger_disable += 1;
	537
	538	/*
	539	* Remove the memory object port association, and then
	540	* the destroy the port itself. We must remove the object
	541	* from the port list before deallocating the pager,
	542	* because of default_pager_objects.
	543	*/
	544	vstruct_list_delete(vs);
	545	VSL_UNLOCK();
	546
	547	ps_vstruct_dealloc(vs);
	548
	549	VSL_LOCK();
	550	backing_store_release_trigger_disable -= 1;
	551	if(backing_store_release_trigger_disable == 0) {
	552	thread_wakeup((event_t)&vm_page_laundry_count);
	553	}
	554	VSL_UNLOCK();
	555
	556	PSL_LOCK();
	557	if(max_pages_trigger_port
	558	&& (backing_store_release_trigger_disable == 0)
	559	&& (dp_pages_free > maximum_pages_free)) {
	560	trigger = max_pages_trigger_port;
	561	max_pages_trigger_port = NULL;
	562	} else
	563	trigger = IP_NULL;
	564	PSL_UNLOCK();
	565
	566	if (trigger != IP_NULL) {
	567	default_pager_space_alert(trigger, LO_WAT_ALERT);
	568	ipc_port_release_send(trigger);
	569	}
	570
	571	}
	572
	573	kern_return_t
	574	dp_memory_object_data_request(
	575	memory_object_t mem_obj,
	576	memory_object_offset_t offset,
	577	vm_size_t length,
	578	vm_prot_t protection_required)
	579	{
	580	vstruct_t vs;
	581
	582	GSTAT(global_stats.gs_pagein_calls++);
	583
	584
	585	/* CDY at this moment vs_lookup panics when presented with the wrong */
	586	/* port. As we are expanding this pager to support user interfaces */
	587	/* this should be changed to return kern_failure */
	588	vs_lookup(mem_obj, vs);
	589	vs_lock(vs);
	590
	591	/* We are going to relax the strict sequencing here for performance */
	592	/* reasons. We can do this because we know that the read and */
	593	/* write threads are different and we rely on synchronization */
	594	/* of read and write requests at the cache memory_object level */
	595	/* break out wait_for_writers, all of this goes away when */
	596	/* we get real control of seqno with the new component interface */
	597
	598	if (vs->vs_writers != 0) {
	599	/* you can't hold on to the seqno and go */
	600	/* to sleep like that */
	601	vs_unlock(vs); /* bump internal count of seqno */
	602	VS_LOCK(vs);
	603	while (vs->vs_writers != 0) {
	604	default_pager_wait_write++;
	605	vs->vs_waiting_write = TRUE;
	606	assert_wait(&vs->vs_writers, THREAD_UNINT);
	607	VS_UNLOCK(vs);
	608	thread_block((void (*)(void))0);
	609	VS_LOCK(vs);
	610	vs_async_wait(vs);
	611	}
	612	if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
	613	VS_UNLOCK(vs);
	614	return KERN_FAILURE;
	615	}
	616	vs_start_read(vs);
	617	VS_UNLOCK(vs);
	618	} else {
	619	vs_start_read(vs);
	620	vs_unlock(vs);
	621	}
	622
	623	/*
	624	* Request must be on a page boundary and a multiple of pages.
	625	*/
	626	if ((offset & vm_page_mask) != 0 \|\| (length & vm_page_mask) != 0)
	627	Panic("bad alignment");
	628
	629	pvs_cluster_read(vs, (vm_offset_t)offset, length);
	630
	631	vs_finish_read(vs);
	632
	633	return KERN_SUCCESS;
	634	}
	635
	636	/*
	637	* memory_object_data_initialize: check whether we already have each page, and
	638	* write it if we do not. The implementation is far from optimized, and
	639	* also assumes that the default_pager is single-threaded.
	640	*/
	641	/* It is questionable whether or not a pager should decide what is relevant */
	642	/* and what is not in data sent from the kernel. Data initialize has been */
	643	/* changed to copy back all data sent to it in preparation for its eventual */
	644	/* merge with data return. It is the kernel that should decide what pages */
	645	/* to write back. As of the writing of this note, this is indeed the case */
	646	/* the kernel writes back one page at a time through this interface */
	647
	648	kern_return_t
	649	dp_memory_object_data_initialize(
	650	memory_object_t mem_obj,
	651	memory_object_offset_t offset,
	652	vm_size_t size)
	653	{
	654	vstruct_t vs;
	655
	656	DEBUG(DEBUG_MO_EXTERNAL,
	657	("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
	658	(int)mem_obj, (int)offset, (int)size));
	659	GSTAT(global_stats.gs_pages_init += atop(size));
	660
	661	vs_lookup(mem_obj, vs);
	662	vs_lock(vs);
	663	vs_start_write(vs);
	664	vs_unlock(vs);
	665
	666	/*
	667	* Write the data via clustered writes. vs_cluster_write will
	668	* loop if the address range specified crosses cluster
	669	* boundaries.
	670	*/
	671	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
	672
	673	vs_finish_write(vs);
	674
	675	return KERN_SUCCESS;
	676	}
	677
	678	kern_return_t
	679	dp_memory_object_data_unlock(
	680	memory_object_t mem_obj,
	681	memory_object_offset_t offset,
	682	vm_size_t size,
	683	vm_prot_t desired_access)
	684	{
	685	Panic("dp_memory_object_data_unlock: illegal");
	686	return KERN_FAILURE;
	687	}
	688
	689
	690	kern_return_t
	691	dp_memory_object_data_return(
	692	memory_object_t mem_obj,
	693	memory_object_offset_t offset,
	694	vm_size_t size,
	695	boolean_t dirty,
	696	boolean_t kernel_copy)
	697	{
	698	vstruct_t vs;
	699
	700	DEBUG(DEBUG_MO_EXTERNAL,
	701	("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
	702	(int)mem_obj, (int)offset, (int)size));
	703	GSTAT(global_stats.gs_pageout_calls++);
	704
	705	/* This routine is called by the pageout thread. The pageout thread */
	706	/* cannot be blocked by read activities unless the read activities */
	707	/* Therefore the grant of vs lock must be done on a try versus a */
	708	/* blocking basis. The code below relies on the fact that the */
	709	/* interface is synchronous. Should this interface be again async */
	710	/* for some type of pager in the future the pages will have to be */
	711	/* returned through a separate, asynchronous path. */
	712
	713	vs_lookup(mem_obj, vs);
	714
	715	default_pager_total++;
	716	if(!VS_TRY_LOCK(vs)) {
	717	/* the call below will not be done by caller when we have */
	718	/* a synchronous interface */
	719	/* return KERN_LOCK_OWNED; */
	720	upl_t upl;
	721	int page_list_count = 0;
	722	memory_object_super_upl_request(vs->vs_control,
	723	(memory_object_offset_t)offset,
	724	size, size,
	725	&upl, NULL, &page_list_count,
	726	UPL_NOBLOCK \| UPL_CLEAN_IN_PLACE
	727	\| UPL_NO_SYNC \| UPL_COPYOUT_FROM);
	728	upl_abort(upl,0);
	729	upl_deallocate(upl);
	730	return KERN_SUCCESS;
	731	}
	732
	733	if ((vs->vs_seqno != vs->vs_next_seqno++) \|\| (vs->vs_xfer_pending)) {
	734	upl_t upl;
	735	int page_list_count = 0;
	736
	737	vs->vs_next_seqno--;
	738	VS_UNLOCK(vs);
	739
	740	/* the call below will not be done by caller when we have */
	741	/* a synchronous interface */
	742	/* return KERN_LOCK_OWNED; */
	743	memory_object_super_upl_request(vs->vs_control,
	744	(memory_object_offset_t)offset,
	745	size, size,
	746	&upl, NULL, &page_list_count,
	747	UPL_NOBLOCK \| UPL_CLEAN_IN_PLACE
	748	\| UPL_NO_SYNC \| UPL_COPYOUT_FROM);
	749	upl_abort(upl,0);
	750	upl_deallocate(upl);
	751	return KERN_SUCCESS;
	752	}
	753
	754	if ((size % vm_page_size) != 0)
	755	Panic("bad alignment");
	756
	757	vs_start_write(vs);
	758
	759
	760	vs->vs_async_pending += 1; /* protect from backing store contraction */
	761	vs_unlock(vs);
	762
	763	/*
	764	* Write the data via clustered writes. vs_cluster_write will
	765	* loop if the address range specified crosses cluster
	766	* boundaries.
	767	*/
	768	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
	769
	770	vs_finish_write(vs);
	771
	772	/* temporary, need a finer lock based on cluster */
	773
	774	VS_LOCK(vs);
	775	vs->vs_async_pending -= 1; /* release vs_async_wait */
	776	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
	777	vs->vs_waiting_async = FALSE;
	778	VS_UNLOCK(vs);
	779	thread_wakeup(&vs->vs_async_pending);
	780	} else {
	781	VS_UNLOCK(vs);
	782	}
	783
	784
	785	return KERN_SUCCESS;
	786	}
	787
	788	/*
	789	* Routine: default_pager_memory_object_create
	790	* Purpose:
	791	* Handle requests for memory objects from the
	792	* kernel.
	793	* Notes:
	794	* Because we only give out the default memory
	795	* manager port to the kernel, we don't have to
	796	* be so paranoid about the contents.
	797	*/
	798	kern_return_t
	799	default_pager_memory_object_create(
	800	memory_object_default_t dmm,
	801	vm_size_t new_size,
	802	memory_object_t *new_mem_obj)
	803	{
	804	vstruct_t vs;
	805
	806	assert(dmm == default_pager_object);
	807
	808	vs = vs_object_create(new_size);
	809	if (vs == VSTRUCT_NULL)
	810	return KERN_RESOURCE_SHORTAGE;
	811
	812	vs->vs_next_seqno = 0;
	813
	814	/*
	815	* Set up associations between this memory object
	816	* and this default_pager structure
	817	*/
	818
	819	vs->vs_mem_obj = ISVS;
	820	vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
	821
	822	/*
	823	* After this, other threads might receive requests
	824	* for this memory object or find it in the port list.
	825	*/
	826
	827	vstruct_list_insert(vs);
	828	*new_mem_obj = vs_to_mem_obj(vs);
	829	return KERN_SUCCESS;
	830	}
	831
	832	/*
	833	* Create an external object.
	834	*/
	835	kern_return_t
	836	default_pager_object_create(
	837	default_pager_t pager,
	838	vm_size_t size,
	839	memory_object_t *mem_objp)
	840	{
	841	vstruct_t vs;
	842	kern_return_t result;
	843	struct vstruct_alias *alias_struct;
	844
	845
	846	if (pager != default_pager_object)
	847	return KERN_INVALID_ARGUMENT;
	848
	849	vs = vs_object_create(size);
	850	if (vs == VSTRUCT_NULL)
	851	return KERN_RESOURCE_SHORTAGE;
	852
	853	/*
	854	* Set up associations between the default pager
	855	* and this vstruct structure
	856	*/
	857	vs->vs_mem_obj = ISVS;
	858	vstruct_list_insert(vs);
	859	*mem_objp = vs_to_mem_obj(vs);
	860	return KERN_SUCCESS;
	861	}
	862
	863	kern_return_t
	864	default_pager_objects(
	865	default_pager_t pager,
	866	default_pager_object_array_t *objectsp,
	867	mach_msg_type_number_t *ocountp,
	868	memory_object_array_t *pagersp,
	869	mach_msg_type_number_t *pcountp)
	870	{
	871	vm_offset_t oaddr = 0; /* memory for objects */
	872	vm_size_t osize = 0; /* current size */
	873	default_pager_object_t * objects;
	874	unsigned int opotential;
	875
	876	vm_offset_t paddr = 0; /* memory for pagers */
	877	vm_size_t psize = 0; /* current size */
	878	memory_object_t * pagers;
	879	unsigned int ppotential;
	880
	881	unsigned int actual;
	882	unsigned int num_objects;
	883	kern_return_t kr;
	884	vstruct_t entry;
	885	/*
	886	if (pager != default_pager_default_port)
	887	return KERN_INVALID_ARGUMENT;
	888	*/
	889
	890	/* start with the inline memory */
	891
	892	kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
	893	(vm_map_copy_t) *objectsp);
	894
	895	if (kr != KERN_SUCCESS)
	896	return kr;
	897
	898	osize = round_page(ocountp sizeof * objects);
	899	kr = vm_map_wire(ipc_kernel_map,
	900	trunc_page((vm_offset_t)objects),
	901	round_page(((vm_offset_t)objects) + osize),
	902	VM_PROT_READ\|VM_PROT_WRITE, FALSE);
	903	osize=0;
	904
	905	*objectsp = objects;
	906	/* we start with the inline space */
	907
	908
	909	num_objects = 0;
	910	opotential = *ocountp;
	911
	912	pagers = (memory_object_t ) pagersp;
	913	ppotential = *pcountp;
	914
	915	VSL_LOCK();
	916
	917	/*
	918	* We will send no more than this many
	919	*/
	920	actual = vstruct_list.vsl_count;
	921	VSL_UNLOCK();
	922
	923	if (opotential < actual) {
	924	vm_offset_t newaddr;
	925	vm_size_t newsize;
	926
	927	newsize = 2 * round_page(actual * sizeof * objects);
	928
	929	kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
	930	if (kr != KERN_SUCCESS)
	931	goto nomemory;
	932
	933	oaddr = newaddr;
	934	osize = newsize;
	935	opotential = osize / sizeof * objects;
	936	objects = (default_pager_object_t *)oaddr;
	937	}
	938
	939	if (ppotential < actual) {
	940	vm_offset_t newaddr;
	941	vm_size_t newsize;
	942
	943	newsize = 2 * round_page(actual * sizeof * pagers);
	944
	945	kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
	946	if (kr != KERN_SUCCESS)
	947	goto nomemory;
	948
	949	paddr = newaddr;
	950	psize = newsize;
	951	ppotential = psize / sizeof * pagers;
	952	pagers = (memory_object_t *)paddr;
	953	}
	954
	955	/*
	956	* Now scan the list.
	957	*/
	958
	959	VSL_LOCK();
	960
	961	num_objects = 0;
	962	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
	963
	964	memory_object_t pager;
	965	vm_size_t size;
	966
	967	if ((num_objects >= opotential) \|\|
	968	(num_objects >= ppotential)) {
	969
	970	/*
	971	* This should be rare. In any case,
	972	* we will only miss recent objects,
	973	* because they are added at the end.
	974	*/
	975	break;
	976	}
	977
	978	/*
	979	* Avoid interfering with normal operations
	980	*/
	981	if (!VS_MAP_TRY_LOCK(entry))
	982	goto not_this_one;
	983	size = ps_vstruct_allocated_size(entry);
	984	VS_MAP_UNLOCK(entry);
	985
	986	VS_LOCK(entry);
	987
	988	/*
	989	* We need a reference for our caller. Adding this
	990	* reference through the linked list could race with
	991	* destruction of the object. If we find the object
	992	* has no references, just give up on it.
	993	*/
	994	VS_LOCK(entry);
	995	if (entry->vs_references == 0) {
	996	VS_UNLOCK(entry);
	997	goto not_this_one;
	998	}
	999	dp_memory_object_reference(vs_to_mem_obj(entry));
	1000	VS_UNLOCK(entry);
	1001
	1002	/* the arrays are wired, so no deadlock worries */
	1003
	1004	objects[num_objects].dpo_object = (vm_offset_t) entry;
	1005	objects[num_objects].dpo_size = size;
	1006	pagers [num_objects++] = pager;
	1007	continue;
	1008
	1009	not_this_one:
	1010	/*
	1011	* Do not return garbage
	1012	*/
	1013	objects[num_objects].dpo_object = (vm_offset_t) 0;
	1014	objects[num_objects].dpo_size = 0;
	1015	pagers[num_objects++] = MEMORY_OBJECT_NULL;
	1016
	1017	}
	1018
	1019	VSL_UNLOCK();
	1020
	1021	/*
	1022	* Deallocate and clear unused memory.
	1023	* (Returned memory will automagically become pageable.)
	1024	*/
	1025
	1026	if (objects == *objectsp) {
	1027
	1028	/*
	1029	* Our returned information fit inline.
	1030	* Nothing to deallocate.
	1031	*/
	1032	*ocountp = num_objects;
	1033	} else if (actual == 0) {
	1034	(void) vm_deallocate(kernel_map, oaddr, osize);
	1035
	1036	/* return zero items inline */
	1037	*ocountp = 0;
	1038	} else {
	1039	vm_offset_t used;
	1040
	1041	used = round_page(actual * sizeof * objects);
	1042
	1043	if (used != osize)
	1044	(void) vm_deallocate(kernel_map,
	1045	oaddr + used, osize - used);
	1046
	1047	*objectsp = objects;
	1048	*ocountp = num_objects;
	1049	}
	1050
	1051	if (pagers == (memory_object_t )pagersp) {
	1052
	1053	/*
	1054	* Our returned information fit inline.
	1055	* Nothing to deallocate.
	1056	*/
	1057
	1058	*pcountp = num_objects;
	1059	} else if (actual == 0) {
	1060	(void) vm_deallocate(kernel_map, paddr, psize);
	1061
	1062	/* return zero items inline */
	1063	*pcountp = 0;
	1064	} else {
	1065	vm_offset_t used;
	1066
	1067	used = round_page(actual * sizeof * pagers);
	1068
	1069	if (used != psize)
	1070	(void) vm_deallocate(kernel_map,
	1071	paddr + used, psize - used);
	1072
	1073	*pagersp = (memory_object_array_t)pagers;
	1074	*pcountp = num_objects;
	1075	}
	1076	(void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
	1077	*ocountp + (vm_offset_t)objects, FALSE);
	1078	(void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
	1079	ocountp, TRUE, (vm_map_copy_t )objectsp);
	1080
	1081	return KERN_SUCCESS;
	1082
	1083	nomemory:
	1084	{
	1085	register int i;
	1086	for (i = 0; i < num_objects; i++)
	1087	if (pagers[i] != MEMORY_OBJECT_NULL)
	1088	memory_object_deallocate(pagers[i]);
	1089	}
	1090
	1091	if (objects != *objectsp)
	1092	(void) vm_deallocate(kernel_map, oaddr, osize);
	1093
	1094	if (pagers != (memory_object_t )pagersp)
	1095	(void) vm_deallocate(kernel_map, paddr, psize);
	1096
	1097	return KERN_RESOURCE_SHORTAGE;
	1098	}
	1099
	1100	kern_return_t
	1101	default_pager_object_pages(
	1102	default_pager_t pager,
	1103	memory_object_t object,
	1104	default_pager_page_array_t *pagesp,
	1105	mach_msg_type_number_t *countp)
	1106	{
	1107	vm_offset_t addr; /* memory for page offsets */
	1108	vm_size_t size = 0; /* current memory size */
	1109	default_pager_page_t * pages;
	1110	unsigned int potential, actual;
	1111	kern_return_t kr;
	1112
	1113
	1114	if (pager != default_pager_object)
	1115	return KERN_INVALID_ARGUMENT;
	1116
	1117	kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
	1118	(vm_map_copy_t) *pagesp);
	1119
	1120	if (kr != KERN_SUCCESS)
	1121	return kr;
	1122
	1123	size = round_page(countp sizeof * pages);
	1124	kr = vm_map_wire(ipc_kernel_map,
	1125	trunc_page((vm_offset_t)pages),
	1126	round_page(((vm_offset_t)pages) + size),
	1127	VM_PROT_READ\|VM_PROT_WRITE, FALSE);
	1128	size=0;
	1129
	1130	*pagesp = pages;
	1131	/* we start with the inline space */
	1132
	1133	addr = (vm_offset_t)pages;
	1134	potential = *countp;
	1135
	1136	for (;;) {
	1137	vstruct_t entry;
	1138
	1139	VSL_LOCK();
	1140	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
	1141	vs_links) {
	1142	VS_LOCK(entry);
	1143	if (vs_to_mem_obj(entry) == object) {
	1144	VSL_UNLOCK();
	1145	goto found_object;
	1146	}
	1147	VS_UNLOCK(entry);
	1148	}
	1149	VSL_UNLOCK();
	1150
	1151	/* did not find the object */
	1152
	1153	if (pages != *pagesp)
	1154	(void) vm_deallocate(kernel_map, addr, size);
	1155	return KERN_INVALID_ARGUMENT;
	1156
	1157	found_object:
	1158
	1159	if (!VS_MAP_TRY_LOCK(entry)) {
	1160	/* oh well bad luck */
	1161	int wait_result;
	1162
	1163	VS_UNLOCK(entry);
	1164
	1165	assert_wait_timeout( 1, THREAD_INTERRUPTIBLE);
	1166	wait_result = thread_block((void (*)(void)) 0);
	1167	if (wait_result != THREAD_TIMED_OUT)
	1168	thread_cancel_timer();
	1169	continue;
	1170	}
	1171
	1172	actual = ps_vstruct_allocated_pages(entry, pages, potential);
	1173	VS_MAP_UNLOCK(entry);
	1174	VS_UNLOCK(entry);
	1175
	1176	if (actual <= potential)
	1177	break;
	1178
	1179	/* allocate more memory */
	1180
	1181	if (pages != *pagesp)
	1182	(void) vm_deallocate(kernel_map, addr, size);
	1183	size = round_page(actual * sizeof * pages);
	1184	kr = vm_allocate(kernel_map, &addr, size, TRUE);
	1185	if (kr != KERN_SUCCESS)
	1186	return kr;
	1187	pages = (default_pager_page_t *)addr;
	1188	potential = size / sizeof * pages;
	1189	}
	1190
	1191	/*
	1192	* Deallocate and clear unused memory.
	1193	* (Returned memory will automagically become pageable.)
	1194	*/
	1195
	1196	if (pages == *pagesp) {
	1197
	1198	/*
	1199	* Our returned information fit inline.
	1200	* Nothing to deallocate.
	1201	*/
	1202
	1203	*countp = actual;
	1204	} else if (actual == 0) {
	1205	(void) vm_deallocate(kernel_map, addr, size);
	1206
	1207	/* return zero items inline */
	1208	*countp = 0;
	1209	} else {
	1210	vm_offset_t used;
	1211
	1212	used = round_page(actual * sizeof * pages);
	1213
	1214	if (used != size)
	1215	(void) vm_deallocate(kernel_map,
	1216	addr + used, size - used);
	1217
	1218	*pagesp = pages;
	1219	*countp = actual;
	1220	}
	1221	(void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
	1222	*countp + (vm_offset_t)pages, FALSE);
	1223	(void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
	1224	countp, TRUE, (vm_map_copy_t )pagesp);
	1225	return KERN_SUCCESS;
	1226	}