/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(vm_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is used
 * only for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;

__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}


__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */

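/*
 * A sketch of the resulting request discipline, as exercised by the
 * handlers below (the read side appears in dp_memory_object_data_request,
 * where the exact sequence is relaxed slightly for performance):
 *
 *	vs_lookup(mem_obj, vs);
 *	vs_lock(vs);			wait our turn in seqno order
 *	vs_wait_for_writers(vs);	reads must not pass earlier writes
 *	vs_start_read(vs);
 *	vs_unlock(vs);			admit the next request
 *	... perform the read ...
 *	vs_finish_read(vs);
 */
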
static unsigned int default_pager_total = 0;		/* debugging */
static unsigned int default_pager_wait_seqno = 0;	/* debugging */
static unsigned int default_pager_wait_read = 0;	/* debugging */
static unsigned int default_pager_wait_write = 0;	/* debugging */

__private_extern__ void
vs_async_wait(
	vstruct_t vs)
{

	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}


#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	by a single thread if the receiver is multithreaded and
 *	the interfaces are asynchronous (i.e. sender can generate
 *	more than one request before the first is received in the
 *	pager).  Normally, IPC would generate these numbers in that
 *	case.  But we are trying to avoid using IPC for the in-kernel
 *	scenario.  Since these are actually invoked synchronously
 *	anyway (in-kernel), we can just fake the sequence number
 *	generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t vs)
{
	mach_port_seqno_t seqno;

	default_pager_total++;
	VS_LOCK(vs);

	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}

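/*
 * Taken together, vs_lock and vs_unlock behave like a ticket lock:
 * vs_lock draws a ticket (vs_next_seqno++) and sleeps until vs_seqno
 * catches up to it; vs_unlock advances vs_seqno and wakes any waiter.
 * Requests are thus serviced strictly in arrival order.
 */
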
/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

/* This is to be used for the transfer from segment code ONLY.  The   */
/* transfer code holds off vs destruction by keeping the              */
/* vs_async_wait count non-zero.  It will not conflict with           */
/* other writers on an async basis because it only writes on          */
/* a cluster basis into fresh (as of sync time) cluster locations.    */

__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}


/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
		vs->vs_waiting_write = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_writers);
		return;
	}
	VS_UNLOCK(vs);
}
#endif	/* PARALLEL */

vstruct_t
vs_object_create(
	vm_size_t size)
{
	vstruct_t	vs;

	/*
	 * Allocate a vstruct. If there are any problems, then report them
	 * to the console.
	 */
	vs = ps_vstruct_create(size);
	if (vs == VSTRUCT_NULL) {
		dprintf(("vs_object_create: unable to allocate %s\n",
			 "-- either run swapon command or reboot"));
		return VSTRUCT_NULL;
	}

	return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t		mem_obj = vs->vs_mem_obj;
	mach_port_t		pset;
	mach_port_mscount_t	sync;
	mach_port_t		previous;
	kern_return_t		kr;
	static char		here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ipc_port_make_sonce(mem_obj);
	ip_lock(mem_obj);	/* unlocked in nsrequest below */
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

kern_return_t
dp_memory_object_init(
	memory_object_t		mem_obj,
	memory_object_control_t	control,
	__unused vm_size_t	pager_page_size)
{
	vstruct_t		vs;

	assert(pager_page_size == vm_page_size);

	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	flags)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_unmap(
	__unused memory_object_t	mem_obj)
{
	panic("dp_memory_object_unmap");

	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_terminate(
	memory_object_t		mem_obj)
{
	memory_object_control_t	control;
	vstruct_t		vs;

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/* a bit of special case ugliness here.  Wakeup any waiting reads; */
	/* these data requests had to be removed from the seqno traffic    */
	/* based on a performance bottleneck with large memory objects.    */
	/* The problem will right itself with the new component based      */
	/* synchronous interface.  The new async will be able to return    */
	/* failure during its sync phase.  In the mean time ... */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

void
dp_memory_object_reference(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	assert(vs->vs_references > 0);
	vs->vs_references++;
	VS_UNLOCK(vs);
}

void
dp_memory_object_deallocate(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;
	mach_port_seqno_t	seqno;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged */

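	/*
	 * The usual ordering (implied by the TRY dance below) is to take
	 * VSL_LOCK before a VS lock.  Since we still hold the VS lock
	 * here, blocking on VSL could deadlock against a thread that
	 * holds VSL and is waiting on this vstruct, so we back off and
	 * retry instead of blocking.
	 */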
	/* get VSL_LOCK out of order by using TRY mechanism */
	while (!VSL_LOCK_TRY()) {
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}


	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this   */
	/* call.  We are vulnerable to losing a paging segment we rely */
	/* on as soon as we remove ourselves from the VSL and unlock.  */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects.
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if (backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();
}

kern_return_t
dp_memory_object_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_prot_t	protection_required)
{
	vstruct_t		vs;

	GSTAT(global_stats.gs_pagein_calls++);


	/* CDY at this moment vs_lookup panics when presented with the wrong */
	/* port.  As we are expanding this pager to support user interfaces  */
	/* this should be changed to return KERN_FAILURE */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance */
	/* reasons.  We can do this because we know that the read and write */
	/* threads are different and we rely on synchronization of read and */
	/* write requests at the cache memory_object level.  Break out      */
	/* wait_for_writers; all of this goes away when we get real control */
	/* of seqno with the new component interface. */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);	/* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		if (vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	pvs_cluster_read(vs, (vm_offset_t)offset, length);

	vs_finish_read(vs);

	return KERN_SUCCESS;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been  */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages  */
/* to write back.  As of the writing of this note, this is indeed the case; */
/* the kernel writes back one page at a time through this interface. */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		size)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}


/*ARGSUSED8*/
kern_return_t
dp_memory_object_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		size,
	__unused memory_object_offset_t	*resid_offset,
	__unused int		*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	__unused int		upl_flags)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread.  The pageout thread */
	/* must not be blocked by read activities, so the vs lock is granted */
	/* on a try rather than a blocking basis.  The code below relies on  */
	/* the fact that the interface is synchronous.  Should this interface */
	/* be made asynchronous again for some type of pager in the future,  */
	/* the pages will have to be returned through a separate,            */
	/* asynchronous path. */

	vs_lookup(mem_obj, vs);

	default_pager_total++;
	if (!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t		upl;
		int		page_list_count = 0;
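
		/*
		 * We build a UPL here only to abort it: the abort hands
		 * the pages back to the VM still dirty, so (as we read
		 * the intent) the kernel will simply send this data to
		 * us again later rather than lose it.
		 */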
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((vs->vs_seqno != vs->vs_next_seqno++)
	    || (vs->vs_readers)
	    || (vs->vs_xfer_pending)) {
		upl_t		upl;
		int		page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);


	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}


	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 * 	Handle requests for memory objects from the
 * 	kernel.
 * Notes:
 * 	Because we only give out the default memory
 * 	manager port to the kernel, we don't have to
 * 	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	__unused memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t		vs;

	assert(dmm == default_pager_object);

	vs = vs_object_create(new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_mem_obj = ISVS;
	vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
	default_pager_t	default_pager,
	vm_size_t	size,
	memory_object_t	*mem_objp)
{
	vstruct_t	vs;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	vs = vs_object_create(size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Set up associations between the default pager
	 * and this vstruct structure
	 */
	vs->vs_mem_obj = ISVS;
	vstruct_list_insert(vs);
	*mem_objp = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

kern_return_t
default_pager_objects(
	default_pager_t			default_pager,
	default_pager_object_array_t	*objectsp,
	mach_msg_type_number_t		*ocountp,
	mach_port_array_t		*portsp,
	mach_msg_type_number_t		*pcountp)
{
	vm_offset_t		oaddr = 0;	/* memory for objects */
	vm_size_t		osize = 0;	/* current size */
	default_pager_object_t	*objects;
	unsigned int		opotential = 0;

	vm_map_copy_t		pcopy = 0;	/* copy handle for pagers */
	vm_size_t		psize = 0;	/* current size */
	memory_object_t		*pagers;
	unsigned int		ppotential = 0;

	unsigned int		actual;
	unsigned int		num_objects;
	kern_return_t		kr;
	vstruct_t		entry;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	/*
	 * We will send no more than this many
	 */
	actual = vstruct_list.vsl_count;

	/*
	 * Our out-of-line port arrays are simply kalloc'ed.
	 */
	psize = round_page(actual * sizeof * pagers);
	ppotential = psize / sizeof * pagers;
	pagers = (memory_object_t *)kalloc(psize);
	if (0 == pagers)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * returned out of line data must be allocated out
	 * of the ipc_kernel_map, wired down, filled in, and
	 * then "copied in" as if it had been sent by a
	 * user process.
	 */
	osize = round_page(actual * sizeof * objects);
	opotential = osize / sizeof * objects;
	kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
	if (KERN_SUCCESS != kr) {
		kfree(pagers, psize);
		return KERN_RESOURCE_SHORTAGE;
	}
	objects = (default_pager_object_t *)oaddr;


	/*
	 * Now scan the list.
	 */

	VSL_LOCK();

	num_objects = 0;
	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

		memory_object_t		pager;
		vm_size_t		size;

		if ((num_objects >= opotential) ||
		    (num_objects >= ppotential)) {

			/*
			 * This should be rare.  In any case,
			 * we will only miss recent objects,
			 * because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!VS_MAP_TRY_LOCK(entry))
			goto not_this_one;
		size = ps_vstruct_allocated_size(entry);
		VS_MAP_UNLOCK(entry);

		/*
		 * We need a reference for our caller.  Adding this
		 * reference through the linked list could race with
		 * destruction of the object.  If we find the object
		 * has no references, just give up on it.
		 */
		VS_LOCK(entry);
		if (entry->vs_references == 0) {
			VS_UNLOCK(entry);
			goto not_this_one;
		}
		pager = vs_to_mem_obj(entry);
		dp_memory_object_reference(pager);
		VS_UNLOCK(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_objects].dpo_object = (vm_offset_t) entry;
		objects[num_objects].dpo_size = size;
		pagers[num_objects++] = pager;
		continue;

	    not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_objects].dpo_object = (vm_offset_t) 0;
		objects[num_objects].dpo_size = 0;
		pagers[num_objects++] = MEMORY_OBJECT_NULL;

	}

	VSL_UNLOCK();

	/* clear out any excess allocation */
	while (num_objects < opotential) {
		objects[--opotential].dpo_object = (vm_offset_t) 0;
		objects[opotential].dpo_size = 0;
	}
	while (num_objects < ppotential) {
		pagers[--ppotential] = MEMORY_OBJECT_NULL;
	}

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
			   vm_map_round_page(oaddr + osize), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
			   (vm_map_size_t)osize, TRUE, &pcopy);
	assert(KERN_SUCCESS == kr);

	*objectsp = (default_pager_object_array_t)objects;
	*ocountp = num_objects;
	*portsp = (mach_port_array_t)pcopy;
	*pcountp = num_objects;

	return KERN_SUCCESS;
}

kern_return_t
default_pager_object_pages(
	default_pager_t			default_pager,
	mach_port_t			memory_object,
	default_pager_page_array_t	*pagesp,
	mach_msg_type_number_t		*countp)
{
	vm_offset_t		addr = 0; /* memory for page offsets */
	vm_size_t		size = 0; /* current memory size */
	vm_map_copy_t		copy;
	default_pager_page_t	*pages = 0;
	unsigned int		potential;
	unsigned int		actual;
	kern_return_t		kr;
	memory_object_t		object;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	object = (memory_object_t) memory_object;

	potential = 0;
	for (;;) {
		vstruct_t	entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int	wresult;

			VS_UNLOCK(entry);

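			/*
			 * Back off for 1 ms before retrying.  The wait
			 * event is a dummy (the address of the function
			 * itself, never signalled); only the timeout
			 * wakes us, as the assert below expects.
			 */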
			assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		size = round_page(actual * sizeof * pages);
		kr = kmem_alloc(ipc_kernel_map, &addr, size);
		if (KERN_SUCCESS != kr)
			return KERN_RESOURCE_SHORTAGE;

		pages = (default_pager_page_t *)addr;
		potential = size / sizeof * pages;
	}

	/*
	 * Clear unused memory.
	 */
	while (actual < potential)
		pages[--potential].dpp_offset = 0;

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
			   vm_map_round_page(addr + size), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
			   (vm_map_size_t)size, TRUE, &copy);
	assert(KERN_SUCCESS == kr);


	*pagesp = (default_pager_page_array_t)copy;
	*countp = actual;
	return KERN_SUCCESS;
}