/* apple/xnu (xnu-1699.32.7): osfmk/default_pager/dp_memory_object.c */
/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(dp_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port, this list is
 * only used for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;

__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}

__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */

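/*
 * Illustrative sketch (not spelled out in the original comments) of the
 * discipline the routines below implement, pieced together from their
 * bodies:
 *
 *	vs_lock(vs);		// take a ticket, wait for our seqno
 *	vs_start_read(vs);	// or vs_start_write(vs)
 *	vs_unlock(vs);		// advance vs_seqno, admit the next request
 *	...perform the I/O with the pager unlocked...
 *	vs_finish_read(vs);	// or vs_finish_write(vs); wakes any waiter
 *
 * Reads that must not overtake writes additionally wait on vs_writers
 * (see vs_wait_for_writers and dp_memory_object_data_request).
 */
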
static unsigned int default_pager_total = 0;		/* debugging */
static unsigned int default_pager_wait_seqno = 0;	/* debugging */
static unsigned int default_pager_wait_read = 0;	/* debugging */
static unsigned int default_pager_wait_write = 0;	/* debugging */

__private_extern__ void
vs_async_wait(
	vstruct_t	vs)
{
	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}


#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	 by a single thread if the receiver is multithreaded and
 *	 the interfaces are asynchronous (i.e. sender can generate
 *	 more than one request before the first is received in the
 *	 pager).  Normally, IPC would generate these numbers in that
 *	 case.  But we are trying to avoid using IPC for the in-kernel
 *	 scenario.  Since these are actually invoked synchronously
 *	 anyway (in-kernel), we can just fake the sequence number
 *	 generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t	vs)
{
	mach_port_seqno_t seqno;

	default_pager_total++;
	VS_LOCK(vs);

	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

/* This is to be used for the transfer from segment code ONLY.  The */
/* transfer code holds off vs destruction by keeping the */
/* vs_async_wait count non-zero.  It will not conflict with */
/* other writers on an async basis because it only writes on */
/* a cluster basis into fresh (as of sync time) cluster locations. */

__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
		vs->vs_waiting_write = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_writers);
		return;
	}
	VS_UNLOCK(vs);
}
#endif	/* PARALLEL */

vstruct_t
vs_object_create(
	dp_size_t size)
{
	vstruct_t	vs;

	/*
	 * Allocate a vstruct.  If there are any problems, then report them
	 * to the console.
	 */
	vs = ps_vstruct_create(size);
	if (vs == VSTRUCT_NULL) {
		dprintf(("vs_object_create: unable to allocate %s\n",
			 "-- either run swapon command or reboot"));
		return VSTRUCT_NULL;
	}

	return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t		mem_obj = vs->vs_mem_obj;
	mach_port_t		pset;
	mach_port_mscount_t	sync;
	mach_port_t		previous;
	kern_return_t		kr;
	static char		here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ipc_port_make_sonce(mem_obj);
	ip_lock(mem_obj);	/* unlocked in nsrequest below */
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

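/*
 * Vtable of this pager's memory_object entry points: the VM layer
 * dispatches memory_object_*() operations on default-pager-backed
 * objects through this table (struct memory_object_pager_ops is
 * declared in <mach/memory_object_types.h>).  The initializer below
 * is positional, so the entries must stay in declaration order.
 */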
const struct memory_object_pager_ops default_pager_ops = {
	dp_memory_object_reference,
	dp_memory_object_deallocate,
	dp_memory_object_init,
	dp_memory_object_terminate,
	dp_memory_object_data_request,
	dp_memory_object_data_return,
	dp_memory_object_data_initialize,
	dp_memory_object_data_unlock,
	dp_memory_object_synchronize,
	dp_memory_object_map,
	dp_memory_object_last_unmap,
	dp_memory_object_data_reclaim,
	"default pager"
};

kern_return_t
dp_memory_object_init(
	memory_object_t		mem_obj,
	memory_object_control_t	control,
	__unused memory_object_cluster_size_t pager_page_size)
{
	vstruct_t	vs;

	assert(pager_page_size == vm_page_size);

	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t	length,
	__unused vm_sync_t	flags)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_map(
	__unused memory_object_t	mem_obj,
	__unused vm_prot_t		prot)
{
	panic("dp_memory_object_map");
	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_last_unmap(
	__unused memory_object_t	mem_obj)
{
	panic("dp_memory_object_last_unmap");
	return KERN_FAILURE;
}

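/*
 * (map/last_unmap are only meaningful for pagers whose objects are
 * mapped into address spaces directly.  The default pager's objects
 * back anonymous memory and are reached only through the pagein and
 * pageout paths, so these entry points should never fire; the panics
 * above act as tripwires.)
 */
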
kern_return_t
dp_memory_object_data_reclaim(
	memory_object_t		mem_obj,
	boolean_t		reclaim_backing_store)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	for (;;) {
		vs_lock(vs);
		vs_async_wait(vs);
		if (!vs->vs_xfer_pending) {
			break;
		}
	}
	vs->vs_xfer_pending = TRUE;
	vs_unlock(vs);

	ps_vstruct_reclaim(vs, TRUE, reclaim_backing_store);

	vs_lock(vs);
	vs->vs_xfer_pending = FALSE;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_terminate(
	memory_object_t	mem_obj)
{
	memory_object_control_t	control;
	vstruct_t		vs;

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/* A bit of special-case ugliness here: wake up any waiting reads. */
	/* These data requests had to be removed from the seqno traffic */
	/* because of a performance bottleneck with large memory objects. */
	/* The problem will right itself with the new component-based */
	/* synchronous interface.  The new async will be able to return */
	/* failure during its sync phase.  In the meantime ... */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

void
dp_memory_object_reference(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	assert(vs->vs_references > 0);
	vs->vs_references++;
	VS_UNLOCK(vs);
}

void
dp_memory_object_deallocate(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;
	mach_port_seqno_t	seqno;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged */

	/* get VSL_LOCK out of order by using TRY mechanism */
	while (!VSL_LOCK_TRY()) {
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}
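	/*
	 * (The try-lock dance above appears to exist because the usual
	 * lock order elsewhere in this file is VSL_LOCK before VS_LOCK,
	 * e.g. in default_pager_objects().  We already hold VS_LOCK here,
	 * so blocking on VSL_LOCK directly could deadlock; instead we
	 * drop VS_LOCK, wait for VSL_LOCK to become free, and retry,
	 * re-draining async IO after every reacquisition.)
	 */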

	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this */
	/* call.  We are vulnerable to losing a paging segment we rely */
	/* on as soon as we remove ourselves from the VSL and unlock. */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release. */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects.
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if (backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();
}

kern_return_t
dp_memory_object_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	length,
	__unused vm_prot_t	protection_required,
	memory_object_fault_info_t	fault_info)
{
	vstruct_t		vs;
	kern_return_t		kr = KERN_SUCCESS;

	GSTAT(global_stats.gs_pagein_calls++);

	/* CDY at this moment vs_lookup panics when presented with the wrong */
	/* port.  As we are expanding this pager to support user interfaces */
	/* this should be changed to return kern_failure */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance */
	/* reasons.  We can do this because we know that the read and */
	/* write threads are different and we rely on synchronization */
	/* of read and write requests at the cache memory_object level. */
	/* Break out wait_for_writers; all of this goes away when */
	/* we get real control of seqno with the new component interface. */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);	/* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		if (vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	assert((dp_offset_t) offset == offset);
	kr = pvs_cluster_read(vs, (dp_offset_t) offset, length, fault_info);

	/*
	 * Regular data requests have a non-zero length and always return
	 * KERN_SUCCESS.  Their actual success is determined by whether they
	 * provide a page or not, i.e. whether we call upl_commit() or
	 * upl_abort().  A length of 0 means that the caller is only asking
	 * whether the pager has a copy of that page; the answer to that
	 * question is provided by the return value.  KERN_SUCCESS means
	 * that the pager does have that page.
	 */
	if (length) {
		kr = KERN_SUCCESS;
	}

	vs_finish_read(vs);

	return kr;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages */
/* to write back.  As of the writing of this note, this is indeed the case: */
/* the kernel writes back one page at a time through this interface. */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	size)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes.  vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t	size,
	__unused vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}

/*ARGSUSED8*/
kern_return_t
dp_memory_object_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	size,
	__unused memory_object_offset_t	*resid_offset,
	__unused int		*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	__unused int		upl_flags)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread.  The pageout thread */
	/* must not be blocked by read activities, so the grant of the vs */
	/* lock must be done on a try versus a blocking basis.  The code */
	/* below relies on the fact that the interface is synchronous. */
	/* Should this interface be again async for some type of pager */
	/* in the future, the pages will have to be returned through a */
	/* separate, asynchronous path. */

	vs_lookup(mem_obj, vs);

	default_pager_total++;
	if (!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t		upl;
		unsigned int	page_list_count = 0;

		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}
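	/*
	 * (When the lock can't be taken, the pager still grabs the UPL the
	 * kernel built for this pageout and aborts it rather than
	 * committing it; the pages go back to the VM queues and the
	 * pageout is presumably retried on a later pass, which is why
	 * returning KERN_SUCCESS without writing anything is safe here.)
	 */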

	if ((vs->vs_seqno != vs->vs_next_seqno++)
	    || (vs->vs_readers)
	    || (vs->vs_xfer_pending)) {
		upl_t		upl;
		unsigned int	page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);

	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes.  vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t) offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}

	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 *	Handle requests for memory objects from the
 *	kernel.
 * Notes:
 *	Because we only give out the default memory
 *	manager port to the kernel, we don't have to
 *	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	__unused memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t		vs;

	assert(dmm == default_pager_object);

	if ((dp_size_t) new_size != new_size) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	vs = vs_object_create((dp_size_t) new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_pager_ops = &default_pager_ops;
	vs->vs_pager_header.io_bits = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
	default_pager_t	default_pager,
	vm_size_t	size,
	memory_object_t	*mem_objp)
{
	vstruct_t	vs;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	if ((dp_size_t) size != size) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	vs = vs_object_create((dp_size_t) size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Set up associations between the default pager
	 * and this vstruct structure
	 */
	vs->vs_pager_ops = &default_pager_ops;
	vstruct_list_insert(vs);
	*mem_objp = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

kern_return_t
default_pager_objects(
	default_pager_t			default_pager,
	default_pager_object_array_t	*objectsp,
	mach_msg_type_number_t		*ocountp,
	mach_port_array_t		*portsp,
	mach_msg_type_number_t		*pcountp)
{
	vm_offset_t		oaddr = 0;	/* memory for objects */
	vm_size_t		osize = 0;	/* current size */
	default_pager_object_t	*objects;
	unsigned int		opotential = 0;

	vm_map_copy_t		pcopy = 0;	/* copy handle for pagers */
	vm_size_t		psize = 0;	/* current size */
	memory_object_t		*pagers;
	unsigned int		ppotential = 0;

	unsigned int		actual;
	unsigned int		num_objects;
	kern_return_t		kr;
	vstruct_t		entry;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	/*
	 * We will send no more than this many
	 */
	actual = vstruct_list.vsl_count;

	/*
	 * Our out-of-line port arrays are simply kalloc'ed.
	 */
	psize = round_page(actual * sizeof (*pagers));
	ppotential = (unsigned int) (psize / sizeof (*pagers));
	pagers = (memory_object_t *)kalloc(psize);
	if (0 == pagers)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Returned out-of-line data must be allocated out of
	 * the ipc_kernel_map, wired down, filled in, and
	 * then "copied in" as if it had been sent by a
	 * user process.
	 */
	osize = round_page(actual * sizeof (*objects));
	opotential = (unsigned int) (osize / sizeof (*objects));
	kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
	if (KERN_SUCCESS != kr) {
		kfree(pagers, psize);
		return KERN_RESOURCE_SHORTAGE;
	}
	objects = (default_pager_object_t *)oaddr;

	/*
	 * Now scan the list.
	 */

	VSL_LOCK();

	num_objects = 0;
	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

		memory_object_t		pager;
		vm_size_t		size;

		if ((num_objects >= opotential) ||
		    (num_objects >= ppotential)) {

			/*
			 * This should be rare.  In any case,
			 * we will only miss recent objects,
			 * because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!VS_MAP_TRY_LOCK(entry))
			goto not_this_one;
		size = ps_vstruct_allocated_size(entry);
		VS_MAP_UNLOCK(entry);

		/*
		 * We need a reference for our caller.  Adding this
		 * reference through the linked list could race with
		 * destruction of the object.  If we find the object
		 * has no references, just give up on it.
		 */
		VS_LOCK(entry);
		if (entry->vs_references == 0) {
			VS_UNLOCK(entry);
			goto not_this_one;
		}
		pager = vs_to_mem_obj(entry);
		dp_memory_object_reference(pager);
		VS_UNLOCK(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_objects].dpo_object = (vm_offset_t) entry;
		objects[num_objects].dpo_size = size;
		pagers [num_objects++] = pager;
		continue;

	    not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_objects].dpo_object = (vm_offset_t) 0;
		objects[num_objects].dpo_size = 0;
		pagers[num_objects++] = MEMORY_OBJECT_NULL;

	}

	VSL_UNLOCK();

	/* clear out any excess allocation */
	while (num_objects < opotential) {
		objects[--opotential].dpo_object = (vm_offset_t) 0;
		objects[opotential].dpo_size = 0;
	}
	while (num_objects < ppotential) {
		pagers[--ppotential] = MEMORY_OBJECT_NULL;
	}

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
			   vm_map_round_page(oaddr + osize), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
			   (vm_map_size_t)osize, TRUE, &pcopy);
	assert(KERN_SUCCESS == kr);

	*objectsp = (default_pager_object_array_t)objects;
	*ocountp = num_objects;
	*portsp = (mach_port_array_t)pcopy;
	*pcountp = num_objects;

	return KERN_SUCCESS;
}

kern_return_t
default_pager_object_pages(
	default_pager_t		default_pager,
	mach_port_t		memory_object,
	default_pager_page_array_t	*pagesp,
	mach_msg_type_number_t	*countp)
{
	vm_offset_t		addr = 0;	/* memory for page offsets */
	vm_size_t		size = 0;	/* current memory size */
	vm_map_copy_t		copy;
	default_pager_page_t	*pages = 0;
	unsigned int		potential;
	unsigned int		actual;
	kern_return_t		kr;
	memory_object_t		object;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	object = (memory_object_t) memory_object;

	potential = 0;
	for (;;) {
		vstruct_t	entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int wresult;

			VS_UNLOCK(entry);

			assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		size = round_page(actual * sizeof (*pages));
		kr = kmem_alloc(ipc_kernel_map, &addr, size);
		if (KERN_SUCCESS != kr)
			return KERN_RESOURCE_SHORTAGE;

		pages = (default_pager_page_t *)addr;
		potential = (unsigned int) (size / sizeof (*pages));
	}

	/*
	 * Clear unused memory.
	 */
	while (actual < potential)
		pages[--potential].dpp_offset = 0;

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
			   vm_map_round_page(addr + size), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
			   (vm_map_size_t)size, TRUE, &copy);
	assert(KERN_SUCCESS == kr);

	*pagesp = (default_pager_page_array_t)copy;
	*countp = actual;
	return KERN_SUCCESS;
}